数据库版本:pg12.1 opengauss/mogdb 2.1.0
最近看到了许多关于pg等待事件的文章,对等待事件这部分也有了很大的兴趣。
等待事件是一个累计的统计信息,表明一个server process要继续完成作业,必须等待一个时间的结束;因为系统资源有限,那么完成某些工作,所需资源就要轮流使用,那么在这个过程当中,就会产生等待资源的情况。数据库会用不同类型的定义,来描述这个事情,称之为等待事件。
opengauss/mogdb数据库是基于pg研发的,pg是从9.6版本加入了等待事件特性,可以通过查询pg_stat_activity中的wait_event_type和wait_event了解到每个sql进程在当前更详细的执行状态 。opengauss/mogdb在pg的基础上有许多优化及改动,把一些等待事件重新定义,等待事件在保留了部分原等待事件的基础上也增加了一部分。
在分析问题的时候,等待事件对于我们还是较为重要的,我们可以根据等待事件,初步定位问题,并结合相关测试进行验证,看到了熟悉的等待事件,我们甚至能大概猜出问题所在。相对于oracle来说,pg以及opengauss/mogdb的等待事件种类和数量较少。在等待事件这方面可能还有极大优化的空间,如果能把等待事件的细粒程度增加,应该会帮助我们更好的了解数据库状态,解决数据库问题。
一般来说产生等待事件的几种情况:
1.请求的资源忙,需要资源释放
2.会话处于空闲状态,等待任务
3.会话被阻塞,需要等待阻塞解除
以下内容对比可能根据pg版本有变化,如果有误,欢迎帮我指正交流。
在oracle 11g里,共有13类等待事件,包含了1367个等待事件,如下所示:
sys@orcl11g> select distinct wait_class from v$event_name order by 1;
wait_class
--------------------------------------
administrative --管理类
application --应用类
cluster --集群类
commit --提交类
concurrency --并发
configuration --配置
idle --空闲
network --网络
other --其他
queueing --队列
scheduler --任务调度
system i/o --系统i/o
user i/o --用户i/o
13 rows selected.
sys@orcl11g>select count(name) from v$event_name;
count(name)
-------------------
1367
而pg数据库里,有着9类等待事件
/* ----------
* wait classes
* ----------
*/
#define pg_wait_lwlock 0x01000000u /* 等待lwlock */
#define pg_wait_lock 0x03000000u /* 等待lock */
#define pg_wait_buffer_pin 0x04000000u /* 等待访问数据缓冲区 */
#define pg_wait_activity 0x05000000u /* 服务器进程处于空闲状态 */
#define pg_wait_client 0x06000000u /* 等待应用客户端程序在套接字中进行操作 */
#define pg_wait_extension 0x07000000u /* 等待扩展模块中的操作 */
#define pg_wait_ipc 0x08000000u /* 等待进程间通信 */
#define pg_wait_timeout 0x09000000u /* 等待达到超时时间 */
#define pg_wait_io 0x0a000000u /* 等待io操作完成 */
opengauss/mogdb里,有着5类等待事件
/* ----------
* wait event classes
* ----------
*/
#define wait_event_end 0x00000000u /* 等待事件结束*/
#define pg_wait_lwlock 0x01000000u /* 等待lwlock */
#define pg_wait_lock 0x03000000u /* 等待lock */
#define pg_wait_io 0x0a000000u /* 等待io操作完成 */
#define pg_wait_sql 0x0b000000u /* 等待sql的类型 */
1.wait_event_end
在类型定义里,wait_event_end更像是一种声明等待事件结束的状态,可以看到代码使用部分,在调用pgstat_report_waitevent 函数报告某个等待事件之后,进行相关处理,最后再调用一次pgstat_report_waitevent 函数报告wait_event_end,类似于声明操作结束,等待结束。
pgstat_report_waitevent 函数部分代码如下,这个函数会从服务器进程需要等待的地方调用,会报告等待事件信息,等待信息被存储作为4字节。
2.pg_wait_lock
lock类的等待事件表示backend后台进程等待重量级的锁,通常是指 relation、tuple、page、transactionid 等子类型锁 。
在pg里共有10种,
/*
* locktag is the key information needed to look up a lock item in the
* lock hashtable. a locktag value uniquely identifies a lockable object.
*
* the locktagtype enum defines the different kinds of objects we can lock.
* we can handle up to 256 different locktagtypes.
*/
typedef enum locktagtype
{
locktag_relation, /* whole relation */
locktag_relation_extend, /* the right to extend a relation */
locktag_page, /* one page of a relation */
locktag_tuple, /* one physical tuple */
locktag_transaction, /* transaction (for waiting for xact done) */
locktag_virtualtransaction, /* virtual transaction (ditto) */
locktag_speculative_token, /* speculative insertion xid and token */
locktag_object, /* non-relation database object */
locktag_userlock, /* reserved for old contrib/userlock code */
locktag_advisory /* advisory user locks */
} locktagtype;
而opengauss/mogdb的lock类等待事件增加了locktag_partition、locktag_partition_sequence、locktag_cstore_freespace、locktag_relfilenode、locktag_subtransaction,分别是分区、分区序列、cstore的空闲空间、relfilenode以及子事务的等待。
/*
* locktag is the key information needed to look up a lock item in the
* lock hashtable. a locktag value uniquely identifies a lockable object.
*
* the locktagtype enum defines the different kinds of objects we can lock.
* we can handle up to 256 different locktagtypes.
*/
typedef enum locktagtype {
locktag_relation, /* whole relation */
/* id info for a relation is db oid rel oid; db oid = 0 if shared */
locktag_relation_extend, /* the right to extend a relation */
/* same id info as relation */
locktag_partition, /*partition*/
locktag_partition_sequence, /*partition sequence*/
locktag_page, /* one page of a relation */
/* id info for a page is relation info blocknumber */
locktag_tuple, /* one physical tuple */
/* id info for a tuple is page info offsetnumber */
locktag_transaction, /* transaction (for waiting for xact done) */
/* id info for a transaction is its transactionid */
locktag_virtualtransaction, /* virtual transaction (ditto) */
/* id info for a virtual transaction is its virtualtransactionid */
locktag_object, /* non-relation database object */
/* id info for an object is db oid class oid object oid subid */
locktag_cstore_freespace, /* cstore free space */
/*
* note: object id has same representation as in pg_depend and
* pg_description, but notice that we are constraining subid to 16 bits.
* also, we use db oid = 0 for shared objects such as tablespaces.
*/
locktag_userlock, /* reserved for old contrib/userlock code */
locktag_advisory, /* advisory user locks */
/* same id info as spcoid, dboid, reloid */
locktag_relfilenode, /* relfilenode */
locktag_subtransaction, /* subtransaction (for waiting for subxact done) */
/* id info for a transaction is its transactionid subtransactionid */
lock_event_num
} locktagtype;
3.pg_wait_io
如下的io类部分为opengauss/mogdb和pg12.1对比所不具有的,可以看到pg12.1比opengauss/mogdb多了逻辑复制查询重写,reorder buffer的读写等待、时间线历史文件的同步、wal同步、wal bootstrap的同步、写等待等等。
wait_event_dsm_fill_zero_write, wait_event_lock_file_recheckdatadir_read, wait_event_logical_rewrite_checkpoint_sync, wait_event_logical_rewrite_mapping_sync, wait_event_logical_rewrite_mapping_write, wait_event_logical_rewrite_sync, wait_event_logical_rewrite_truncate, wait_event_logical_rewrite_write, wait_event_reorder_buffer_read, wait_event_reorder_buffer_write, wait_event_reorder_logical_mapping_read, wait_event_timeline_history_file_sync, wait_event_timeline_history_file_write, wait_event_timeline_history_read, wait_event_timeline_history_sync, wait_event_timeline_history_write, wait_event_walsender_timeline_history_read, wait_event_wal_bootstrap_sync, wait_event_wal_bootstrap_write, wait_event_wal_sync,
而如下部分为pg12.1不具有而opengauss/mogdb独有的,多了undo文件相关,doublerwite文件读写等等、
wait_event_buf_hash_search, wait_event_buf_strategy_get, wait_event_undo_file_extend, wait_event_undo_file_prefetch, wait_event_undo_file_read, wait_event_undo_file_write, wait_event_undo_file_flush, wait_event_undo_file_sync, wait_event_wal_buffer_access, wait_event_wal_buffer_full, wait_event_dw_read, wait_event_dw_write, wait_event_dw_single_pos, wait_event_dw_single_write, wait_event_predo_process_pending, wait_event_predo_apply, wait_event_disable_connect_file_read, wait_event_disable_connect_file_sync, wait_event_disable_connect_file_write, wait_event_mpfl_init, wait_event_mpfl_read, wait_event_mpfl_write, wait_event_obs_list, wait_event_obs_read, wait_event_obs_write, wait_event_logctrl_sleep, wait_event_compress_address_file_flush, wait_event_compress_address_file_sync,
4.pg_wait_lwlock
lwlock的等待事件主要包含两种:lwlocknamed 和lwlocktranche ,前者表示backend后台进程等待某种特定的轻量级锁 ,后者表示表示backend后台进程等待一组相关轻量级锁。
这一部分的等待事件较多,就不一一列举了,但是可以看到,opengauss/mogdb和pg12.1的lwlocknamed 类等待事件只有少部分一致,这个可能与opengauss/mogdb基于pg 9.2.4版本研发有关,等待事件重新定义了。而lwlocktranche 这部分还是有一部分是一致的,但明显opengauss/mogdb补充的等待事件数量也更加多。
5.pg_wait_sql
这一类的等待事件是opengauss/mogdb分的一类关于sql的,可以看到等待的sql类型。
/* ----------
* wait events - sql
*
* using this to indicate the type of sql dml event.
* ----------
*/
typedef enum waiteventsql {
wait_event_sql_select = pg_wait_sql,
wait_event_sql_update,
wait_event_sql_insert,
wait_event_sql_delete,
wait_event_sql_mergeinto,
wait_event_sql_ddl,
wait_event_sql_dml,
wait_event_sql_dcl,
wait_event_sql_tcl
} waiteventsql;
如下是相应的pgstat_report_wait_count 函数使用的关于这个等待事件的部分,可以看到它主要是使用pg atomic函数根据wait_event_info为用户添加sql计数。
/* using pg atomic function to add count for corresponsible waiteventsql */
if (classid == pg_wait_sql) {
waiteventsql w = (waiteventsql)wait_event_info;
switch (w) {
case wait_event_sql_select: {
update_sql_count(waitcountstatuscell->waitcountarray[dataid].wc_cnt.wc_sql_select,
waitcountstatuscell->waitcountarray[dataid].wc_cnt.selectelapse);
} break;
case wait_event_sql_update: {
update_sql_count(waitcountstatuscell->waitcountarray[dataid].wc_cnt.wc_sql_update,
waitcountstatuscell->waitcountarray[dataid].wc_cnt.updateelapse);
} break;
case wait_event_sql_insert: {
update_sql_count(waitcountstatuscell->waitcountarray[dataid].wc_cnt.wc_sql_insert,
waitcountstatuscell->waitcountarray[dataid].wc_cnt.insertelapse);
} break;
case wait_event_sql_delete: {
update_sql_count(waitcountstatuscell->waitcountarray[dataid].wc_cnt.wc_sql_delete,
waitcountstatuscell->waitcountarray[dataid].wc_cnt.deleteelapse);
} break;
case wait_event_sql_mergeinto:
pg_atomic_fetch_add_u64(&(waitcountstatuscell->waitcountarray[dataid].wc_cnt.wc_sql_mergeinto), 1);
break;
case wait_event_sql_ddl:
pg_atomic_fetch_add_u64(&(waitcountstatuscell->waitcountarray[dataid].wc_cnt.wc_sql_ddl), 1);
break;
case wait_event_sql_dml:
pg_atomic_fetch_add_u64(&(waitcountstatuscell->waitcountarray[dataid].wc_cnt.wc_sql_dml), 1);
break;
case wait_event_sql_dcl:
pg_atomic_fetch_add_u64(&(waitcountstatuscell->waitcountarray[dataid].wc_cnt.wc_sql_dcl), 1);
break;
case wait_event_sql_tcl:
pg_atomic_fetch_add_u64(&(waitcountstatuscell->waitcountarray[dataid].wc_cnt.wc_sql_tcl), 1);
break;
default:
break;
}
}
lwlockrelease(waitcounthashlock);
}