diff options
Diffstat (limited to 'src/include')
-rw-r--r-- | src/include/miscadmin.h | 2 | ||||
-rw-r--r-- | src/include/pgstat.h | 679 | ||||
-rw-r--r-- | src/include/storage/lwlock.h | 3 | ||||
-rw-r--r-- | src/include/utils/pgstat_internal.h | 663 | ||||
-rw-r--r-- | src/include/utils/rel.h | 1 | ||||
-rw-r--r-- | src/include/utils/timeout.h | 1 | ||||
-rw-r--r-- | src/include/utils/wait_event.h | 1 |
7 files changed, 702 insertions, 648 deletions
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 9321d7f264b..66c404c666d 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -94,6 +94,7 @@ extern PGDLLIMPORT volatile sig_atomic_t IdleInTransactionSessionTimeoutPending; extern PGDLLIMPORT volatile sig_atomic_t IdleSessionTimeoutPending; extern PGDLLIMPORT volatile sig_atomic_t ProcSignalBarrierPending; extern PGDLLIMPORT volatile sig_atomic_t LogMemoryContextPending; +extern PGDLLIMPORT volatile sig_atomic_t IdleStatsUpdateTimeoutPending; extern PGDLLIMPORT volatile sig_atomic_t CheckClientConnectionPending; extern PGDLLIMPORT volatile sig_atomic_t ClientConnectionLost; @@ -333,7 +334,6 @@ typedef enum BackendType B_WAL_SENDER, B_WAL_WRITER, B_ARCHIVER, - B_STATS_COLLECTOR, B_LOGGER, } BackendType; diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 99115bacde7..1d2d3de86c9 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -14,10 +14,8 @@ #include "datatype/timestamp.h" #include "portability/instr_time.h" #include "postmaster/pgarch.h" /* for MAX_XFN_CHARS */ -#include "replication/logicalproto.h" #include "utils/backend_progress.h" /* for backward compatibility */ #include "utils/backend_status.h" /* for backward compatibility */ -#include "utils/hsearch.h" #include "utils/relcache.h" #include "utils/wait_event.h" /* for backward compatibility */ @@ -27,8 +25,8 @@ * ---------- */ #define PGSTAT_STAT_PERMANENT_DIRECTORY "pg_stat" -#define PGSTAT_STAT_PERMANENT_FILENAME "pg_stat/global.stat" -#define PGSTAT_STAT_PERMANENT_TMPFILE "pg_stat/global.tmp" +#define PGSTAT_STAT_PERMANENT_FILENAME "pg_stat/pgstat.stat" +#define PGSTAT_STAT_PERMANENT_TMPFILE "pg_stat/pgstat.tmp" /* Default directory to store temporary statistics data in */ #define PG_STAT_TMP_DIR "pg_stat_tmp" @@ -66,6 +64,13 @@ typedef enum TrackFunctionsLevel TRACK_FUNC_ALL } TrackFunctionsLevel; +typedef enum PgStat_FetchConsistency +{ + PGSTAT_FETCH_CONSISTENCY_NONE, + 
PGSTAT_FETCH_CONSISTENCY_CACHE, + PGSTAT_FETCH_CONSISTENCY_SNAPSHOT, +} PgStat_FetchConsistency; + /* Values to track the cause of session termination */ typedef enum SessionEndType { @@ -92,7 +97,7 @@ typedef int64 PgStat_Counter; * PgStat_FunctionCounts The actual per-function counts kept by a backend * * This struct should contain only actual event counters, because we memcmp - * it against zeroes to detect whether there are any counts to transmit. + * it against zeroes to detect whether there are any pending stats. * * Note that the time counters are in instr_time format here. We convert to * microseconds in PgStat_Counter format when flushing out pending statistics. @@ -106,12 +111,11 @@ typedef struct PgStat_FunctionCounts } PgStat_FunctionCounts; /* ---------- - * PgStat_BackendFunctionEntry Entry in backend's per-function hash table + * PgStat_BackendFunctionEntry Non-flushed function stats. * ---------- */ typedef struct PgStat_BackendFunctionEntry { - Oid f_id; PgStat_FunctionCounts f_counts; } PgStat_BackendFunctionEntry; @@ -132,12 +136,21 @@ typedef struct PgStat_FunctionCallUsage } PgStat_FunctionCallUsage; /* ---------- + * PgStat_BackendSubEntry Non-flushed subscription stats. + * ---------- + */ +typedef struct PgStat_BackendSubEntry +{ + PgStat_Counter apply_error_count; + PgStat_Counter sync_error_count; +} PgStat_BackendSubEntry; + +/* ---------- * PgStat_TableCounts The actual per-table counts kept by a backend * * This struct should contain only actual event counters, because we memcmp - * it against zeroes to detect whether there are any counts to transmit. - * It is a component of PgStat_TableStatus (within-backend state) and - * PgStat_TableEntry (the transmitted message format). + * it against zeroes to detect whether there are any stats updates to apply. + * It is a component of PgStat_TableStatus (within-backend state). 
* * Note: for a table, tuples_returned is the number of tuples successfully * fetched by heap_getnext, while tuples_fetched is the number of tuples @@ -194,6 +207,7 @@ typedef struct PgStat_TableStatus bool t_shared; /* is it a shared catalog? */ struct PgStat_TableXactStatus *trans; /* lowest subxact's counts */ PgStat_TableCounts t_counts; /* event counts to be sent */ + Relation relation; /* rel that is using this entry */ } PgStat_TableStatus; /* ---------- @@ -221,569 +235,14 @@ typedef struct PgStat_TableXactStatus /* ------------------------------------------------------------ - * Message formats follow - * ------------------------------------------------------------ - */ - -/* ---------- - * The types of backend -> collector messages - * ---------- - */ -typedef enum StatMsgType -{ - PGSTAT_MTYPE_DUMMY, - PGSTAT_MTYPE_INQUIRY, - PGSTAT_MTYPE_TABSTAT, - PGSTAT_MTYPE_TABPURGE, - PGSTAT_MTYPE_DROPDB, - PGSTAT_MTYPE_RESETCOUNTER, - PGSTAT_MTYPE_RESETSHAREDCOUNTER, - PGSTAT_MTYPE_RESETSINGLECOUNTER, - PGSTAT_MTYPE_RESETSLRUCOUNTER, - PGSTAT_MTYPE_RESETREPLSLOTCOUNTER, - PGSTAT_MTYPE_RESETSUBCOUNTER, - PGSTAT_MTYPE_AUTOVAC_START, - PGSTAT_MTYPE_VACUUM, - PGSTAT_MTYPE_ANALYZE, - PGSTAT_MTYPE_ARCHIVER, - PGSTAT_MTYPE_BGWRITER, - PGSTAT_MTYPE_CHECKPOINTER, - PGSTAT_MTYPE_WAL, - PGSTAT_MTYPE_SLRU, - PGSTAT_MTYPE_FUNCSTAT, - PGSTAT_MTYPE_FUNCPURGE, - PGSTAT_MTYPE_RECOVERYCONFLICT, - PGSTAT_MTYPE_TEMPFILE, - PGSTAT_MTYPE_DEADLOCK, - PGSTAT_MTYPE_CHECKSUMFAILURE, - PGSTAT_MTYPE_REPLSLOT, - PGSTAT_MTYPE_CONNECT, - PGSTAT_MTYPE_DISCONNECT, - PGSTAT_MTYPE_SUBSCRIPTIONDROP, - PGSTAT_MTYPE_SUBSCRIPTIONERROR, -} StatMsgType; - -/* ---------- - * PgStat_MsgHdr The common message header - * ---------- - */ -typedef struct PgStat_MsgHdr -{ - StatMsgType m_type; - int m_size; -} PgStat_MsgHdr; - -/* ---------- - * Space available in a message. This will keep the UDP packets below 1K, - * which should fit unfragmented into the MTU of the loopback interface. 
- * (Larger values of PGSTAT_MAX_MSG_SIZE would work for that on most - * platforms, but we're being conservative here.) - * ---------- - */ -#define PGSTAT_MAX_MSG_SIZE 1000 -#define PGSTAT_MSG_PAYLOAD (PGSTAT_MAX_MSG_SIZE - sizeof(PgStat_MsgHdr)) - - -/* ---------- - * PgStat_MsgDummy A dummy message, ignored by the collector - * ---------- - */ -typedef struct PgStat_MsgDummy -{ - PgStat_MsgHdr m_hdr; -} PgStat_MsgDummy; - -/* ---------- - * PgStat_MsgInquiry Sent by a backend to ask the collector - * to write the stats file(s). - * - * Ordinarily, an inquiry message prompts writing of the global stats file, - * the stats file for shared catalogs, and the stats file for the specified - * database. If databaseid is InvalidOid, only the first two are written. - * - * New file(s) will be written only if the existing file has a timestamp - * older than the specified cutoff_time; this prevents duplicated effort - * when multiple requests arrive at nearly the same time, assuming that - * backends send requests with cutoff_times a little bit in the past. - * - * clock_time should be the requestor's current local time; the collector - * uses this to check for the system clock going backward, but it has no - * effect unless that occurs. We assume clock_time >= cutoff_time, though. - * ---------- - */ -typedef struct PgStat_MsgInquiry -{ - PgStat_MsgHdr m_hdr; - TimestampTz clock_time; /* observed local clock time */ - TimestampTz cutoff_time; /* minimum acceptable file timestamp */ - Oid databaseid; /* requested DB (InvalidOid => shared only) */ -} PgStat_MsgInquiry; - -/* ---------- - * PgStat_TableEntry Per-table info in a MsgTabstat - * ---------- - */ -typedef struct PgStat_TableEntry -{ - Oid t_id; - PgStat_TableCounts t_counts; -} PgStat_TableEntry; - -/* ---------- - * PgStat_MsgTabstat Sent by the backend to report table - * and buffer access statistics. 
- * ---------- - */ -#define PGSTAT_NUM_TABENTRIES \ - ((PGSTAT_MSG_PAYLOAD - sizeof(Oid) - 3 * sizeof(int) - 5 * sizeof(PgStat_Counter)) \ - / sizeof(PgStat_TableEntry)) - -typedef struct PgStat_MsgTabstat -{ - PgStat_MsgHdr m_hdr; - Oid m_databaseid; - int m_nentries; - int m_xact_commit; - int m_xact_rollback; - PgStat_Counter m_block_read_time; /* times in microseconds */ - PgStat_Counter m_block_write_time; - PgStat_Counter m_session_time; - PgStat_Counter m_active_time; - PgStat_Counter m_idle_in_xact_time; - PgStat_TableEntry m_entry[PGSTAT_NUM_TABENTRIES]; -} PgStat_MsgTabstat; - -/* ---------- - * PgStat_MsgTabpurge Sent by the backend to tell the collector - * about dead tables. - * ---------- - */ -#define PGSTAT_NUM_TABPURGE \ - ((PGSTAT_MSG_PAYLOAD - sizeof(Oid) - sizeof(int)) \ - / sizeof(Oid)) - -typedef struct PgStat_MsgTabpurge -{ - PgStat_MsgHdr m_hdr; - Oid m_databaseid; - int m_nentries; - Oid m_tableid[PGSTAT_NUM_TABPURGE]; -} PgStat_MsgTabpurge; - -/* ---------- - * PgStat_MsgDropdb Sent by the backend to tell the collector - * about a dropped database - * ---------- - */ -typedef struct PgStat_MsgDropdb -{ - PgStat_MsgHdr m_hdr; - Oid m_databaseid; -} PgStat_MsgDropdb; - -/* ---------- - * PgStat_MsgResetcounter Sent by the backend to tell the collector - * to reset counters - * ---------- - */ -typedef struct PgStat_MsgResetcounter -{ - PgStat_MsgHdr m_hdr; - Oid m_databaseid; -} PgStat_MsgResetcounter; - -/* ---------- - * PgStat_MsgResetsharedcounter Sent by the backend to tell the collector - * to reset a shared counter - * ---------- - */ -typedef struct PgStat_MsgResetsharedcounter -{ - PgStat_MsgHdr m_hdr; - PgStat_Kind m_resettarget; -} PgStat_MsgResetsharedcounter; - -/* ---------- - * PgStat_MsgResetsinglecounter Sent by the backend to tell the collector - * to reset a single counter - * ---------- - */ -typedef struct PgStat_MsgResetsinglecounter -{ - PgStat_MsgHdr m_hdr; - Oid m_databaseid; - PgStat_Kind m_resettype; - Oid 
m_objectid; -} PgStat_MsgResetsinglecounter; - -/* ---------- - * PgStat_MsgResetslrucounter Sent by the backend to tell the collector - * to reset a SLRU counter - * ---------- - */ -typedef struct PgStat_MsgResetslrucounter -{ - PgStat_MsgHdr m_hdr; - int m_index; -} PgStat_MsgResetslrucounter; - -/* ---------- - * PgStat_MsgResetreplslotcounter Sent by the backend to tell the collector - * to reset replication slot counter(s) - * ---------- - */ -typedef struct PgStat_MsgResetreplslotcounter -{ - PgStat_MsgHdr m_hdr; - NameData m_slotname; - bool clearall; -} PgStat_MsgResetreplslotcounter; - -/* ---------- - * PgStat_MsgResetsubcounter Sent by the backend to tell the collector - * to reset subscription counter(s) - * ---------- - */ -typedef struct PgStat_MsgResetsubcounter -{ - PgStat_MsgHdr m_hdr; - Oid m_subid; /* InvalidOid means reset all subscription - * stats */ -} PgStat_MsgResetsubcounter; - -/* ---------- - * PgStat_MsgAutovacStart Sent by the autovacuum daemon to signal - * that a database is going to be processed - * ---------- - */ -typedef struct PgStat_MsgAutovacStart -{ - PgStat_MsgHdr m_hdr; - Oid m_databaseid; - TimestampTz m_start_time; -} PgStat_MsgAutovacStart; - -/* ---------- - * PgStat_MsgVacuum Sent by the backend or autovacuum daemon - * after VACUUM - * ---------- - */ -typedef struct PgStat_MsgVacuum -{ - PgStat_MsgHdr m_hdr; - Oid m_databaseid; - Oid m_tableoid; - bool m_autovacuum; - TimestampTz m_vacuumtime; - PgStat_Counter m_live_tuples; - PgStat_Counter m_dead_tuples; -} PgStat_MsgVacuum; - -/* ---------- - * PgStat_MsgAnalyze Sent by the backend or autovacuum daemon - * after ANALYZE - * ---------- - */ -typedef struct PgStat_MsgAnalyze -{ - PgStat_MsgHdr m_hdr; - Oid m_databaseid; - Oid m_tableoid; - bool m_autovacuum; - bool m_resetcounter; - TimestampTz m_analyzetime; - PgStat_Counter m_live_tuples; - PgStat_Counter m_dead_tuples; -} PgStat_MsgAnalyze; - -/* ---------- - * PgStat_MsgArchiver Sent by the archiver to update 
statistics. - * ---------- - */ -typedef struct PgStat_MsgArchiver -{ - PgStat_MsgHdr m_hdr; - bool m_failed; /* Failed attempt */ - char m_xlog[MAX_XFN_CHARS + 1]; - TimestampTz m_timestamp; -} PgStat_MsgArchiver; - -/* ---------- - * PgStat_MsgBgWriter Sent by the bgwriter to update statistics. - * ---------- - */ -typedef struct PgStat_MsgBgWriter -{ - PgStat_MsgHdr m_hdr; - - PgStat_Counter m_buf_written_clean; - PgStat_Counter m_maxwritten_clean; - PgStat_Counter m_buf_alloc; -} PgStat_MsgBgWriter; - -/* ---------- - * PgStat_MsgCheckpointer Sent by the checkpointer to update statistics. - * ---------- - */ -typedef struct PgStat_MsgCheckpointer -{ - PgStat_MsgHdr m_hdr; - - PgStat_Counter m_timed_checkpoints; - PgStat_Counter m_requested_checkpoints; - PgStat_Counter m_buf_written_checkpoints; - PgStat_Counter m_buf_written_backend; - PgStat_Counter m_buf_fsync_backend; - PgStat_Counter m_checkpoint_write_time; /* times in milliseconds */ - PgStat_Counter m_checkpoint_sync_time; -} PgStat_MsgCheckpointer; - -/* ---------- - * PgStat_MsgWal Sent by backends and background processes to update WAL statistics. - * ---------- - */ -typedef struct PgStat_MsgWal -{ - PgStat_MsgHdr m_hdr; - PgStat_Counter m_wal_records; - PgStat_Counter m_wal_fpi; - uint64 m_wal_bytes; - PgStat_Counter m_wal_buffers_full; - PgStat_Counter m_wal_write; - PgStat_Counter m_wal_sync; - PgStat_Counter m_wal_write_time; /* time spent writing wal records in - * microseconds */ - PgStat_Counter m_wal_sync_time; /* time spent syncing wal records in - * microseconds */ -} PgStat_MsgWal; - -/* ---------- - * PgStat_MsgSLRU Sent by a backend to update SLRU statistics. 
- * ---------- - */ -typedef struct PgStat_MsgSLRU -{ - PgStat_MsgHdr m_hdr; - PgStat_Counter m_index; - PgStat_Counter m_blocks_zeroed; - PgStat_Counter m_blocks_hit; - PgStat_Counter m_blocks_read; - PgStat_Counter m_blocks_written; - PgStat_Counter m_blocks_exists; - PgStat_Counter m_flush; - PgStat_Counter m_truncate; -} PgStat_MsgSLRU; - -/* ---------- - * PgStat_MsgReplSlot Sent by a backend or a wal sender to update replication - * slot statistics. - * ---------- - */ -typedef struct PgStat_MsgReplSlot -{ - PgStat_MsgHdr m_hdr; - NameData m_slotname; - bool m_create; - bool m_drop; - PgStat_Counter m_spill_txns; - PgStat_Counter m_spill_count; - PgStat_Counter m_spill_bytes; - PgStat_Counter m_stream_txns; - PgStat_Counter m_stream_count; - PgStat_Counter m_stream_bytes; - PgStat_Counter m_total_txns; - PgStat_Counter m_total_bytes; -} PgStat_MsgReplSlot; - -/* ---------- - * PgStat_MsgSubscriptionDrop Sent by the backend and autovacuum to tell the - * collector about the dead subscription. - * ---------- - */ -typedef struct PgStat_MsgSubscriptionDrop -{ - PgStat_MsgHdr m_hdr; - Oid m_subid; -} PgStat_MsgSubscriptionDrop; - -/* ---------- - * PgStat_MsgSubscriptionError Sent by the apply worker or the table sync - * worker to report an error on the subscription. 
- * ---------- - */ -typedef struct PgStat_MsgSubscriptionError -{ - PgStat_MsgHdr m_hdr; - - Oid m_subid; - bool m_is_apply_error; -} PgStat_MsgSubscriptionError; - -/* ---------- - * PgStat_MsgRecoveryConflict Sent by the backend upon recovery conflict - * ---------- - */ -typedef struct PgStat_MsgRecoveryConflict -{ - PgStat_MsgHdr m_hdr; - - Oid m_databaseid; - int m_reason; -} PgStat_MsgRecoveryConflict; - -/* ---------- - * PgStat_MsgTempFile Sent by the backend upon creating a temp file - * ---------- - */ -typedef struct PgStat_MsgTempFile -{ - PgStat_MsgHdr m_hdr; - - Oid m_databaseid; - size_t m_filesize; -} PgStat_MsgTempFile; - -/* ---------- - * PgStat_FunctionEntry Per-function info in a MsgFuncstat - * ---------- - */ -typedef struct PgStat_FunctionEntry -{ - Oid f_id; - PgStat_Counter f_numcalls; - PgStat_Counter f_total_time; /* times in microseconds */ - PgStat_Counter f_self_time; -} PgStat_FunctionEntry; - -/* ---------- - * PgStat_MsgFuncstat Sent by the backend to report function - * usage statistics. - * ---------- - */ -#define PGSTAT_NUM_FUNCENTRIES \ - ((PGSTAT_MSG_PAYLOAD - sizeof(Oid) - sizeof(int)) \ - / sizeof(PgStat_FunctionEntry)) - -typedef struct PgStat_MsgFuncstat -{ - PgStat_MsgHdr m_hdr; - Oid m_databaseid; - int m_nentries; - PgStat_FunctionEntry m_entry[PGSTAT_NUM_FUNCENTRIES]; -} PgStat_MsgFuncstat; - -/* ---------- - * PgStat_MsgFuncpurge Sent by the backend to tell the collector - * about dead functions. - * ---------- - */ -#define PGSTAT_NUM_FUNCPURGE \ - ((PGSTAT_MSG_PAYLOAD - sizeof(Oid) - sizeof(int)) \ - / sizeof(Oid)) - -typedef struct PgStat_MsgFuncpurge -{ - PgStat_MsgHdr m_hdr; - Oid m_databaseid; - int m_nentries; - Oid m_functionid[PGSTAT_NUM_FUNCPURGE]; -} PgStat_MsgFuncpurge; - -/* ---------- - * PgStat_MsgDeadlock Sent by the backend to tell the collector - * about a deadlock that occurred. 
- * ---------- - */ -typedef struct PgStat_MsgDeadlock -{ - PgStat_MsgHdr m_hdr; - Oid m_databaseid; -} PgStat_MsgDeadlock; - -/* ---------- - * PgStat_MsgChecksumFailure Sent by the backend to tell the collector - * about checksum failures noticed. - * ---------- - */ -typedef struct PgStat_MsgChecksumFailure -{ - PgStat_MsgHdr m_hdr; - Oid m_databaseid; - int m_failurecount; - TimestampTz m_failure_time; -} PgStat_MsgChecksumFailure; - -/* ---------- - * PgStat_MsgConnect Sent by the backend upon connection - * establishment - * ---------- - */ -typedef struct PgStat_MsgConnect -{ - PgStat_MsgHdr m_hdr; - Oid m_databaseid; -} PgStat_MsgConnect; - -/* ---------- - * PgStat_MsgDisconnect Sent by the backend when disconnecting - * ---------- - */ -typedef struct PgStat_MsgDisconnect -{ - PgStat_MsgHdr m_hdr; - Oid m_databaseid; - SessionEndType m_cause; -} PgStat_MsgDisconnect; - -/* ---------- - * PgStat_Msg Union over all possible messages. - * ---------- - */ -typedef union PgStat_Msg -{ - PgStat_MsgHdr msg_hdr; - PgStat_MsgDummy msg_dummy; - PgStat_MsgInquiry msg_inquiry; - PgStat_MsgTabstat msg_tabstat; - PgStat_MsgTabpurge msg_tabpurge; - PgStat_MsgDropdb msg_dropdb; - PgStat_MsgResetcounter msg_resetcounter; - PgStat_MsgResetsharedcounter msg_resetsharedcounter; - PgStat_MsgResetsinglecounter msg_resetsinglecounter; - PgStat_MsgResetslrucounter msg_resetslrucounter; - PgStat_MsgResetreplslotcounter msg_resetreplslotcounter; - PgStat_MsgResetsubcounter msg_resetsubcounter; - PgStat_MsgAutovacStart msg_autovacuum_start; - PgStat_MsgVacuum msg_vacuum; - PgStat_MsgAnalyze msg_analyze; - PgStat_MsgArchiver msg_archiver; - PgStat_MsgBgWriter msg_bgwriter; - PgStat_MsgCheckpointer msg_checkpointer; - PgStat_MsgWal msg_wal; - PgStat_MsgSLRU msg_slru; - PgStat_MsgFuncstat msg_funcstat; - PgStat_MsgFuncpurge msg_funcpurge; - PgStat_MsgRecoveryConflict msg_recoveryconflict; - PgStat_MsgDeadlock msg_deadlock; - PgStat_MsgTempFile msg_tempfile; - PgStat_MsgChecksumFailure 
msg_checksumfailure; - PgStat_MsgReplSlot msg_replslot; - PgStat_MsgConnect msg_connect; - PgStat_MsgDisconnect msg_disconnect; - PgStat_MsgSubscriptionError msg_subscriptionerror; - PgStat_MsgSubscriptionDrop msg_subscriptiondrop; -} PgStat_Msg; - - -/* ------------------------------------------------------------ - * Statistic collector data structures follow + * Data structures on disk and in shared memory follow * * PGSTAT_FILE_FORMAT_ID should be changed whenever any of these * data structures change. * ------------------------------------------------------------ */ -#define PGSTAT_FILE_FORMAT_ID 0x01A5BCA6 +#define PGSTAT_FILE_FORMAT_ID 0x01A5BCA7 typedef struct PgStat_ArchiverStats { @@ -808,7 +267,6 @@ typedef struct PgStat_BgWriterStats typedef struct PgStat_CheckpointerStats { - TimestampTz stats_timestamp; /* time of stats file update */ PgStat_Counter timed_checkpoints; PgStat_Counter requested_checkpoints; PgStat_Counter checkpoint_write_time; /* times in milliseconds */ @@ -820,7 +278,6 @@ typedef struct PgStat_CheckpointerStats typedef struct PgStat_StatDBEntry { - Oid databaseid; PgStat_Counter n_xact_commit; PgStat_Counter n_xact_rollback; PgStat_Counter n_blocks_fetched; @@ -852,34 +309,16 @@ typedef struct PgStat_StatDBEntry PgStat_Counter n_sessions_killed; TimestampTz stat_reset_timestamp; - TimestampTz stats_timestamp; /* time of db stats file update */ - - /* - * tables and functions must be last in the struct, because we don't write - * the pointers out to the stats file. 
- */ - HTAB *tables; - HTAB *functions; } PgStat_StatDBEntry; typedef struct PgStat_StatFuncEntry { - Oid functionid; - PgStat_Counter f_numcalls; PgStat_Counter f_total_time; /* times in microseconds */ PgStat_Counter f_self_time; } PgStat_StatFuncEntry; -typedef struct PgStat_GlobalStats -{ - TimestampTz stats_timestamp; /* time of stats file update */ - - PgStat_CheckpointerStats checkpointer; - PgStat_BgWriterStats bgwriter; -} PgStat_GlobalStats; - typedef struct PgStat_StatReplSlotEntry { NameData slotname; @@ -908,8 +347,6 @@ typedef struct PgStat_SLRUStats typedef struct PgStat_StatSubEntry { - Oid subid; /* hash key (must be first) */ - PgStat_Counter apply_error_count; PgStat_Counter sync_error_count; TimestampTz stat_reset_timestamp; @@ -917,8 +354,6 @@ typedef struct PgStat_StatSubEntry typedef struct PgStat_StatTabEntry { - Oid tableid; - PgStat_Counter numscans; PgStat_Counter tuples_returned; @@ -966,22 +401,19 @@ typedef struct PgStat_WalStats */ /* functions called from postmaster */ -extern void pgstat_init(void); -extern void pgstat_reset_all(void); -extern int pgstat_start(void); -extern void allow_immediate_pgstat_restart(void); +extern Size StatsShmemSize(void); +extern void StatsShmemInit(void); -#ifdef EXEC_BACKEND -extern void PgstatCollectorMain(int argc, char *argv[]) pg_attribute_noreturn(); -#endif +/* Functions called during server startup / shutdown */ +extern void pgstat_restore_stats(void); +extern void pgstat_discard_stats(void); +extern void pgstat_before_server_shutdown(int code, Datum arg); /* Functions for backend initialization */ extern void pgstat_initialize(void); /* Functions called from backends */ -extern void pgstat_report_stat(bool force); -extern void pgstat_vacuum_stat(void); -extern void pgstat_ping(void); +extern long pgstat_report_stat(bool force); extern void pgstat_reset_counters(void); extern void pgstat_reset(PgStat_Kind kind, Oid dboid, Oid objectid); @@ -989,24 +421,17 @@ extern void 
pgstat_reset_of_kind(PgStat_Kind kind); /* stats accessors */ extern void pgstat_clear_snapshot(void); -extern PgStat_ArchiverStats *pgstat_fetch_stat_archiver(void); -extern PgStat_BgWriterStats *pgstat_fetch_stat_bgwriter(void); -extern PgStat_CheckpointerStats *pgstat_fetch_stat_checkpointer(void); -extern PgStat_StatDBEntry *pgstat_fetch_stat_dbentry(Oid dbid); -extern PgStat_StatFuncEntry *pgstat_fetch_stat_funcentry(Oid funcid); -extern PgStat_GlobalStats *pgstat_fetch_global(void); -extern PgStat_StatReplSlotEntry *pgstat_fetch_replslot(NameData slotname); -extern PgStat_StatSubEntry *pgstat_fetch_stat_subscription(Oid subid); -extern PgStat_SLRUStats *pgstat_fetch_slru(void); -extern PgStat_StatTabEntry *pgstat_fetch_stat_tabentry(Oid relid); -extern PgStat_WalStats *pgstat_fetch_stat_wal(void); +extern TimestampTz pgstat_get_stat_snapshot_timestamp(bool *have_snapshot); +/* helpers */ +extern PgStat_Kind pgstat_get_kind_from_str(char *kind_str); /* * Functions in pgstat_archiver.c */ extern void pgstat_report_archiver(const char *xlog, bool failed); +extern PgStat_ArchiverStats *pgstat_fetch_stat_archiver(void); /* @@ -1014,6 +439,7 @@ extern void pgstat_report_archiver(const char *xlog, bool failed); */ extern void pgstat_report_bgwriter(void); +extern PgStat_BgWriterStats *pgstat_fetch_stat_bgwriter(void); /* @@ -1021,6 +447,7 @@ extern void pgstat_report_bgwriter(void); */ extern void pgstat_report_checkpointer(void); +extern PgStat_CheckpointerStats *pgstat_fetch_stat_checkpointer(void); /* @@ -1044,6 +471,7 @@ extern void pgstat_report_connect(Oid dboid); #define pgstat_count_conn_txn_idle_time(n) \ (pgStatTransactionIdleTime += (n)) +extern PgStat_StatDBEntry *pgstat_fetch_stat_dbentry(Oid dbid); /* * Functions in pgstat_function.c @@ -1058,6 +486,7 @@ extern void pgstat_init_function_usage(struct FunctionCallInfoBaseData *fcinfo, extern void pgstat_end_function_usage(PgStat_FunctionCallUsage *fcu, bool finalize); +extern PgStat_StatFuncEntry 
*pgstat_fetch_stat_funcentry(Oid funcid); extern PgStat_BackendFunctionEntry *find_funcstat_entry(Oid func_id); @@ -1070,6 +499,8 @@ extern void pgstat_drop_relation(Relation rel); extern void pgstat_copy_relation_stats(Relation dstrel, Relation srcrel); extern void pgstat_init_relation(Relation rel); +extern void pgstat_assoc_relation(Relation rel); +extern void pgstat_unlink_relation(Relation rel); extern void pgstat_report_vacuum(Oid tableoid, bool shared, PgStat_Counter livetuples, PgStat_Counter deadtuples); @@ -1077,8 +508,14 @@ extern void pgstat_report_analyze(Relation rel, PgStat_Counter livetuples, PgStat_Counter deadtuples, bool resetcounter); +/* + * If stats are enabled, but pending data hasn't been prepared yet, call + * pgstat_assoc_relation() to do so. See its comment for why this is done + * separately from pgstat_init_relation(). + */ #define pgstat_should_count_relation(rel) \ - (likely((rel)->pgstat_info != NULL)) + (likely((rel)->pgstat_info != NULL) ? true : \ + ((rel)->pgstat_enabled ? 
pgstat_assoc_relation(rel), true : false)) /* nontransactional event counts are simple enough to inline */ @@ -1129,6 +566,9 @@ extern void pgstat_twophase_postcommit(TransactionId xid, uint16 info, extern void pgstat_twophase_postabort(TransactionId xid, uint16 info, void *recdata, uint32 len); +extern PgStat_StatTabEntry *pgstat_fetch_stat_tabentry(Oid relid); +extern PgStat_StatTabEntry *pgstat_fetch_stat_tabentry_ext(bool shared, + Oid relid); extern PgStat_TableStatus *find_tabstat_entry(Oid rel_id); @@ -1140,7 +580,9 @@ extern void pgstat_reset_replslot(const char *name); struct ReplicationSlot; extern void pgstat_report_replslot(struct ReplicationSlot *slot, const PgStat_StatReplSlotEntry *repSlotStat); extern void pgstat_create_replslot(struct ReplicationSlot *slot); +extern void pgstat_acquire_replslot(struct ReplicationSlot *slot); extern void pgstat_drop_replslot(struct ReplicationSlot *slot); +extern PgStat_StatReplSlotEntry *pgstat_fetch_replslot(NameData slotname); /* @@ -1157,6 +599,7 @@ extern void pgstat_count_slru_flush(int slru_idx); extern void pgstat_count_slru_truncate(int slru_idx); extern const char *pgstat_get_slru_name(int slru_idx); extern int pgstat_get_slru_index(const char *name); +extern PgStat_SLRUStats *pgstat_fetch_slru(void); /* @@ -1166,6 +609,7 @@ extern int pgstat_get_slru_index(const char *name); extern void pgstat_report_subscription_error(Oid subid, bool is_apply_error); extern void pgstat_create_subscription(Oid subid); extern void pgstat_drop_subscription(Oid subid); +extern PgStat_StatSubEntry *pgstat_fetch_stat_subscription(Oid subid); /* @@ -1186,6 +630,7 @@ extern void pgstat_execute_transactional_drops(int ndrops, struct xl_xact_stats_ */ extern void pgstat_report_wal(bool force); +extern PgStat_WalStats *pgstat_fetch_stat_wal(void); /* @@ -1195,6 +640,8 @@ extern void pgstat_report_wal(bool force); /* GUC parameters */ extern PGDLLIMPORT bool pgstat_track_counts; extern PGDLLIMPORT int pgstat_track_functions; +extern 
PGDLLIMPORT int pgstat_fetch_consistency; + extern char *pgstat_stat_directory; extern char *pgstat_stat_tmpname; extern char *pgstat_stat_filename; @@ -1205,7 +652,7 @@ extern char *pgstat_stat_filename; */ /* updated directly by bgwriter and bufmgr */ -extern PgStat_MsgBgWriter PendingBgWriterStats; +extern PgStat_BgWriterStats PendingBgWriterStats; /* @@ -1216,7 +663,7 @@ extern PgStat_MsgBgWriter PendingBgWriterStats; * Checkpointer statistics counters are updated directly by checkpointer and * bufmgr. */ -extern PgStat_MsgCheckpointer PendingCheckpointerStats; +extern PgStat_CheckpointerStats PendingCheckpointerStats; /* @@ -1243,7 +690,7 @@ extern SessionEndType pgStatSessionEndCause; */ /* updated directly by backends and background processes */ -extern PgStat_MsgWal WalStats; +extern PgStat_WalStats PendingWalStats; #endif /* PGSTAT_H */ diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index c3d5889d7b2..33eb4c10339 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -190,6 +190,9 @@ typedef enum BuiltinTrancheIds LWTRANCHE_SHARED_TIDBITMAP, LWTRANCHE_PARALLEL_APPEND, LWTRANCHE_PER_XACT_PREDICATE_LIST, + LWTRANCHE_PGSTATS_DSA, + LWTRANCHE_PGSTATS_HASH, + LWTRANCHE_PGSTATS_DATA, LWTRANCHE_FIRST_USER_DEFINED } BuiltinTrancheIds; diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h index c3f83c74c62..ab27bc47c5e 100644 --- a/src/include/utils/pgstat_internal.h +++ b/src/include/utils/pgstat_internal.h @@ -14,21 +14,134 @@ #define PGSTAT_INTERNAL_H +#include "common/hashfn.h" +#include "lib/dshash.h" +#include "lib/ilist.h" #include "pgstat.h" +#include "storage/lwlock.h" +#include "utils/dsa.h" -#define PGSTAT_STAT_INTERVAL 500 /* Minimum time between stats file - * updates; in milliseconds. */ +/* + * Types related to shared memory storage of statistics. + * + * Per-object statistics are stored in the "shared stats" hashtable. 
That + * table's entries (PgStatShared_HashEntry) contain a pointer to the actual stats + * data for the object (the size of the stats data varies depending on the + * kind of stats). The table is keyed by PgStat_HashKey. + * + * Once a backend has a reference to a shared stats entry, it increments the + * entry's refcount. Even after stats data is dropped (e.g., due to a DROP + * TABLE), the entry itself can only be deleted once all references have been + * released. + * + * These refcounts, in combination with a backend local hashtable + * (pgStatEntryRefHash, with entries pointing to PgStat_EntryRef) in front of + * the shared hash table, mean that most stats work can happen without + * touching the shared hash table, reducing contention. + * + * Once there are pending stats updates for a table PgStat_EntryRef->pending + * is allocated to contain a working space for as-of-yet-unapplied stats + * updates. Once the stats are flushed, PgStat_EntryRef->pending is freed. + * + * Each stat kind in the shared hash table has a fixed member + * PgStatShared_Common as the first element. + */ -/* ---------- - * The initial size hints for the hash tables used in the collector. - * ---------- +/* struct for shared statistics hash entry key. */ +typedef struct PgStat_HashKey +{ + PgStat_Kind kind; /* statistics entry kind */ + Oid dboid; /* database ID. InvalidOid for shared objects. */ + Oid objoid; /* object ID, either table or function. */ +} PgStat_HashKey; + +/* + * Shared statistics hash entry. Doesn't itself contain any stats, but points + * to them (with ->body). That allows the stats entries themselves to be of + * variable size. 
*/ -#define PGSTAT_DB_HASH_SIZE 16 -#define PGSTAT_TAB_HASH_SIZE 512 -#define PGSTAT_FUNCTION_HASH_SIZE 512 -#define PGSTAT_SUBSCRIPTION_HASH_SIZE 32 -#define PGSTAT_REPLSLOT_HASH_SIZE 32 +typedef struct PgStatShared_HashEntry +{ + PgStat_HashKey key; /* hash key */ + + /* + * If dropped is set, backends need to release their references so that + * the memory for the entry can be freed. No new references may be made + * once marked as dropped. + */ + bool dropped; + + /* + * Refcount managing lifetime of the entry itself (as opposed to the + * dshash entry pointing to it). The stats lifetime has to be separate + * from the hash table entry lifetime because we allow backends to point + * to a stats entry without holding a hash table lock (and some other + * reasons). + * + * As long as the entry is not dropped, 1 is added to the refcount + * representing that the entry should not be dropped. In addition each + * backend that has a reference to the entry needs to increment the + * refcount as long as it does. + * + * May only be incremented / decremented while holding at least a shared + * lock on the dshash partition containing the entry. It needs to be an + * atomic variable because multiple backends can increment the refcount + * with just a shared lock. + * + * When the refcount reaches 0 the entry needs to be freed. + */ + pg_atomic_uint32 refcount; + + /* + * Pointer to shared stats. The stats entry always starts with + * PgStatShared_Common, embedded in a larger struct containing the + * PgStat_Kind specific stats fields. + */ + dsa_pointer body; +} PgStatShared_HashEntry; + +/* + * Common header struct for PgStatShm_Stat*Entry. + */ +typedef struct PgStatShared_Common +{ + uint32 magic; /* just a validity cross-check */ + /* lock protecting stats contents (i.e. data following the header) */ + LWLock lock; +} PgStatShared_Common; + +/* + * A backend local reference to a shared stats entry. 
As long as at least one + * such reference exists, the shared stats entry will not be released. + * + * If there are pending stats updates to the shared stats, these are stored in + * ->pending. + */ +typedef struct PgStat_EntryRef +{ + /* + * Pointer to the PgStatShared_HashEntry entry in the shared stats + * hashtable. + */ + PgStatShared_HashEntry *shared_entry; + + /* + * Pointer to the stats data (i.e. PgStatShared_HashEntry->body), resolved + * as a local pointer, to avoid repeated dsa_get_address() calls. + */ + PgStatShared_Common *shared_stats; + + /* + * Pending statistics data that will need to be flushed to shared memory + * stats eventually. Each stats kind utilizing pending data defines what + * format its pending data has and needs to provide a + * PgStat_KindInfo->flush_pending_cb callback to merge pending into shared + * stats. + */ + void *pending; + dlist_node pending_node; /* membership in pgStatPending list */ +} PgStat_EntryRef; /* @@ -43,11 +156,11 @@ typedef struct PgStat_SubXactStatus struct PgStat_SubXactStatus *prev; /* higher-level subxact if any */ /* - * Dropping the statistics for objects that dropped transactionally itself - * needs to be transactional. Therefore we collect the stats dropped in - * the current (sub-)transaction and only execute the stats drop when we - * know if the transaction commits/aborts. To handle replicas and crashes, - * stats drops are included in commit records. + * Statistics for transactionally dropped objects need to be + * transactionally dropped as well. Collect the stats dropped in the + * current (sub-)transaction and only execute the stats drop when we know + * if the transaction commits/aborts. To handle replicas and crashes, + * stats drops are included in commit / abort records. */ dlist_head pending_drops; int pending_drops_count; @@ -65,9 +178,95 @@ typedef struct PgStat_SubXactStatus /* + * Metadata for a specific kind of statistics. 
+ */ +typedef struct PgStat_KindInfo +{ + /* + * Do a fixed number of stats objects exist for this kind of stats (e.g. + * bgwriter stats) or not (e.g. tables). + */ + bool fixed_amount:1; + + /* + * Can stats of this kind be accessed from another database? Determines + * whether a stats object gets included in stats snapshots. + */ + bool accessed_across_databases:1; + + /* + * For variable-numbered stats: Identified on-disk using a name, rather + * than PgStat_HashKey. Probably only needed for replication slot stats. + */ + bool named_on_disk:1; + + /* + * The size of an entry in the shared stats hash table (pointed to by + * PgStatShared_HashEntry->body). + */ + uint32 shared_size; + + /* + * The offset/size of statistics inside the shared stats entry. Used when + * [de-]serializing statistics to / from disk respectively. Separate from + * shared_size because [de-]serialization may not include in-memory state + * like lwlocks. + */ + uint32 shared_data_off; + uint32 shared_data_len; + + /* + * The size of the pending data for this kind. E.g. how large + * PgStat_EntryRef->pending is. Used for allocations. + * + * 0 signals that an entry of this kind should never have a pending entry. + */ + uint32 pending_size; + + /* + * For variable-numbered stats: flush pending stats. Required if pending + * data is used. + */ + bool (*flush_pending_cb) (PgStat_EntryRef *sr, bool nowait); + + /* + * For variable-numbered stats: delete pending stats. Optional. + */ + void (*delete_pending_cb) (PgStat_EntryRef *sr); + + /* + * For variable-numbered stats: reset the reset timestamp. Optional. + */ + void (*reset_timestamp_cb) (PgStatShared_Common *header, TimestampTz ts); + + /* + * For variable-numbered stats with named_on_disk. Optional. + */ + void (*to_serialized_name) (const PgStatShared_Common *header, NameData *name); + bool (*from_serialized_name) (const NameData *name, PgStat_HashKey *key); + + /* + * For fixed-numbered statistics: Reset All. 
+ */ + void (*reset_all_cb) (TimestampTz ts); + + /* + * For fixed-numbered statistics: Build snapshot for entry + */ + void (*snapshot_cb) (void); + + /* name of the kind of stats */ + const char *const name; +} PgStat_KindInfo; + + +/* * List of SLRU names that we keep stats for. There is no central registry of * SLRUs, so we use this fixed list instead. The "other" entry is used for * all SLRUs without an explicit entry (e.g. SLRUs in extensions). + * + * This is only defined here so that SLRU_NUM_ELEMENTS is known for later type + * definitions. */ static const char *const slru_names[] = { "CommitTs", @@ -83,33 +282,271 @@ static const char *const slru_names[] = { #define SLRU_NUM_ELEMENTS lengthof(slru_names) +/* ---------- + * Types and definitions for different kinds of fixed-amount stats. + * + * Single-writer stats use the changecount mechanism to achieve low-overhead + * writes - they're obviously more performance critical than reads. Check the + * definition of struct PgBackendStatus for some explanation of the + * changecount mechanism. + * + * Because the obvious implementation of resetting single-writer stats isn't + * compatible with that (another backend needs to write), we don't scribble on + * shared stats while resetting. Instead, just record the current counter + * values in a copy of the stats data, which is protected by ->lock. See + * pgstat_fetch_stat_(archiver|bgwriter|checkpointer) for the reader side. 
+ * + * The only exception to that is the stat_reset_timestamp in these + * structs, which is protected by ->lock, because it has to be written by + * another backend while resetting. + * ---------- + */ + +typedef struct PgStatShared_Archiver +{ + /* lock protects ->reset_offset as well as stats->stat_reset_timestamp */ + LWLock lock; + uint32 changecount; + PgStat_ArchiverStats stats; + PgStat_ArchiverStats reset_offset; +} PgStatShared_Archiver; + +typedef struct PgStatShared_BgWriter +{ + /* lock protects ->reset_offset as well as stats->stat_reset_timestamp */ + LWLock lock; + uint32 changecount; + PgStat_BgWriterStats stats; + PgStat_BgWriterStats reset_offset; +} PgStatShared_BgWriter; + +typedef struct PgStatShared_Checkpointer +{ + /* lock protects ->reset_offset as well as stats->stat_reset_timestamp */ + LWLock lock; + uint32 changecount; + PgStat_CheckpointerStats stats; + PgStat_CheckpointerStats reset_offset; +} PgStatShared_Checkpointer; + +typedef struct PgStatShared_SLRU +{ + /* lock protects ->stats */ + LWLock lock; + PgStat_SLRUStats stats[SLRU_NUM_ELEMENTS]; +} PgStatShared_SLRU; + +typedef struct PgStatShared_Wal +{ + /* lock protects ->stats */ + LWLock lock; + PgStat_WalStats stats; +} PgStatShared_Wal; + + + +/* ---------- + * Types and definitions for different kinds of variable-amount stats. + * + * Each struct has to start with PgStatShared_Common, containing information + * common across the different types of stats. Kind-specific data follows. 
+ * ---------- + */ + +typedef struct PgStatShared_Database +{ + PgStatShared_Common header; + PgStat_StatDBEntry stats; +} PgStatShared_Database; + +typedef struct PgStatShared_Relation +{ + PgStatShared_Common header; + PgStat_StatTabEntry stats; +} PgStatShared_Relation; + +typedef struct PgStatShared_Function +{ + PgStatShared_Common header; + PgStat_StatFuncEntry stats; +} PgStatShared_Function; + +typedef struct PgStatShared_Subscription +{ + PgStatShared_Common header; + PgStat_StatSubEntry stats; +} PgStatShared_Subscription; + +typedef struct PgStatShared_ReplSlot +{ + PgStatShared_Common header; + PgStat_StatReplSlotEntry stats; +} PgStatShared_ReplSlot; + + +/* + * Central shared memory entry for the cumulative stats system. + * + * Fixed amount stats, the dynamic shared memory hash table for + * non-fixed-amount stats, as well as remaining bits and pieces are all + * reached from here. + */ +typedef struct PgStat_ShmemControl +{ + void *raw_dsa_area; + + /* + * Stats for variable-numbered objects are kept in this shared hash table. + * See comment above PgStat_Kind for details. + */ + dshash_table_handle hash_handle; /* shared dbstat hash */ + + /* Has the stats system already been shut down? Just a debugging check. */ + bool is_shutdown; + + /* + * Whenever statistics for dropped objects could not be freed - because + * backends still have references - the dropping backend calls + * pgstat_request_entry_refs_gc() incrementing this counter. Eventually + * that causes backends to run pgstat_gc_entry_refs(), allowing memory to + * be reclaimed. + */ + pg_atomic_uint64 gc_request_count; + + /* + * Stats data for fixed-numbered objects. 
+ */ + PgStatShared_Archiver archiver; + PgStatShared_BgWriter bgwriter; + PgStatShared_Checkpointer checkpointer; + PgStatShared_SLRU slru; + PgStatShared_Wal wal; +} PgStat_ShmemControl; + + +/* + * Cached statistics snapshot + */ +typedef struct PgStat_Snapshot +{ + PgStat_FetchConsistency mode; + + /* time at which snapshot was taken */ + TimestampTz snapshot_timestamp; + + bool fixed_valid[PGSTAT_NUM_KINDS]; + + PgStat_ArchiverStats archiver; + + PgStat_BgWriterStats bgwriter; + + PgStat_CheckpointerStats checkpointer; + + PgStat_SLRUStats slru[SLRU_NUM_ELEMENTS]; + + PgStat_WalStats wal; + + /* to free snapshot in bulk */ + MemoryContext context; + struct pgstat_snapshot_hash *stats; +} PgStat_Snapshot; + + +/* + * Collection of backend-local stats state. + */ +typedef struct PgStat_LocalState +{ + PgStat_ShmemControl *shmem; + dsa_area *dsa; + dshash_table *shared_hash; + + /* the current statistics snapshot */ + PgStat_Snapshot snapshot; +} PgStat_LocalState; + + +/* + * Inline functions defined further below. 
+ */ + +static inline void pgstat_begin_changecount_write(uint32 *cc); +static inline void pgstat_end_changecount_write(uint32 *cc); +static inline uint32 pgstat_begin_changecount_read(uint32 *cc); +static inline bool pgstat_end_changecount_read(uint32 *cc, uint32 cc_before); + +static inline void pgstat_copy_changecounted_stats(void *dst, void *src, size_t len, + uint32 *cc); + +static inline int pgstat_cmp_hash_key(const void *a, const void *b, size_t size, void *arg); +static inline uint32 pgstat_hash_hash_key(const void *d, size_t size, void *arg); +static inline size_t pgstat_get_entry_len(PgStat_Kind kind); +static inline void *pgstat_get_entry_data(PgStat_Kind kind, PgStatShared_Common *entry); + + /* * Functions in pgstat.c */ -extern void pgstat_setheader(PgStat_MsgHdr *hdr, StatMsgType mtype); -extern void pgstat_send(void *msg, int len); +const PgStat_KindInfo *pgstat_get_kind_info(PgStat_Kind kind); + #ifdef USE_ASSERT_CHECKING extern void pgstat_assert_is_up(void); #else #define pgstat_assert_is_up() ((void)true) #endif +extern void pgstat_delete_pending_entry(PgStat_EntryRef *entry_ref); +extern PgStat_EntryRef *pgstat_prep_pending_entry(PgStat_Kind kind, Oid dboid, Oid objoid, bool *created_entry); +extern PgStat_EntryRef *pgstat_fetch_pending_entry(PgStat_Kind kind, Oid dboid, Oid objoid); + +extern void *pgstat_fetch_entry(PgStat_Kind kind, Oid dboid, Oid objoid); +extern void pgstat_snapshot_fixed(PgStat_Kind kind); + + +/* + * Functions in pgstat_archiver.c + */ + +extern void pgstat_archiver_reset_all_cb(TimestampTz ts); +extern void pgstat_archiver_snapshot_cb(void); + + +/* + * Functions in pgstat_bgwriter.c + */ + +extern void pgstat_bgwriter_reset_all_cb(TimestampTz ts); +extern void pgstat_bgwriter_snapshot_cb(void); + + +/* + * Functions in pgstat_checkpointer.c + */ + +extern void pgstat_checkpointer_reset_all_cb(TimestampTz ts); +extern void pgstat_checkpointer_snapshot_cb(void); + /* * Functions in pgstat_database.c */ -extern void 
AtEOXact_PgStat_Database(bool isCommit, bool parallel); extern void pgstat_report_disconnect(Oid dboid); -extern void pgstat_update_dbstats(PgStat_MsgTabstat *tsmsg, TimestampTz now); +extern void pgstat_update_dbstats(TimestampTz ts); +extern void AtEOXact_PgStat_Database(bool isCommit, bool parallel); + +extern PgStat_StatDBEntry *pgstat_prep_database_pending(Oid dboid); +extern void pgstat_reset_database_timestamp(Oid dboid, TimestampTz ts); +extern bool pgstat_database_flush_cb(PgStat_EntryRef *entry_ref, bool nowait); +extern void pgstat_database_reset_timestamp_cb(PgStatShared_Common *header, TimestampTz ts); /* * Functions in pgstat_function.c */ -extern void pgstat_send_funcstats(void); +extern bool pgstat_function_flush_cb(PgStat_EntryRef *entry_ref, bool nowait); /* @@ -120,23 +557,73 @@ extern void AtEOXact_PgStat_Relations(PgStat_SubXactStatus *xact_state, bool isC extern void AtEOSubXact_PgStat_Relations(PgStat_SubXactStatus *xact_state, bool isCommit, int nestDepth); extern void AtPrepare_PgStat_Relations(PgStat_SubXactStatus *xact_state); extern void PostPrepare_PgStat_Relations(PgStat_SubXactStatus *xact_state); -extern void pgstat_send_tabstats(TimestampTz now, bool disconnect); + +extern bool pgstat_relation_flush_cb(PgStat_EntryRef *entry_ref, bool nowait); +extern void pgstat_relation_delete_pending_cb(PgStat_EntryRef *entry_ref); + + +/* + * Functions in pgstat_replslot.c + */ + +extern void pgstat_replslot_reset_timestamp_cb(PgStatShared_Common *header, TimestampTz ts); +extern void pgstat_replslot_to_serialized_name_cb(const PgStatShared_Common *tmp, NameData *name); +extern bool pgstat_replslot_from_serialized_name_cb(const NameData *name, PgStat_HashKey *key); + + +/* + * Functions in pgstat_shmem.c + */ + +extern void pgstat_attach_shmem(void); +extern void pgstat_detach_shmem(void); + +extern PgStat_EntryRef *pgstat_get_entry_ref(PgStat_Kind kind, Oid dboid, Oid objoid, + bool create, bool *found); +extern bool 
pgstat_lock_entry(PgStat_EntryRef *entry_ref, bool nowait); +extern void pgstat_unlock_entry(PgStat_EntryRef *entry_ref); +extern bool pgstat_drop_entry(PgStat_Kind kind, Oid dboid, Oid objoid); +extern void pgstat_drop_all_entries(void); +extern PgStat_EntryRef *pgstat_get_entry_ref_locked(PgStat_Kind kind, Oid dboid, Oid objoid, + bool nowait); +extern void pgstat_reset_entry(PgStat_Kind kind, Oid dboid, Oid objoid, TimestampTz ts); +extern void pgstat_reset_entries_of_kind(PgStat_Kind kind, TimestampTz ts); +extern void pgstat_reset_matching_entries(bool (*do_reset) (PgStatShared_HashEntry *, Datum), + Datum match_data, + TimestampTz ts); + +extern void pgstat_request_entry_refs_gc(void); +extern PgStatShared_Common *pgstat_init_entry(PgStat_Kind kind, + PgStatShared_HashEntry *shhashent); /* * Functions in pgstat_slru.c */ -extern void pgstat_send_slru(void); +extern bool pgstat_slru_flush(bool nowait); +extern void pgstat_slru_reset_all_cb(TimestampTz ts); +extern void pgstat_slru_snapshot_cb(void); /* * Functions in pgstat_wal.c */ +extern bool pgstat_flush_wal(bool nowait); extern void pgstat_init_wal(void); extern bool pgstat_have_pending_wal(void); +extern void pgstat_wal_reset_all_cb(TimestampTz ts); +extern void pgstat_wal_snapshot_cb(void); + + +/* + * Functions in pgstat_subscription.c + */ + +extern bool pgstat_subscription_flush_cb(PgStat_EntryRef *entry_ref, bool nowait); +extern void pgstat_subscription_reset_timestamp_cb(PgStatShared_Common *header, TimestampTz ts); /* * Functions in pgstat_xact.c @@ -151,29 +638,145 @@ extern void pgstat_create_transactional(PgStat_Kind kind, Oid dboid, Oid objoid) * Variables in pgstat.c */ -extern pgsocket pgStatSock; +extern PgStat_LocalState pgStatLocal; /* - * Variables in pgstat_database.c + * Variables in pgstat_slru.c */ -extern int pgStatXactCommit; -extern int pgStatXactRollback; +extern bool have_slrustats; /* - * Variables in pgstat_functions.c + * Implementation of inline functions declared above. 
+ */ + +/* + * Helpers for changecount manipulation. See comments around struct + * PgBackendStatus for details. */ -extern bool have_function_stats; +static inline void +pgstat_begin_changecount_write(uint32 *cc) +{ + Assert((*cc & 1) == 0); + + START_CRIT_SECTION(); + (*cc)++; + pg_write_barrier(); +} + +static inline void +pgstat_end_changecount_write(uint32 *cc) +{ + Assert((*cc & 1) == 1); + + pg_write_barrier(); + + (*cc)++; + + END_CRIT_SECTION(); +} + +static inline uint32 +pgstat_begin_changecount_read(uint32 *cc) +{ + uint32 before_cc = *cc; + + CHECK_FOR_INTERRUPTS(); + pg_read_barrier(); + + return before_cc; +} /* - * Variables in pgstat_relation.c + * Returns true if the read succeeded, false if it needs to be repeated. */ +static inline bool +pgstat_end_changecount_read(uint32 *cc, uint32 before_cc) +{ + uint32 after_cc; + + pg_read_barrier(); + + after_cc = *cc; + + /* was a write in progress when we started? */ + if (before_cc & 1) + return false; + + /* did writes start and complete while we read? */ + return before_cc == after_cc; +} + + +/* + * helper function for PgStat_KindInfo->snapshot_cb and + * PgStat_KindInfo->reset_all_cb callbacks. + * + * Copies out the specified memory area following change-count protocol. 
+ */ +static inline void +pgstat_copy_changecounted_stats(void *dst, void *src, size_t len, + uint32 *cc) +{ + uint32 cc_before; + + do + { + cc_before = pgstat_begin_changecount_read(cc); + + memcpy(dst, src, len); + } + while (!pgstat_end_changecount_read(cc, cc_before)); +} + +/* helpers for dshash / simplehash hashtables */ +static inline int +pgstat_cmp_hash_key(const void *a, const void *b, size_t size, void *arg) +{ + AssertArg(size == sizeof(PgStat_HashKey) && arg == NULL); + return memcmp(a, b, sizeof(PgStat_HashKey)); +} + +static inline uint32 +pgstat_hash_hash_key(const void *d, size_t size, void *arg) +{ + const PgStat_HashKey *key = (PgStat_HashKey *) d; + uint32 hash; + + AssertArg(size == sizeof(PgStat_HashKey) && arg == NULL); + + hash = murmurhash32(key->kind); + hash = hash_combine(hash, murmurhash32(key->dboid)); + hash = hash_combine(hash, murmurhash32(key->objoid)); + + return hash; +} + +/* + * The length of the data portion of a shared memory stats entry (i.e. without + * transient data such as refcounts, lwlocks, ...). + */ +static inline size_t +pgstat_get_entry_len(PgStat_Kind kind) +{ + return pgstat_get_kind_info(kind)->shared_data_len; +} + +/* + * Returns a pointer to the data portion of a shared memory stats entry. 
+ */ +static inline void * +pgstat_get_entry_data(PgStat_Kind kind, PgStatShared_Common *entry) +{ + size_t off = pgstat_get_kind_info(kind)->shared_data_off; -extern bool have_relation_stats; + Assert(off != 0 && off < PG_UINT32_MAX); + return ((char *) (entry)) + off; +} #endif /* PGSTAT_INTERNAL_H */ diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index 121dbbc9a96..eadbd009045 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -246,6 +246,7 @@ typedef struct RelationData */ Oid rd_toastoid; /* Real TOAST table's OID, or InvalidOid */ + bool pgstat_enabled; /* should relation stats be counted */ /* use "struct" here to avoid needing to include pgstat.h: */ struct PgStat_TableStatus *pgstat_info; /* statistics collection area */ } RelationData; diff --git a/src/include/utils/timeout.h b/src/include/utils/timeout.h index 099f91c61da..c068986d09a 100644 --- a/src/include/utils/timeout.h +++ b/src/include/utils/timeout.h @@ -32,6 +32,7 @@ typedef enum TimeoutId STANDBY_LOCK_TIMEOUT, IDLE_IN_TRANSACTION_SESSION_TIMEOUT, IDLE_SESSION_TIMEOUT, + IDLE_STATS_UPDATE_TIMEOUT, CLIENT_CONNECTION_CHECK_TIMEOUT, STARTUP_PROGRESS_TIMEOUT, /* First user-definable timeout reason */ diff --git a/src/include/utils/wait_event.h b/src/include/utils/wait_event.h index d870c592632..b578e2ec757 100644 --- a/src/include/utils/wait_event.h +++ b/src/include/utils/wait_event.h @@ -42,7 +42,6 @@ typedef enum WAIT_EVENT_CHECKPOINTER_MAIN, WAIT_EVENT_LOGICAL_APPLY_MAIN, WAIT_EVENT_LOGICAL_LAUNCHER_MAIN, - WAIT_EVENT_PGSTAT_MAIN, WAIT_EVENT_RECOVERY_WAL_STREAM, WAIT_EVENT_SYSLOGGER_MAIN, WAIT_EVENT_WAL_RECEIVER_MAIN, |