summary | refs | log | tree | commit | diff
path: root/src/include
diff options
context:
space:
mode:
Diffstat (limited to 'src/include')
-rw-r--r-- src/include/miscadmin.h | 2
-rw-r--r-- src/include/pgstat.h | 679
-rw-r--r-- src/include/storage/lwlock.h | 3
-rw-r--r-- src/include/utils/pgstat_internal.h | 663
-rw-r--r-- src/include/utils/rel.h | 1
-rw-r--r-- src/include/utils/timeout.h | 1
-rw-r--r-- src/include/utils/wait_event.h | 1
7 files changed, 702 insertions, 648 deletions
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index 9321d7f264b..66c404c666d 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -94,6 +94,7 @@ extern PGDLLIMPORT volatile sig_atomic_t IdleInTransactionSessionTimeoutPending;
extern PGDLLIMPORT volatile sig_atomic_t IdleSessionTimeoutPending;
extern PGDLLIMPORT volatile sig_atomic_t ProcSignalBarrierPending;
extern PGDLLIMPORT volatile sig_atomic_t LogMemoryContextPending;
+extern PGDLLIMPORT volatile sig_atomic_t IdleStatsUpdateTimeoutPending;
extern PGDLLIMPORT volatile sig_atomic_t CheckClientConnectionPending;
extern PGDLLIMPORT volatile sig_atomic_t ClientConnectionLost;
@@ -333,7 +334,6 @@ typedef enum BackendType
B_WAL_SENDER,
B_WAL_WRITER,
B_ARCHIVER,
- B_STATS_COLLECTOR,
B_LOGGER,
} BackendType;
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 99115bacde7..1d2d3de86c9 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -14,10 +14,8 @@
#include "datatype/timestamp.h"
#include "portability/instr_time.h"
#include "postmaster/pgarch.h" /* for MAX_XFN_CHARS */
-#include "replication/logicalproto.h"
#include "utils/backend_progress.h" /* for backward compatibility */
#include "utils/backend_status.h" /* for backward compatibility */
-#include "utils/hsearch.h"
#include "utils/relcache.h"
#include "utils/wait_event.h" /* for backward compatibility */
@@ -27,8 +25,8 @@
* ----------
*/
#define PGSTAT_STAT_PERMANENT_DIRECTORY "pg_stat"
-#define PGSTAT_STAT_PERMANENT_FILENAME "pg_stat/global.stat"
-#define PGSTAT_STAT_PERMANENT_TMPFILE "pg_stat/global.tmp"
+#define PGSTAT_STAT_PERMANENT_FILENAME "pg_stat/pgstat.stat"
+#define PGSTAT_STAT_PERMANENT_TMPFILE "pg_stat/pgstat.tmp"
/* Default directory to store temporary statistics data in */
#define PG_STAT_TMP_DIR "pg_stat_tmp"
@@ -66,6 +64,13 @@ typedef enum TrackFunctionsLevel
TRACK_FUNC_ALL
} TrackFunctionsLevel;
+typedef enum PgStat_FetchConsistency
+{
+ PGSTAT_FETCH_CONSISTENCY_NONE,
+ PGSTAT_FETCH_CONSISTENCY_CACHE,
+ PGSTAT_FETCH_CONSISTENCY_SNAPSHOT,
+} PgStat_FetchConsistency;
+
/* Values to track the cause of session termination */
typedef enum SessionEndType
{
@@ -92,7 +97,7 @@ typedef int64 PgStat_Counter;
* PgStat_FunctionCounts The actual per-function counts kept by a backend
*
* This struct should contain only actual event counters, because we memcmp
- * it against zeroes to detect whether there are any counts to transmit.
+ * it against zeroes to detect whether there are any pending stats.
*
* Note that the time counters are in instr_time format here. We convert to
* microseconds in PgStat_Counter format when flushing out pending statistics.
@@ -106,12 +111,11 @@ typedef struct PgStat_FunctionCounts
} PgStat_FunctionCounts;
/* ----------
- * PgStat_BackendFunctionEntry Entry in backend's per-function hash table
+ * PgStat_BackendFunctionEntry Non-flushed function stats.
* ----------
*/
typedef struct PgStat_BackendFunctionEntry
{
- Oid f_id;
PgStat_FunctionCounts f_counts;
} PgStat_BackendFunctionEntry;
@@ -132,12 +136,21 @@ typedef struct PgStat_FunctionCallUsage
} PgStat_FunctionCallUsage;
/* ----------
+ * PgStat_BackendSubEntry Non-flushed subscription stats.
+ * ----------
+ */
+typedef struct PgStat_BackendSubEntry
+{
+ PgStat_Counter apply_error_count;
+ PgStat_Counter sync_error_count;
+} PgStat_BackendSubEntry;
+
+/* ----------
* PgStat_TableCounts The actual per-table counts kept by a backend
*
* This struct should contain only actual event counters, because we memcmp
- * it against zeroes to detect whether there are any counts to transmit.
- * It is a component of PgStat_TableStatus (within-backend state) and
- * PgStat_TableEntry (the transmitted message format).
+ * it against zeroes to detect whether there are any stats updates to apply.
+ * It is a component of PgStat_TableStatus (within-backend state).
*
* Note: for a table, tuples_returned is the number of tuples successfully
* fetched by heap_getnext, while tuples_fetched is the number of tuples
@@ -194,6 +207,7 @@ typedef struct PgStat_TableStatus
bool t_shared; /* is it a shared catalog? */
struct PgStat_TableXactStatus *trans; /* lowest subxact's counts */
PgStat_TableCounts t_counts; /* event counts to be sent */
+ Relation relation; /* rel that is using this entry */
} PgStat_TableStatus;
/* ----------
@@ -221,569 +235,14 @@ typedef struct PgStat_TableXactStatus
/* ------------------------------------------------------------
- * Message formats follow
- * ------------------------------------------------------------
- */
-
-/* ----------
- * The types of backend -> collector messages
- * ----------
- */
-typedef enum StatMsgType
-{
- PGSTAT_MTYPE_DUMMY,
- PGSTAT_MTYPE_INQUIRY,
- PGSTAT_MTYPE_TABSTAT,
- PGSTAT_MTYPE_TABPURGE,
- PGSTAT_MTYPE_DROPDB,
- PGSTAT_MTYPE_RESETCOUNTER,
- PGSTAT_MTYPE_RESETSHAREDCOUNTER,
- PGSTAT_MTYPE_RESETSINGLECOUNTER,
- PGSTAT_MTYPE_RESETSLRUCOUNTER,
- PGSTAT_MTYPE_RESETREPLSLOTCOUNTER,
- PGSTAT_MTYPE_RESETSUBCOUNTER,
- PGSTAT_MTYPE_AUTOVAC_START,
- PGSTAT_MTYPE_VACUUM,
- PGSTAT_MTYPE_ANALYZE,
- PGSTAT_MTYPE_ARCHIVER,
- PGSTAT_MTYPE_BGWRITER,
- PGSTAT_MTYPE_CHECKPOINTER,
- PGSTAT_MTYPE_WAL,
- PGSTAT_MTYPE_SLRU,
- PGSTAT_MTYPE_FUNCSTAT,
- PGSTAT_MTYPE_FUNCPURGE,
- PGSTAT_MTYPE_RECOVERYCONFLICT,
- PGSTAT_MTYPE_TEMPFILE,
- PGSTAT_MTYPE_DEADLOCK,
- PGSTAT_MTYPE_CHECKSUMFAILURE,
- PGSTAT_MTYPE_REPLSLOT,
- PGSTAT_MTYPE_CONNECT,
- PGSTAT_MTYPE_DISCONNECT,
- PGSTAT_MTYPE_SUBSCRIPTIONDROP,
- PGSTAT_MTYPE_SUBSCRIPTIONERROR,
-} StatMsgType;
-
-/* ----------
- * PgStat_MsgHdr The common message header
- * ----------
- */
-typedef struct PgStat_MsgHdr
-{
- StatMsgType m_type;
- int m_size;
-} PgStat_MsgHdr;
-
-/* ----------
- * Space available in a message. This will keep the UDP packets below 1K,
- * which should fit unfragmented into the MTU of the loopback interface.
- * (Larger values of PGSTAT_MAX_MSG_SIZE would work for that on most
- * platforms, but we're being conservative here.)
- * ----------
- */
-#define PGSTAT_MAX_MSG_SIZE 1000
-#define PGSTAT_MSG_PAYLOAD (PGSTAT_MAX_MSG_SIZE - sizeof(PgStat_MsgHdr))
-
-
-/* ----------
- * PgStat_MsgDummy A dummy message, ignored by the collector
- * ----------
- */
-typedef struct PgStat_MsgDummy
-{
- PgStat_MsgHdr m_hdr;
-} PgStat_MsgDummy;
-
-/* ----------
- * PgStat_MsgInquiry Sent by a backend to ask the collector
- * to write the stats file(s).
- *
- * Ordinarily, an inquiry message prompts writing of the global stats file,
- * the stats file for shared catalogs, and the stats file for the specified
- * database. If databaseid is InvalidOid, only the first two are written.
- *
- * New file(s) will be written only if the existing file has a timestamp
- * older than the specified cutoff_time; this prevents duplicated effort
- * when multiple requests arrive at nearly the same time, assuming that
- * backends send requests with cutoff_times a little bit in the past.
- *
- * clock_time should be the requestor's current local time; the collector
- * uses this to check for the system clock going backward, but it has no
- * effect unless that occurs. We assume clock_time >= cutoff_time, though.
- * ----------
- */
-typedef struct PgStat_MsgInquiry
-{
- PgStat_MsgHdr m_hdr;
- TimestampTz clock_time; /* observed local clock time */
- TimestampTz cutoff_time; /* minimum acceptable file timestamp */
- Oid databaseid; /* requested DB (InvalidOid => shared only) */
-} PgStat_MsgInquiry;
-
-/* ----------
- * PgStat_TableEntry Per-table info in a MsgTabstat
- * ----------
- */
-typedef struct PgStat_TableEntry
-{
- Oid t_id;
- PgStat_TableCounts t_counts;
-} PgStat_TableEntry;
-
-/* ----------
- * PgStat_MsgTabstat Sent by the backend to report table
- * and buffer access statistics.
- * ----------
- */
-#define PGSTAT_NUM_TABENTRIES \
- ((PGSTAT_MSG_PAYLOAD - sizeof(Oid) - 3 * sizeof(int) - 5 * sizeof(PgStat_Counter)) \
- / sizeof(PgStat_TableEntry))
-
-typedef struct PgStat_MsgTabstat
-{
- PgStat_MsgHdr m_hdr;
- Oid m_databaseid;
- int m_nentries;
- int m_xact_commit;
- int m_xact_rollback;
- PgStat_Counter m_block_read_time; /* times in microseconds */
- PgStat_Counter m_block_write_time;
- PgStat_Counter m_session_time;
- PgStat_Counter m_active_time;
- PgStat_Counter m_idle_in_xact_time;
- PgStat_TableEntry m_entry[PGSTAT_NUM_TABENTRIES];
-} PgStat_MsgTabstat;
-
-/* ----------
- * PgStat_MsgTabpurge Sent by the backend to tell the collector
- * about dead tables.
- * ----------
- */
-#define PGSTAT_NUM_TABPURGE \
- ((PGSTAT_MSG_PAYLOAD - sizeof(Oid) - sizeof(int)) \
- / sizeof(Oid))
-
-typedef struct PgStat_MsgTabpurge
-{
- PgStat_MsgHdr m_hdr;
- Oid m_databaseid;
- int m_nentries;
- Oid m_tableid[PGSTAT_NUM_TABPURGE];
-} PgStat_MsgTabpurge;
-
-/* ----------
- * PgStat_MsgDropdb Sent by the backend to tell the collector
- * about a dropped database
- * ----------
- */
-typedef struct PgStat_MsgDropdb
-{
- PgStat_MsgHdr m_hdr;
- Oid m_databaseid;
-} PgStat_MsgDropdb;
-
-/* ----------
- * PgStat_MsgResetcounter Sent by the backend to tell the collector
- * to reset counters
- * ----------
- */
-typedef struct PgStat_MsgResetcounter
-{
- PgStat_MsgHdr m_hdr;
- Oid m_databaseid;
-} PgStat_MsgResetcounter;
-
-/* ----------
- * PgStat_MsgResetsharedcounter Sent by the backend to tell the collector
- * to reset a shared counter
- * ----------
- */
-typedef struct PgStat_MsgResetsharedcounter
-{
- PgStat_MsgHdr m_hdr;
- PgStat_Kind m_resettarget;
-} PgStat_MsgResetsharedcounter;
-
-/* ----------
- * PgStat_MsgResetsinglecounter Sent by the backend to tell the collector
- * to reset a single counter
- * ----------
- */
-typedef struct PgStat_MsgResetsinglecounter
-{
- PgStat_MsgHdr m_hdr;
- Oid m_databaseid;
- PgStat_Kind m_resettype;
- Oid m_objectid;
-} PgStat_MsgResetsinglecounter;
-
-/* ----------
- * PgStat_MsgResetslrucounter Sent by the backend to tell the collector
- * to reset a SLRU counter
- * ----------
- */
-typedef struct PgStat_MsgResetslrucounter
-{
- PgStat_MsgHdr m_hdr;
- int m_index;
-} PgStat_MsgResetslrucounter;
-
-/* ----------
- * PgStat_MsgResetreplslotcounter Sent by the backend to tell the collector
- * to reset replication slot counter(s)
- * ----------
- */
-typedef struct PgStat_MsgResetreplslotcounter
-{
- PgStat_MsgHdr m_hdr;
- NameData m_slotname;
- bool clearall;
-} PgStat_MsgResetreplslotcounter;
-
-/* ----------
- * PgStat_MsgResetsubcounter Sent by the backend to tell the collector
- * to reset subscription counter(s)
- * ----------
- */
-typedef struct PgStat_MsgResetsubcounter
-{
- PgStat_MsgHdr m_hdr;
- Oid m_subid; /* InvalidOid means reset all subscription
- * stats */
-} PgStat_MsgResetsubcounter;
-
-/* ----------
- * PgStat_MsgAutovacStart Sent by the autovacuum daemon to signal
- * that a database is going to be processed
- * ----------
- */
-typedef struct PgStat_MsgAutovacStart
-{
- PgStat_MsgHdr m_hdr;
- Oid m_databaseid;
- TimestampTz m_start_time;
-} PgStat_MsgAutovacStart;
-
-/* ----------
- * PgStat_MsgVacuum Sent by the backend or autovacuum daemon
- * after VACUUM
- * ----------
- */
-typedef struct PgStat_MsgVacuum
-{
- PgStat_MsgHdr m_hdr;
- Oid m_databaseid;
- Oid m_tableoid;
- bool m_autovacuum;
- TimestampTz m_vacuumtime;
- PgStat_Counter m_live_tuples;
- PgStat_Counter m_dead_tuples;
-} PgStat_MsgVacuum;
-
-/* ----------
- * PgStat_MsgAnalyze Sent by the backend or autovacuum daemon
- * after ANALYZE
- * ----------
- */
-typedef struct PgStat_MsgAnalyze
-{
- PgStat_MsgHdr m_hdr;
- Oid m_databaseid;
- Oid m_tableoid;
- bool m_autovacuum;
- bool m_resetcounter;
- TimestampTz m_analyzetime;
- PgStat_Counter m_live_tuples;
- PgStat_Counter m_dead_tuples;
-} PgStat_MsgAnalyze;
-
-/* ----------
- * PgStat_MsgArchiver Sent by the archiver to update statistics.
- * ----------
- */
-typedef struct PgStat_MsgArchiver
-{
- PgStat_MsgHdr m_hdr;
- bool m_failed; /* Failed attempt */
- char m_xlog[MAX_XFN_CHARS + 1];
- TimestampTz m_timestamp;
-} PgStat_MsgArchiver;
-
-/* ----------
- * PgStat_MsgBgWriter Sent by the bgwriter to update statistics.
- * ----------
- */
-typedef struct PgStat_MsgBgWriter
-{
- PgStat_MsgHdr m_hdr;
-
- PgStat_Counter m_buf_written_clean;
- PgStat_Counter m_maxwritten_clean;
- PgStat_Counter m_buf_alloc;
-} PgStat_MsgBgWriter;
-
-/* ----------
- * PgStat_MsgCheckpointer Sent by the checkpointer to update statistics.
- * ----------
- */
-typedef struct PgStat_MsgCheckpointer
-{
- PgStat_MsgHdr m_hdr;
-
- PgStat_Counter m_timed_checkpoints;
- PgStat_Counter m_requested_checkpoints;
- PgStat_Counter m_buf_written_checkpoints;
- PgStat_Counter m_buf_written_backend;
- PgStat_Counter m_buf_fsync_backend;
- PgStat_Counter m_checkpoint_write_time; /* times in milliseconds */
- PgStat_Counter m_checkpoint_sync_time;
-} PgStat_MsgCheckpointer;
-
-/* ----------
- * PgStat_MsgWal Sent by backends and background processes to update WAL statistics.
- * ----------
- */
-typedef struct PgStat_MsgWal
-{
- PgStat_MsgHdr m_hdr;
- PgStat_Counter m_wal_records;
- PgStat_Counter m_wal_fpi;
- uint64 m_wal_bytes;
- PgStat_Counter m_wal_buffers_full;
- PgStat_Counter m_wal_write;
- PgStat_Counter m_wal_sync;
- PgStat_Counter m_wal_write_time; /* time spent writing wal records in
- * microseconds */
- PgStat_Counter m_wal_sync_time; /* time spent syncing wal records in
- * microseconds */
-} PgStat_MsgWal;
-
-/* ----------
- * PgStat_MsgSLRU Sent by a backend to update SLRU statistics.
- * ----------
- */
-typedef struct PgStat_MsgSLRU
-{
- PgStat_MsgHdr m_hdr;
- PgStat_Counter m_index;
- PgStat_Counter m_blocks_zeroed;
- PgStat_Counter m_blocks_hit;
- PgStat_Counter m_blocks_read;
- PgStat_Counter m_blocks_written;
- PgStat_Counter m_blocks_exists;
- PgStat_Counter m_flush;
- PgStat_Counter m_truncate;
-} PgStat_MsgSLRU;
-
-/* ----------
- * PgStat_MsgReplSlot Sent by a backend or a wal sender to update replication
- * slot statistics.
- * ----------
- */
-typedef struct PgStat_MsgReplSlot
-{
- PgStat_MsgHdr m_hdr;
- NameData m_slotname;
- bool m_create;
- bool m_drop;
- PgStat_Counter m_spill_txns;
- PgStat_Counter m_spill_count;
- PgStat_Counter m_spill_bytes;
- PgStat_Counter m_stream_txns;
- PgStat_Counter m_stream_count;
- PgStat_Counter m_stream_bytes;
- PgStat_Counter m_total_txns;
- PgStat_Counter m_total_bytes;
-} PgStat_MsgReplSlot;
-
-/* ----------
- * PgStat_MsgSubscriptionDrop Sent by the backend and autovacuum to tell the
- * collector about the dead subscription.
- * ----------
- */
-typedef struct PgStat_MsgSubscriptionDrop
-{
- PgStat_MsgHdr m_hdr;
- Oid m_subid;
-} PgStat_MsgSubscriptionDrop;
-
-/* ----------
- * PgStat_MsgSubscriptionError Sent by the apply worker or the table sync
- * worker to report an error on the subscription.
- * ----------
- */
-typedef struct PgStat_MsgSubscriptionError
-{
- PgStat_MsgHdr m_hdr;
-
- Oid m_subid;
- bool m_is_apply_error;
-} PgStat_MsgSubscriptionError;
-
-/* ----------
- * PgStat_MsgRecoveryConflict Sent by the backend upon recovery conflict
- * ----------
- */
-typedef struct PgStat_MsgRecoveryConflict
-{
- PgStat_MsgHdr m_hdr;
-
- Oid m_databaseid;
- int m_reason;
-} PgStat_MsgRecoveryConflict;
-
-/* ----------
- * PgStat_MsgTempFile Sent by the backend upon creating a temp file
- * ----------
- */
-typedef struct PgStat_MsgTempFile
-{
- PgStat_MsgHdr m_hdr;
-
- Oid m_databaseid;
- size_t m_filesize;
-} PgStat_MsgTempFile;
-
-/* ----------
- * PgStat_FunctionEntry Per-function info in a MsgFuncstat
- * ----------
- */
-typedef struct PgStat_FunctionEntry
-{
- Oid f_id;
- PgStat_Counter f_numcalls;
- PgStat_Counter f_total_time; /* times in microseconds */
- PgStat_Counter f_self_time;
-} PgStat_FunctionEntry;
-
-/* ----------
- * PgStat_MsgFuncstat Sent by the backend to report function
- * usage statistics.
- * ----------
- */
-#define PGSTAT_NUM_FUNCENTRIES \
- ((PGSTAT_MSG_PAYLOAD - sizeof(Oid) - sizeof(int)) \
- / sizeof(PgStat_FunctionEntry))
-
-typedef struct PgStat_MsgFuncstat
-{
- PgStat_MsgHdr m_hdr;
- Oid m_databaseid;
- int m_nentries;
- PgStat_FunctionEntry m_entry[PGSTAT_NUM_FUNCENTRIES];
-} PgStat_MsgFuncstat;
-
-/* ----------
- * PgStat_MsgFuncpurge Sent by the backend to tell the collector
- * about dead functions.
- * ----------
- */
-#define PGSTAT_NUM_FUNCPURGE \
- ((PGSTAT_MSG_PAYLOAD - sizeof(Oid) - sizeof(int)) \
- / sizeof(Oid))
-
-typedef struct PgStat_MsgFuncpurge
-{
- PgStat_MsgHdr m_hdr;
- Oid m_databaseid;
- int m_nentries;
- Oid m_functionid[PGSTAT_NUM_FUNCPURGE];
-} PgStat_MsgFuncpurge;
-
-/* ----------
- * PgStat_MsgDeadlock Sent by the backend to tell the collector
- * about a deadlock that occurred.
- * ----------
- */
-typedef struct PgStat_MsgDeadlock
-{
- PgStat_MsgHdr m_hdr;
- Oid m_databaseid;
-} PgStat_MsgDeadlock;
-
-/* ----------
- * PgStat_MsgChecksumFailure Sent by the backend to tell the collector
- * about checksum failures noticed.
- * ----------
- */
-typedef struct PgStat_MsgChecksumFailure
-{
- PgStat_MsgHdr m_hdr;
- Oid m_databaseid;
- int m_failurecount;
- TimestampTz m_failure_time;
-} PgStat_MsgChecksumFailure;
-
-/* ----------
- * PgStat_MsgConnect Sent by the backend upon connection
- * establishment
- * ----------
- */
-typedef struct PgStat_MsgConnect
-{
- PgStat_MsgHdr m_hdr;
- Oid m_databaseid;
-} PgStat_MsgConnect;
-
-/* ----------
- * PgStat_MsgDisconnect Sent by the backend when disconnecting
- * ----------
- */
-typedef struct PgStat_MsgDisconnect
-{
- PgStat_MsgHdr m_hdr;
- Oid m_databaseid;
- SessionEndType m_cause;
-} PgStat_MsgDisconnect;
-
-/* ----------
- * PgStat_Msg Union over all possible messages.
- * ----------
- */
-typedef union PgStat_Msg
-{
- PgStat_MsgHdr msg_hdr;
- PgStat_MsgDummy msg_dummy;
- PgStat_MsgInquiry msg_inquiry;
- PgStat_MsgTabstat msg_tabstat;
- PgStat_MsgTabpurge msg_tabpurge;
- PgStat_MsgDropdb msg_dropdb;
- PgStat_MsgResetcounter msg_resetcounter;
- PgStat_MsgResetsharedcounter msg_resetsharedcounter;
- PgStat_MsgResetsinglecounter msg_resetsinglecounter;
- PgStat_MsgResetslrucounter msg_resetslrucounter;
- PgStat_MsgResetreplslotcounter msg_resetreplslotcounter;
- PgStat_MsgResetsubcounter msg_resetsubcounter;
- PgStat_MsgAutovacStart msg_autovacuum_start;
- PgStat_MsgVacuum msg_vacuum;
- PgStat_MsgAnalyze msg_analyze;
- PgStat_MsgArchiver msg_archiver;
- PgStat_MsgBgWriter msg_bgwriter;
- PgStat_MsgCheckpointer msg_checkpointer;
- PgStat_MsgWal msg_wal;
- PgStat_MsgSLRU msg_slru;
- PgStat_MsgFuncstat msg_funcstat;
- PgStat_MsgFuncpurge msg_funcpurge;
- PgStat_MsgRecoveryConflict msg_recoveryconflict;
- PgStat_MsgDeadlock msg_deadlock;
- PgStat_MsgTempFile msg_tempfile;
- PgStat_MsgChecksumFailure msg_checksumfailure;
- PgStat_MsgReplSlot msg_replslot;
- PgStat_MsgConnect msg_connect;
- PgStat_MsgDisconnect msg_disconnect;
- PgStat_MsgSubscriptionError msg_subscriptionerror;
- PgStat_MsgSubscriptionDrop msg_subscriptiondrop;
-} PgStat_Msg;
-
-
-/* ------------------------------------------------------------
- * Statistic collector data structures follow
+ * Data structures on disk and in shared memory follow
*
* PGSTAT_FILE_FORMAT_ID should be changed whenever any of these
* data structures change.
* ------------------------------------------------------------
*/
-#define PGSTAT_FILE_FORMAT_ID 0x01A5BCA6
+#define PGSTAT_FILE_FORMAT_ID 0x01A5BCA7
typedef struct PgStat_ArchiverStats
{
@@ -808,7 +267,6 @@ typedef struct PgStat_BgWriterStats
typedef struct PgStat_CheckpointerStats
{
- TimestampTz stats_timestamp; /* time of stats file update */
PgStat_Counter timed_checkpoints;
PgStat_Counter requested_checkpoints;
PgStat_Counter checkpoint_write_time; /* times in milliseconds */
@@ -820,7 +278,6 @@ typedef struct PgStat_CheckpointerStats
typedef struct PgStat_StatDBEntry
{
- Oid databaseid;
PgStat_Counter n_xact_commit;
PgStat_Counter n_xact_rollback;
PgStat_Counter n_blocks_fetched;
@@ -852,34 +309,16 @@ typedef struct PgStat_StatDBEntry
PgStat_Counter n_sessions_killed;
TimestampTz stat_reset_timestamp;
- TimestampTz stats_timestamp; /* time of db stats file update */
-
- /*
- * tables and functions must be last in the struct, because we don't write
- * the pointers out to the stats file.
- */
- HTAB *tables;
- HTAB *functions;
} PgStat_StatDBEntry;
typedef struct PgStat_StatFuncEntry
{
- Oid functionid;
-
PgStat_Counter f_numcalls;
PgStat_Counter f_total_time; /* times in microseconds */
PgStat_Counter f_self_time;
} PgStat_StatFuncEntry;
-typedef struct PgStat_GlobalStats
-{
- TimestampTz stats_timestamp; /* time of stats file update */
-
- PgStat_CheckpointerStats checkpointer;
- PgStat_BgWriterStats bgwriter;
-} PgStat_GlobalStats;
-
typedef struct PgStat_StatReplSlotEntry
{
NameData slotname;
@@ -908,8 +347,6 @@ typedef struct PgStat_SLRUStats
typedef struct PgStat_StatSubEntry
{
- Oid subid; /* hash key (must be first) */
-
PgStat_Counter apply_error_count;
PgStat_Counter sync_error_count;
TimestampTz stat_reset_timestamp;
@@ -917,8 +354,6 @@ typedef struct PgStat_StatSubEntry
typedef struct PgStat_StatTabEntry
{
- Oid tableid;
-
PgStat_Counter numscans;
PgStat_Counter tuples_returned;
@@ -966,22 +401,19 @@ typedef struct PgStat_WalStats
*/
/* functions called from postmaster */
-extern void pgstat_init(void);
-extern void pgstat_reset_all(void);
-extern int pgstat_start(void);
-extern void allow_immediate_pgstat_restart(void);
+extern Size StatsShmemSize(void);
+extern void StatsShmemInit(void);
-#ifdef EXEC_BACKEND
-extern void PgstatCollectorMain(int argc, char *argv[]) pg_attribute_noreturn();
-#endif
+/* Functions called during server startup / shutdown */
+extern void pgstat_restore_stats(void);
+extern void pgstat_discard_stats(void);
+extern void pgstat_before_server_shutdown(int code, Datum arg);
/* Functions for backend initialization */
extern void pgstat_initialize(void);
/* Functions called from backends */
-extern void pgstat_report_stat(bool force);
-extern void pgstat_vacuum_stat(void);
-extern void pgstat_ping(void);
+extern long pgstat_report_stat(bool force);
extern void pgstat_reset_counters(void);
extern void pgstat_reset(PgStat_Kind kind, Oid dboid, Oid objectid);
@@ -989,24 +421,17 @@ extern void pgstat_reset_of_kind(PgStat_Kind kind);
/* stats accessors */
extern void pgstat_clear_snapshot(void);
-extern PgStat_ArchiverStats *pgstat_fetch_stat_archiver(void);
-extern PgStat_BgWriterStats *pgstat_fetch_stat_bgwriter(void);
-extern PgStat_CheckpointerStats *pgstat_fetch_stat_checkpointer(void);
-extern PgStat_StatDBEntry *pgstat_fetch_stat_dbentry(Oid dbid);
-extern PgStat_StatFuncEntry *pgstat_fetch_stat_funcentry(Oid funcid);
-extern PgStat_GlobalStats *pgstat_fetch_global(void);
-extern PgStat_StatReplSlotEntry *pgstat_fetch_replslot(NameData slotname);
-extern PgStat_StatSubEntry *pgstat_fetch_stat_subscription(Oid subid);
-extern PgStat_SLRUStats *pgstat_fetch_slru(void);
-extern PgStat_StatTabEntry *pgstat_fetch_stat_tabentry(Oid relid);
-extern PgStat_WalStats *pgstat_fetch_stat_wal(void);
+extern TimestampTz pgstat_get_stat_snapshot_timestamp(bool *have_snapshot);
+/* helpers */
+extern PgStat_Kind pgstat_get_kind_from_str(char *kind_str);
/*
* Functions in pgstat_archiver.c
*/
extern void pgstat_report_archiver(const char *xlog, bool failed);
+extern PgStat_ArchiverStats *pgstat_fetch_stat_archiver(void);
/*
@@ -1014,6 +439,7 @@ extern void pgstat_report_archiver(const char *xlog, bool failed);
*/
extern void pgstat_report_bgwriter(void);
+extern PgStat_BgWriterStats *pgstat_fetch_stat_bgwriter(void);
/*
@@ -1021,6 +447,7 @@ extern void pgstat_report_bgwriter(void);
*/
extern void pgstat_report_checkpointer(void);
+extern PgStat_CheckpointerStats *pgstat_fetch_stat_checkpointer(void);
/*
@@ -1044,6 +471,7 @@ extern void pgstat_report_connect(Oid dboid);
#define pgstat_count_conn_txn_idle_time(n) \
(pgStatTransactionIdleTime += (n))
+extern PgStat_StatDBEntry *pgstat_fetch_stat_dbentry(Oid dbid);
/*
* Functions in pgstat_function.c
@@ -1058,6 +486,7 @@ extern void pgstat_init_function_usage(struct FunctionCallInfoBaseData *fcinfo,
extern void pgstat_end_function_usage(PgStat_FunctionCallUsage *fcu,
bool finalize);
+extern PgStat_StatFuncEntry *pgstat_fetch_stat_funcentry(Oid funcid);
extern PgStat_BackendFunctionEntry *find_funcstat_entry(Oid func_id);
@@ -1070,6 +499,8 @@ extern void pgstat_drop_relation(Relation rel);
extern void pgstat_copy_relation_stats(Relation dstrel, Relation srcrel);
extern void pgstat_init_relation(Relation rel);
+extern void pgstat_assoc_relation(Relation rel);
+extern void pgstat_unlink_relation(Relation rel);
extern void pgstat_report_vacuum(Oid tableoid, bool shared,
PgStat_Counter livetuples, PgStat_Counter deadtuples);
@@ -1077,8 +508,14 @@ extern void pgstat_report_analyze(Relation rel,
PgStat_Counter livetuples, PgStat_Counter deadtuples,
bool resetcounter);
+/*
+ * If stats are enabled, but pending data hasn't been prepared yet, call
+ * pgstat_assoc_relation() to do so. See its comment for why this is done
+ * separately from pgstat_init_relation().
+ */
#define pgstat_should_count_relation(rel) \
- (likely((rel)->pgstat_info != NULL))
+ (likely((rel)->pgstat_info != NULL) ? true : \
+ ((rel)->pgstat_enabled ? pgstat_assoc_relation(rel), true : false))
/* nontransactional event counts are simple enough to inline */
@@ -1129,6 +566,9 @@ extern void pgstat_twophase_postcommit(TransactionId xid, uint16 info,
extern void pgstat_twophase_postabort(TransactionId xid, uint16 info,
void *recdata, uint32 len);
+extern PgStat_StatTabEntry *pgstat_fetch_stat_tabentry(Oid relid);
+extern PgStat_StatTabEntry *pgstat_fetch_stat_tabentry_ext(bool shared,
+ Oid relid);
extern PgStat_TableStatus *find_tabstat_entry(Oid rel_id);
@@ -1140,7 +580,9 @@ extern void pgstat_reset_replslot(const char *name);
struct ReplicationSlot;
extern void pgstat_report_replslot(struct ReplicationSlot *slot, const PgStat_StatReplSlotEntry *repSlotStat);
extern void pgstat_create_replslot(struct ReplicationSlot *slot);
+extern void pgstat_acquire_replslot(struct ReplicationSlot *slot);
extern void pgstat_drop_replslot(struct ReplicationSlot *slot);
+extern PgStat_StatReplSlotEntry *pgstat_fetch_replslot(NameData slotname);
/*
@@ -1157,6 +599,7 @@ extern void pgstat_count_slru_flush(int slru_idx);
extern void pgstat_count_slru_truncate(int slru_idx);
extern const char *pgstat_get_slru_name(int slru_idx);
extern int pgstat_get_slru_index(const char *name);
+extern PgStat_SLRUStats *pgstat_fetch_slru(void);
/*
@@ -1166,6 +609,7 @@ extern int pgstat_get_slru_index(const char *name);
extern void pgstat_report_subscription_error(Oid subid, bool is_apply_error);
extern void pgstat_create_subscription(Oid subid);
extern void pgstat_drop_subscription(Oid subid);
+extern PgStat_StatSubEntry *pgstat_fetch_stat_subscription(Oid subid);
/*
@@ -1186,6 +630,7 @@ extern void pgstat_execute_transactional_drops(int ndrops, struct xl_xact_stats_
*/
extern void pgstat_report_wal(bool force);
+extern PgStat_WalStats *pgstat_fetch_stat_wal(void);
/*
@@ -1195,6 +640,8 @@ extern void pgstat_report_wal(bool force);
/* GUC parameters */
extern PGDLLIMPORT bool pgstat_track_counts;
extern PGDLLIMPORT int pgstat_track_functions;
+extern PGDLLIMPORT int pgstat_fetch_consistency;
+
extern char *pgstat_stat_directory;
extern char *pgstat_stat_tmpname;
extern char *pgstat_stat_filename;
@@ -1205,7 +652,7 @@ extern char *pgstat_stat_filename;
*/
/* updated directly by bgwriter and bufmgr */
-extern PgStat_MsgBgWriter PendingBgWriterStats;
+extern PgStat_BgWriterStats PendingBgWriterStats;
/*
@@ -1216,7 +663,7 @@ extern PgStat_MsgBgWriter PendingBgWriterStats;
* Checkpointer statistics counters are updated directly by checkpointer and
* bufmgr.
*/
-extern PgStat_MsgCheckpointer PendingCheckpointerStats;
+extern PgStat_CheckpointerStats PendingCheckpointerStats;
/*
@@ -1243,7 +690,7 @@ extern SessionEndType pgStatSessionEndCause;
*/
/* updated directly by backends and background processes */
-extern PgStat_MsgWal WalStats;
+extern PgStat_WalStats PendingWalStats;
#endif /* PGSTAT_H */
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index c3d5889d7b2..33eb4c10339 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -190,6 +190,9 @@ typedef enum BuiltinTrancheIds
LWTRANCHE_SHARED_TIDBITMAP,
LWTRANCHE_PARALLEL_APPEND,
LWTRANCHE_PER_XACT_PREDICATE_LIST,
+ LWTRANCHE_PGSTATS_DSA,
+ LWTRANCHE_PGSTATS_HASH,
+ LWTRANCHE_PGSTATS_DATA,
LWTRANCHE_FIRST_USER_DEFINED
} BuiltinTrancheIds;
diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h
index c3f83c74c62..ab27bc47c5e 100644
--- a/src/include/utils/pgstat_internal.h
+++ b/src/include/utils/pgstat_internal.h
@@ -14,21 +14,134 @@
#define PGSTAT_INTERNAL_H
+#include "common/hashfn.h"
+#include "lib/dshash.h"
+#include "lib/ilist.h"
#include "pgstat.h"
+#include "storage/lwlock.h"
+#include "utils/dsa.h"
-#define PGSTAT_STAT_INTERVAL 500 /* Minimum time between stats file
- * updates; in milliseconds. */
+/*
+ * Types related to shared memory storage of statistics.
+ *
+ * Per-object statistics are stored in the "shared stats" hashtable. That
+ * table's entries (PgStatShared_HashEntry) contain a pointer to the actual stats
+ * data for the object (the size of the stats data varies depending on the
+ * kind of stats). The table is keyed by PgStat_HashKey.
+ *
+ * Once a backend has a reference to a shared stats entry, it increments the
+ * entry's refcount. Even after stats data is dropped (e.g., due to a DROP
+ * TABLE), the entry itself can only be deleted once all references have been
+ * released.
+ *
+ * These refcounts, in combination with a backend local hashtable
+ * (pgStatEntryRefHash, with entries pointing to PgStat_EntryRef) in front of
+ * the shared hash table, mean that most stats work can happen without
+ * touching the shared hash table, reducing contention.
+ *
+ * Once there are pending stats updates for an entry, PgStat_EntryRef->pending
+ * is allocated as working space for the not-yet-applied stats updates. Once
+ * the stats are flushed, PgStat_EntryRef->pending is freed.
+ *
+ * Each stat kind in the shared hash table has a fixed member
+ * PgStatShared_Common as the first element.
+ */
-/* ----------
- * The initial size hints for the hash tables used in the collector.
- * ----------
+/* struct for shared statistics hash entry key. */
+typedef struct PgStat_HashKey
+{
+ PgStat_Kind kind; /* statistics entry kind */
+ Oid dboid; /* database ID. InvalidOid for shared objects. */
+ Oid objoid; /* object ID, either table or function. */
+} PgStat_HashKey;
+
+/*
+ * Shared statistics hash entry. Doesn't itself contain any stats, but points
+ * to them (with ->body). That allows the stats entries themselves to be of
+ * variable size.
*/
-#define PGSTAT_DB_HASH_SIZE 16
-#define PGSTAT_TAB_HASH_SIZE 512
-#define PGSTAT_FUNCTION_HASH_SIZE 512
-#define PGSTAT_SUBSCRIPTION_HASH_SIZE 32
-#define PGSTAT_REPLSLOT_HASH_SIZE 32
+typedef struct PgStatShared_HashEntry
+{
+ PgStat_HashKey key; /* hash key */
+
+ /*
+ * If dropped is set, backends need to release their references so that
+ * the memory for the entry can be freed. No new references may be made
+ * once marked as dropped.
+ */
+ bool dropped;
+
+ /*
+ * Refcount managing lifetime of the entry itself (as opposed to the
+ * dshash entry pointing to it). The stats lifetime has to be separate
+ * from the hash table entry lifetime because we allow backends to point
+ * to a stats entry without holding a hash table lock (and some other
+ * reasons).
+ *
+ * As long as the entry is not dropped, the refcount includes one reference
+ * representing that the entry has not been dropped. In addition, each
+ * backend that has a reference to the entry keeps the refcount incremented
+ * for as long as it holds that reference.
+ *
+ * May only be incremented / decremented while holding at least a shared
+ * lock on the dshash partition containing the entry. It needs to be an
+ * atomic variable because multiple backends can increment the refcount
+ * with just a shared lock.
+ *
+ * When the refcount reaches 0 the entry needs to be freed.
+ */
+ pg_atomic_uint32 refcount;
+
+ /*
+ * Pointer to shared stats. The stats entry always starts with
+ * PgStatShared_Common, embedded in a larger struct containing the
+ * PgStat_Kind specific stats fields.
+ */
+ dsa_pointer body;
+} PgStatShared_HashEntry;
+
+/*
+ * Common header struct at the start of every kind-specific shared stats entry.
+ */
+typedef struct PgStatShared_Common
+{
+ uint32 magic; /* just a validity cross-check */
+ /* lock protecting stats contents (i.e. data following the header) */
+ LWLock lock;
+} PgStatShared_Common;
+
+/*
+ * A backend local reference to a shared stats entry. As long as at least one
+ * such reference exists, the shared stats entry will not be released.
+ *
+ * If there are pending stats updates for the shared stats entry, these are
+ * stored in ->pending.
+ */
+typedef struct PgStat_EntryRef
+{
+ /*
+ * Pointer to the PgStatShared_HashEntry entry in the shared stats
+ * hashtable.
+ */
+ PgStatShared_HashEntry *shared_entry;
+
+ /*
+ * Pointer to the stats data (i.e. PgStatShared_HashEntry->body), resolved
+ * as a local pointer, to avoid repeated dsa_get_address() calls.
+ */
+ PgStatShared_Common *shared_stats;
+
+ /*
+ * Pending statistics data that will need to be flushed to shared memory
+ * stats eventually. Each stats kind utilizing pending data defines what
+ * format its pending data has and needs to provide a
+ * PgStat_KindInfo->flush_pending_cb callback to merge pending into shared
+ * stats.
+ */
+ void *pending;
+ dlist_node pending_node; /* membership in pgStatPending list */
+} PgStat_EntryRef;
/*
@@ -43,11 +156,11 @@ typedef struct PgStat_SubXactStatus
struct PgStat_SubXactStatus *prev; /* higher-level subxact if any */
/*
- * Dropping the statistics for objects that dropped transactionally itself
- * needs to be transactional. Therefore we collect the stats dropped in
- * the current (sub-)transaction and only execute the stats drop when we
- * know if the transaction commits/aborts. To handle replicas and crashes,
- * stats drops are included in commit records.
+ * Statistics for transactionally dropped objects need to be
+ * transactionally dropped as well. Collect the stats dropped in the
+ * current (sub-)transaction and only execute the stats drop when we know
+ * if the transaction commits/aborts. To handle replicas and crashes,
+ * stats drops are included in commit / abort records.
*/
dlist_head pending_drops;
int pending_drops_count;
@@ -65,9 +178,95 @@ typedef struct PgStat_SubXactStatus
/*
+ * Metadata for a specific kind of statistics.
+ */
+typedef struct PgStat_KindInfo
+{
+ /*
+ * Do a fixed number of stats objects exist for this kind of stats (e.g.
+ * bgwriter stats) or not (e.g. tables).
+ */
+ bool fixed_amount:1;
+
+ /*
+ * Can stats of this kind be accessed from another database? Determines
+ * whether a stats object gets included in stats snapshots.
+ */
+ bool accessed_across_databases:1;
+
+ /*
+ * For variable-numbered stats: Identified on-disk using a name, rather
+ * than PgStat_HashKey. Probably only needed for replication slot stats.
+ */
+ bool named_on_disk:1;
+
+ /*
+ * The size of an entry in the shared stats hash table (pointed to by
+ * PgStatShared_HashEntry->body).
+ */
+ uint32 shared_size;
+
+ /*
+ * The offset/size of statistics inside the shared stats entry. Used when
+ * [de-]serializing statistics to / from disk respectively. Separate from
+ * shared_size because [de-]serialization may not include in-memory state
+ * like lwlocks.
+ */
+ uint32 shared_data_off;
+ uint32 shared_data_len;
+
+ /*
+ * The size of the pending data for this kind. I.e. how large
+ * PgStat_EntryRef->pending is. Used for allocations.
+ *
+ * 0 signals that an entry of this kind should never have a pending entry.
+ */
+ uint32 pending_size;
+
+ /*
+ * For variable-numbered stats: flush pending stats. Required if pending
+ * data is used.
+ */
+ bool (*flush_pending_cb) (PgStat_EntryRef *sr, bool nowait);
+
+ /*
+ * For variable-numbered stats: delete pending stats. Optional.
+ */
+ void (*delete_pending_cb) (PgStat_EntryRef *sr);
+
+ /*
+ * For variable-numbered stats: reset the reset timestamp. Optional.
+ */
+ void (*reset_timestamp_cb) (PgStatShared_Common *header, TimestampTz ts);
+
+ /*
+ * For variable-numbered stats with named_on_disk. Optional.
+ */
+ void (*to_serialized_name) (const PgStatShared_Common *header, NameData *name);
+ bool (*from_serialized_name) (const NameData *name, PgStat_HashKey *key);
+
+ /*
+ * For fixed-numbered statistics: Reset All.
+ */
+ void (*reset_all_cb) (TimestampTz ts);
+
+ /*
+ * For fixed-numbered statistics: Build snapshot for entry.
+ */
+ void (*snapshot_cb) (void);
+
+ /* name of the kind of stats */
+ const char *const name;
+} PgStat_KindInfo;
+
+
+/*
* List of SLRU names that we keep stats for. There is no central registry of
* SLRUs, so we use this fixed list instead. The "other" entry is used for
* all SLRUs without an explicit entry (e.g. SLRUs in extensions).
+ *
+ * This is only defined here so that SLRU_NUM_ELEMENTS is known for later type
+ * definitions.
*/
static const char *const slru_names[] = {
"CommitTs",
@@ -83,33 +282,271 @@ static const char *const slru_names[] = {
#define SLRU_NUM_ELEMENTS lengthof(slru_names)
+/* ----------
+ * Types and definitions for different kinds of fixed-amount stats.
+ *
+ * Single-writer stats use the changecount mechanism to achieve low-overhead
+ * writes - they're obviously more performance critical than reads. Check the
+ * definition of struct PgBackendStatus for some explanation of the
+ * changecount mechanism.
+ *
+ * Because the obvious implementation of resetting single-writer stats isn't
+ * compatible with that (another backend needs to write), we don't scribble on
+ * shared stats while resetting. Instead, just record the current counter
+ * values in a copy of the stats data, which is protected by ->lock. See
+ * pgstat_fetch_stat_(archiver|bgwriter|checkpointer) for the reader side.
+ *
+ * The only exception to that is the stat_reset_timestamp in these
+ * structs, which is protected by ->lock, because it has to be written by
+ * another backend while resetting.
+ * ----------
+ */
+
+typedef struct PgStatShared_Archiver
+{
+ /* lock protects ->reset_offset as well as stats->stat_reset_timestamp */
+ LWLock lock;
+ uint32 changecount;
+ PgStat_ArchiverStats stats;
+ PgStat_ArchiverStats reset_offset;
+} PgStatShared_Archiver;
+
+typedef struct PgStatShared_BgWriter
+{
+ /* lock protects ->reset_offset as well as stats->stat_reset_timestamp */
+ LWLock lock;
+ uint32 changecount;
+ PgStat_BgWriterStats stats;
+ PgStat_BgWriterStats reset_offset;
+} PgStatShared_BgWriter;
+
+typedef struct PgStatShared_Checkpointer
+{
+ /* lock protects ->reset_offset as well as stats->stat_reset_timestamp */
+ LWLock lock;
+ uint32 changecount;
+ PgStat_CheckpointerStats stats;
+ PgStat_CheckpointerStats reset_offset;
+} PgStatShared_Checkpointer;
+
+typedef struct PgStatShared_SLRU
+{
+ /* lock protects ->stats */
+ LWLock lock;
+ PgStat_SLRUStats stats[SLRU_NUM_ELEMENTS];
+} PgStatShared_SLRU;
+
+typedef struct PgStatShared_Wal
+{
+ /* lock protects ->stats */
+ LWLock lock;
+ PgStat_WalStats stats;
+} PgStatShared_Wal;
+
+
+
+/* ----------
+ * Types and definitions for different kinds of variable-amount stats.
+ *
+ * Each struct has to start with PgStatShared_Common, containing information
+ * common across the different types of stats. Kind-specific data follows.
+ * ----------
+ */
+
+typedef struct PgStatShared_Database
+{
+ PgStatShared_Common header;
+ PgStat_StatDBEntry stats;
+} PgStatShared_Database;
+
+typedef struct PgStatShared_Relation
+{
+ PgStatShared_Common header;
+ PgStat_StatTabEntry stats;
+} PgStatShared_Relation;
+
+typedef struct PgStatShared_Function
+{
+ PgStatShared_Common header;
+ PgStat_StatFuncEntry stats;
+} PgStatShared_Function;
+
+typedef struct PgStatShared_Subscription
+{
+ PgStatShared_Common header;
+ PgStat_StatSubEntry stats;
+} PgStatShared_Subscription;
+
+typedef struct PgStatShared_ReplSlot
+{
+ PgStatShared_Common header;
+ PgStat_StatReplSlotEntry stats;
+} PgStatShared_ReplSlot;
+
+
+/*
+ * Central shared memory entry for the cumulative stats system.
+ *
+ * Fixed amount stats, the dynamic shared memory hash table for
+ * non-fixed-amount stats, as well as remaining bits and pieces are all
+ * reached from here.
+ */
+typedef struct PgStat_ShmemControl
+{
+ void *raw_dsa_area;
+
+ /*
+ * Stats for variable-numbered objects are kept in this shared hash table.
+ * See comment above PgStat_Kind for details.
+ */
+ dshash_table_handle hash_handle; /* shared dbstat hash */
+
+ /* Has the stats system already been shut down? Just a debugging check. */
+ bool is_shutdown;
+
+ /*
+ * Whenever statistics for dropped objects could not be freed - because
+ * backends still have references - the dropping backend calls
+ * pgstat_request_entry_refs_gc() incrementing this counter. Eventually
+ * that causes backends to run pgstat_gc_entry_refs(), allowing memory to
+ * be reclaimed.
+ */
+ pg_atomic_uint64 gc_request_count;
+
+ /*
+ * Stats data for fixed-numbered objects.
+ */
+ PgStatShared_Archiver archiver;
+ PgStatShared_BgWriter bgwriter;
+ PgStatShared_Checkpointer checkpointer;
+ PgStatShared_SLRU slru;
+ PgStatShared_Wal wal;
+} PgStat_ShmemControl;
+
+
+/*
+ * Cached statistics snapshot
+ */
+typedef struct PgStat_Snapshot
+{
+ PgStat_FetchConsistency mode;
+
+ /* time at which snapshot was taken */
+ TimestampTz snapshot_timestamp;
+
+ bool fixed_valid[PGSTAT_NUM_KINDS];
+
+ PgStat_ArchiverStats archiver;
+
+ PgStat_BgWriterStats bgwriter;
+
+ PgStat_CheckpointerStats checkpointer;
+
+ PgStat_SLRUStats slru[SLRU_NUM_ELEMENTS];
+
+ PgStat_WalStats wal;
+
+ /* to free snapshot in bulk */
+ MemoryContext context;
+ struct pgstat_snapshot_hash *stats;
+} PgStat_Snapshot;
+
+
+/*
+ * Collection of backend-local stats state.
+ */
+typedef struct PgStat_LocalState
+{
+ PgStat_ShmemControl *shmem;
+ dsa_area *dsa;
+ dshash_table *shared_hash;
+
+ /* the current statistics snapshot */
+ PgStat_Snapshot snapshot;
+} PgStat_LocalState;
+
+
+/*
+ * Inline functions defined further below.
+ */
+
+static inline void pgstat_begin_changecount_write(uint32 *cc);
+static inline void pgstat_end_changecount_write(uint32 *cc);
+static inline uint32 pgstat_begin_changecount_read(uint32 *cc);
+static inline bool pgstat_end_changecount_read(uint32 *cc, uint32 cc_before);
+
+static inline void pgstat_copy_changecounted_stats(void *dst, void *src, size_t len,
+ uint32 *cc);
+
+static inline int pgstat_cmp_hash_key(const void *a, const void *b, size_t size, void *arg);
+static inline uint32 pgstat_hash_hash_key(const void *d, size_t size, void *arg);
+static inline size_t pgstat_get_entry_len(PgStat_Kind kind);
+static inline void *pgstat_get_entry_data(PgStat_Kind kind, PgStatShared_Common *entry);
+
+
/*
* Functions in pgstat.c
*/
-extern void pgstat_setheader(PgStat_MsgHdr *hdr, StatMsgType mtype);
-extern void pgstat_send(void *msg, int len);
+const PgStat_KindInfo *pgstat_get_kind_info(PgStat_Kind kind);
+
#ifdef USE_ASSERT_CHECKING
extern void pgstat_assert_is_up(void);
#else
#define pgstat_assert_is_up() ((void)true)
#endif
+extern void pgstat_delete_pending_entry(PgStat_EntryRef *entry_ref);
+extern PgStat_EntryRef *pgstat_prep_pending_entry(PgStat_Kind kind, Oid dboid, Oid objoid, bool *created_entry);
+extern PgStat_EntryRef *pgstat_fetch_pending_entry(PgStat_Kind kind, Oid dboid, Oid objoid);
+
+extern void *pgstat_fetch_entry(PgStat_Kind kind, Oid dboid, Oid objoid);
+extern void pgstat_snapshot_fixed(PgStat_Kind kind);
+
+
+/*
+ * Functions in pgstat_archiver.c
+ */
+
+extern void pgstat_archiver_reset_all_cb(TimestampTz ts);
+extern void pgstat_archiver_snapshot_cb(void);
+
+
+/*
+ * Functions in pgstat_bgwriter.c
+ */
+
+extern void pgstat_bgwriter_reset_all_cb(TimestampTz ts);
+extern void pgstat_bgwriter_snapshot_cb(void);
+
+
+/*
+ * Functions in pgstat_checkpointer.c
+ */
+
+extern void pgstat_checkpointer_reset_all_cb(TimestampTz ts);
+extern void pgstat_checkpointer_snapshot_cb(void);
+
/*
* Functions in pgstat_database.c
*/
-extern void AtEOXact_PgStat_Database(bool isCommit, bool parallel);
extern void pgstat_report_disconnect(Oid dboid);
-extern void pgstat_update_dbstats(PgStat_MsgTabstat *tsmsg, TimestampTz now);
+extern void pgstat_update_dbstats(TimestampTz ts);
+extern void AtEOXact_PgStat_Database(bool isCommit, bool parallel);
+
+extern PgStat_StatDBEntry *pgstat_prep_database_pending(Oid dboid);
+extern void pgstat_reset_database_timestamp(Oid dboid, TimestampTz ts);
+extern bool pgstat_database_flush_cb(PgStat_EntryRef *entry_ref, bool nowait);
+extern void pgstat_database_reset_timestamp_cb(PgStatShared_Common *header, TimestampTz ts);
/*
* Functions in pgstat_function.c
*/
-extern void pgstat_send_funcstats(void);
+extern bool pgstat_function_flush_cb(PgStat_EntryRef *entry_ref, bool nowait);
/*
@@ -120,23 +557,73 @@ extern void AtEOXact_PgStat_Relations(PgStat_SubXactStatus *xact_state, bool isC
extern void AtEOSubXact_PgStat_Relations(PgStat_SubXactStatus *xact_state, bool isCommit, int nestDepth);
extern void AtPrepare_PgStat_Relations(PgStat_SubXactStatus *xact_state);
extern void PostPrepare_PgStat_Relations(PgStat_SubXactStatus *xact_state);
-extern void pgstat_send_tabstats(TimestampTz now, bool disconnect);
+
+extern bool pgstat_relation_flush_cb(PgStat_EntryRef *entry_ref, bool nowait);
+extern void pgstat_relation_delete_pending_cb(PgStat_EntryRef *entry_ref);
+
+
+/*
+ * Functions in pgstat_replslot.c
+ */
+
+extern void pgstat_replslot_reset_timestamp_cb(PgStatShared_Common *header, TimestampTz ts);
+extern void pgstat_replslot_to_serialized_name_cb(const PgStatShared_Common *tmp, NameData *name);
+extern bool pgstat_replslot_from_serialized_name_cb(const NameData *name, PgStat_HashKey *key);
+
+
+/*
+ * Functions in pgstat_shmem.c
+ */
+
+extern void pgstat_attach_shmem(void);
+extern void pgstat_detach_shmem(void);
+
+extern PgStat_EntryRef *pgstat_get_entry_ref(PgStat_Kind kind, Oid dboid, Oid objoid,
+ bool create, bool *found);
+extern bool pgstat_lock_entry(PgStat_EntryRef *entry_ref, bool nowait);
+extern void pgstat_unlock_entry(PgStat_EntryRef *entry_ref);
+extern bool pgstat_drop_entry(PgStat_Kind kind, Oid dboid, Oid objoid);
+extern void pgstat_drop_all_entries(void);
+extern PgStat_EntryRef *pgstat_get_entry_ref_locked(PgStat_Kind kind, Oid dboid, Oid objoid,
+ bool nowait);
+extern void pgstat_reset_entry(PgStat_Kind kind, Oid dboid, Oid objoid, TimestampTz ts);
+extern void pgstat_reset_entries_of_kind(PgStat_Kind kind, TimestampTz ts);
+extern void pgstat_reset_matching_entries(bool (*do_reset) (PgStatShared_HashEntry *, Datum),
+ Datum match_data,
+ TimestampTz ts);
+
+extern void pgstat_request_entry_refs_gc(void);
+extern PgStatShared_Common *pgstat_init_entry(PgStat_Kind kind,
+ PgStatShared_HashEntry *shhashent);
/*
* Functions in pgstat_slru.c
*/
-extern void pgstat_send_slru(void);
+extern bool pgstat_slru_flush(bool nowait);
+extern void pgstat_slru_reset_all_cb(TimestampTz ts);
+extern void pgstat_slru_snapshot_cb(void);
/*
* Functions in pgstat_wal.c
*/
+extern bool pgstat_flush_wal(bool nowait);
extern void pgstat_init_wal(void);
extern bool pgstat_have_pending_wal(void);
+extern void pgstat_wal_reset_all_cb(TimestampTz ts);
+extern void pgstat_wal_snapshot_cb(void);
+
+
+/*
+ * Functions in pgstat_subscription.c
+ */
+
+extern bool pgstat_subscription_flush_cb(PgStat_EntryRef *entry_ref, bool nowait);
+extern void pgstat_subscription_reset_timestamp_cb(PgStatShared_Common *header, TimestampTz ts);
/*
* Functions in pgstat_xact.c
@@ -151,29 +638,145 @@ extern void pgstat_create_transactional(PgStat_Kind kind, Oid dboid, Oid objoid)
* Variables in pgstat.c
*/
-extern pgsocket pgStatSock;
+extern PgStat_LocalState pgStatLocal;
/*
- * Variables in pgstat_database.c
+ * Variables in pgstat_slru.c
*/
-extern int pgStatXactCommit;
-extern int pgStatXactRollback;
+extern bool have_slrustats;
/*
- * Variables in pgstat_functions.c
+ * Implementation of inline functions declared above.
+ */
+
+/*
+ * Helpers for changecount manipulation. See comments around struct
+ * PgBackendStatus for details.
*/
-extern bool have_function_stats;
+static inline void
+pgstat_begin_changecount_write(uint32 *cc)
+{
+ Assert((*cc & 1) == 0);
+
+ START_CRIT_SECTION();
+ (*cc)++;
+ pg_write_barrier();
+}
+
+static inline void
+pgstat_end_changecount_write(uint32 *cc)
+{
+ Assert((*cc & 1) == 1);
+
+ pg_write_barrier();
+
+ (*cc)++;
+
+ END_CRIT_SECTION();
+}
+
+static inline uint32
+pgstat_begin_changecount_read(uint32 *cc)
+{
+ uint32 before_cc = *cc;
+
+ CHECK_FOR_INTERRUPTS();
+ pg_read_barrier();
+
+ return before_cc;
+}
/*
- * Variables in pgstat_relation.c
+ * Returns true if the read succeeded, false if it needs to be repeated.
*/
+static inline bool
+pgstat_end_changecount_read(uint32 *cc, uint32 before_cc)
+{
+ uint32 after_cc;
+
+ pg_read_barrier();
+
+ after_cc = *cc;
+
+ /* was a write in progress when we started? */
+ if (before_cc & 1)
+ return false;
+
+ /* did writes start and complete while we read? */
+ return before_cc == after_cc;
+}
+
+
+/*
+ * Helper function for PgStat_KindInfo->snapshot_cb and
+ * PgStat_KindInfo->reset_all_cb callbacks.
+ *
+ * Copies out the specified memory area following change-count protocol.
+ */
+static inline void
+pgstat_copy_changecounted_stats(void *dst, void *src, size_t len,
+ uint32 *cc)
+{
+ uint32 cc_before;
+
+ do
+ {
+ cc_before = pgstat_begin_changecount_read(cc);
+
+ memcpy(dst, src, len);
+ }
+ while (!pgstat_end_changecount_read(cc, cc_before));
+}
+
+/* helpers for dshash / simplehash hashtables */
+static inline int
+pgstat_cmp_hash_key(const void *a, const void *b, size_t size, void *arg)
+{
+ AssertArg(size == sizeof(PgStat_HashKey) && arg == NULL);
+ return memcmp(a, b, sizeof(PgStat_HashKey));
+}
+
+static inline uint32
+pgstat_hash_hash_key(const void *d, size_t size, void *arg)
+{
+ const PgStat_HashKey *key = (PgStat_HashKey *) d;
+ uint32 hash;
+
+ AssertArg(size == sizeof(PgStat_HashKey) && arg == NULL);
+
+ hash = murmurhash32(key->kind);
+ hash = hash_combine(hash, murmurhash32(key->dboid));
+ hash = hash_combine(hash, murmurhash32(key->objoid));
+
+ return hash;
+}
+
+/*
+ * The length of the data portion of a shared memory stats entry (i.e. without
+ * transient data such as refcounts, lwlocks, ...).
+ */
+static inline size_t
+pgstat_get_entry_len(PgStat_Kind kind)
+{
+ return pgstat_get_kind_info(kind)->shared_data_len;
+}
+
+/*
+ * Returns a pointer to the data portion of a shared memory stats entry.
+ */
+static inline void *
+pgstat_get_entry_data(PgStat_Kind kind, PgStatShared_Common *entry)
+{
+ size_t off = pgstat_get_kind_info(kind)->shared_data_off;
-extern bool have_relation_stats;
+ Assert(off != 0 && off < PG_UINT32_MAX);
+ return ((char *) (entry)) + off;
+}
#endif /* PGSTAT_INTERNAL_H */
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 121dbbc9a96..eadbd009045 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -246,6 +246,7 @@ typedef struct RelationData
*/
Oid rd_toastoid; /* Real TOAST table's OID, or InvalidOid */
+ bool pgstat_enabled; /* should relation stats be counted */
/* use "struct" here to avoid needing to include pgstat.h: */
struct PgStat_TableStatus *pgstat_info; /* statistics collection area */
} RelationData;
diff --git a/src/include/utils/timeout.h b/src/include/utils/timeout.h
index 099f91c61da..c068986d09a 100644
--- a/src/include/utils/timeout.h
+++ b/src/include/utils/timeout.h
@@ -32,6 +32,7 @@ typedef enum TimeoutId
STANDBY_LOCK_TIMEOUT,
IDLE_IN_TRANSACTION_SESSION_TIMEOUT,
IDLE_SESSION_TIMEOUT,
+ IDLE_STATS_UPDATE_TIMEOUT,
CLIENT_CONNECTION_CHECK_TIMEOUT,
STARTUP_PROGRESS_TIMEOUT,
/* First user-definable timeout reason */
diff --git a/src/include/utils/wait_event.h b/src/include/utils/wait_event.h
index d870c592632..b578e2ec757 100644
--- a/src/include/utils/wait_event.h
+++ b/src/include/utils/wait_event.h
@@ -42,7 +42,6 @@ typedef enum
WAIT_EVENT_CHECKPOINTER_MAIN,
WAIT_EVENT_LOGICAL_APPLY_MAIN,
WAIT_EVENT_LOGICAL_LAUNCHER_MAIN,
- WAIT_EVENT_PGSTAT_MAIN,
WAIT_EVENT_RECOVERY_WAL_STREAM,
WAIT_EVENT_SYSLOGGER_MAIN,
WAIT_EVENT_WAL_RECEIVER_MAIN,