diff options
| author | Tomas Vondra | 2020-04-02 00:11:38 +0000 |
|---|---|---|
| committer | Tomas Vondra | 2020-04-02 00:34:21 +0000 |
| commit | 28cac71bd368788d1ab22f048eef211641fb1283 (patch) | |
| tree | ff540c1e6cabe828d884d7098af51091d7957927 /src/backend/postmaster | |
| parent | 17ca067995114ee40749d9138ba85fdd68518052 (diff) | |
Collect statistics about SLRU caches
There are a number of SLRU caches used to access important data like clog,
commit timestamps, multixact, asynchronous notifications, etc. Until now
we had no easy way to monitor these shared caches, compute hit ratios, or
count the number of reads/writes.
This commit extends the statistics collector to track this information
for a predefined list of SLRUs, and also introduces a new system view
pg_stat_slru displaying the data.
The list of built-in SLRUs is fixed, but additional SLRUs may be defined
in extensions. Unfortunately, there's no suitable registry of SLRUs, so
this patch simply defines a fixed list of SLRUs with entries for the
built-in ones and one entry for all additional SLRUs. Extensions adding
their own SLRU are fairly rare, so this seems acceptable.
This patch only allows monitoring of SLRUs, not tuning. The SLRU sizes
are still fixed (hard-coded) and it's not entirely clear which of the
SLRUs might need a GUC to tune their size. In a way, allowing us
to determine that is one of the goals of this patch.
Bump catversion as the patch introduces new functions and a new system view.
Author: Tomas Vondra
Reviewed-by: Alvaro Herrera
Discussion: https://www.postgresql.org/message-id/flat/20200119143707.gyinppnigokesjok@development
Diffstat (limited to 'src/backend/postmaster')
| -rw-r--r-- | src/backend/postmaster/pgstat.c | 297 |
1 files changed, 297 insertions, 0 deletions
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index ab42df7e1b4..04274056ca7 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -141,6 +141,26 @@ char *pgstat_stat_tmpname = NULL; */ PgStat_MsgBgWriter BgWriterStats; +/* + * SLRU statistics counters (unused in other processes) stored directly in + * stats structure so it can be sent without needing to copy things around. + * We assume this inits to zeroes. There is no central registry of SLRUs, + * so we use this fixed list instead. + * + * There's a separte entry for each SLRU we have. The "other" entry is used + * for all SLRUs without an explicit entry (e.g. SLRUs in extensions). + */ +static char *slru_names[] = {"async", "clog", "commit_timestamp", + "multixact_offset", "multixact_member", + "oldserxid", "pg_xact", "subtrans", + "other" /* has to be last */}; + +/* number of elemenents of slru_name array */ +#define SLRU_NUM_ELEMENTS (sizeof(slru_names) / sizeof(char *)) + +/* entries in the same order as slru_names */ +PgStat_MsgSLRU SLRUStats[SLRU_NUM_ELEMENTS]; + /* ---------- * Local data * ---------- @@ -255,6 +275,7 @@ static int localNumBackends = 0; */ static PgStat_ArchiverStats archiverStats; static PgStat_GlobalStats globalStats; +static PgStat_SLRUStats slruStats[SLRU_NUM_ELEMENTS]; /* * List of OIDs of databases we need to write out. 
If an entry is InvalidOid, @@ -297,6 +318,7 @@ static bool pgstat_db_requested(Oid databaseid); static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg); static void pgstat_send_funcstats(void); +static void pgstat_send_slru(void); static HTAB *pgstat_collect_oids(Oid catalogid, AttrNumber anum_oid); static PgStat_TableStatus *get_tabstat_entry(Oid rel_id, bool isshared); @@ -319,11 +341,13 @@ static void pgstat_recv_dropdb(PgStat_MsgDropdb *msg, int len); static void pgstat_recv_resetcounter(PgStat_MsgResetcounter *msg, int len); static void pgstat_recv_resetsharedcounter(PgStat_MsgResetsharedcounter *msg, int len); static void pgstat_recv_resetsinglecounter(PgStat_MsgResetsinglecounter *msg, int len); +static void pgstat_recv_resetslrucounter(PgStat_MsgResetslrucounter *msg, int len); static void pgstat_recv_autovac(PgStat_MsgAutovacStart *msg, int len); static void pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len); static void pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len); static void pgstat_recv_archiver(PgStat_MsgArchiver *msg, int len); static void pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len); +static void pgstat_recv_slru(PgStat_MsgSLRU *msg, int len); static void pgstat_recv_funcstat(PgStat_MsgFuncstat *msg, int len); static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len); static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len); @@ -907,6 +931,9 @@ pgstat_report_stat(bool force) /* Now, send function statistics */ pgstat_send_funcstats(); + + /* Finally send SLRU statistics */ + pgstat_send_slru(); } /* @@ -1373,6 +1400,30 @@ pgstat_reset_single_counter(Oid objoid, PgStat_Single_Reset_Type type) } /* ---------- + * pgstat_reset_slru_counter() - + * + * Tell the statistics collector to reset a single SLRU counter, or all + * SLRU counters (when name is null). + * + * Permission checking for this function is managed through the normal + * GRANT system. 
+ * ---------- + */ +void +pgstat_reset_slru_counter(const char *name) +{ + PgStat_MsgResetslrucounter msg; + + if (pgStatSock == PGINVALID_SOCKET) + return; + + pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_RESETSLRUCOUNTER); + msg.m_index = (name) ? pgstat_slru_index(name) : -1; + + pgstat_send(&msg, sizeof(msg)); +} + +/* ---------- * pgstat_report_autovac() - * * Called from autovacuum.c to report startup of an autovacuum process. @@ -2622,6 +2673,23 @@ pgstat_fetch_global(void) } +/* + * --------- + * pgstat_fetch_slru() - + * + * Support function for the SQL-callable pgstat* functions. Returns + * a pointer to the slru statistics struct. + * --------- + */ +PgStat_SLRUStats * +pgstat_fetch_slru(void) +{ + backend_read_statsfile(); + + return slruStats; +} + + /* ------------------------------------------------------------ * Functions for management of the shared-memory PgBackendStatus array * ------------------------------------------------------------ @@ -4325,6 +4393,46 @@ pgstat_send_bgwriter(void) MemSet(&BgWriterStats, 0, sizeof(BgWriterStats)); } +/* ---------- + * pgstat_send_slru() - + * + * Send SLRU statistics to the collector + * ---------- + */ +static void +pgstat_send_slru(void) +{ + int i; + + /* We assume this initializes to zeroes */ + static const PgStat_MsgSLRU all_zeroes; + + for (i = 0; i < SLRU_NUM_ELEMENTS; i++) + { + /* + * This function can be called even if nothing at all has happened. In + * this case, avoid sending a completely empty message to the stats + * collector. + */ + if (memcmp(&SLRUStats[i], &all_zeroes, sizeof(PgStat_MsgSLRU)) == 0) + continue; + + /* set the SLRU type before each send */ + SLRUStats[i].m_index = i; + + /* + * Prepare and send the message + */ + pgstat_setheader(&SLRUStats[i].m_hdr, PGSTAT_MTYPE_SLRU); + pgstat_send(&SLRUStats[i], sizeof(PgStat_MsgSLRU)); + + /* + * Clear out the statistics buffer, so it can be re-used. 
+ */ + MemSet(&SLRUStats[i], 0, sizeof(PgStat_MsgSLRU)); + } +} + /* ---------- * PgstatCollectorMain() - @@ -4493,6 +4601,11 @@ PgstatCollectorMain(int argc, char *argv[]) len); break; + case PGSTAT_MTYPE_RESETSLRUCOUNTER: + pgstat_recv_resetslrucounter(&msg.msg_resetslrucounter, + len); + break; + case PGSTAT_MTYPE_AUTOVAC_START: pgstat_recv_autovac(&msg.msg_autovacuum_start, len); break; @@ -4513,6 +4626,10 @@ PgstatCollectorMain(int argc, char *argv[]) pgstat_recv_bgwriter(&msg.msg_bgwriter, len); break; + case PGSTAT_MTYPE_SLRU: + pgstat_recv_slru(&msg.msg_slru, len); + break; + case PGSTAT_MTYPE_FUNCSTAT: pgstat_recv_funcstat(&msg.msg_funcstat, len); break; @@ -4783,6 +4900,12 @@ pgstat_write_statsfiles(bool permanent, bool allDbs) (void) rc; /* we'll check for error with ferror */ /* + * Write SLRU stats struct + */ + rc = fwrite(slruStats, sizeof(slruStats), 1, fpout); + (void) rc; /* we'll check for error with ferror */ + + /* * Walk through the database table. */ hash_seq_init(&hstat, pgStatDBHash); @@ -5017,6 +5140,7 @@ pgstat_read_statsfiles(Oid onlydb, bool permanent, bool deep) int32 format_id; bool found; const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename; + int i; /* * The tables will live in pgStatLocalContext. @@ -5039,6 +5163,7 @@ pgstat_read_statsfiles(Oid onlydb, bool permanent, bool deep) */ memset(&globalStats, 0, sizeof(globalStats)); memset(&archiverStats, 0, sizeof(archiverStats)); + memset(&slruStats, 0, sizeof(slruStats)); /* * Set the current timestamp (will be kept only in case we can't load an @@ -5048,6 +5173,12 @@ pgstat_read_statsfiles(Oid onlydb, bool permanent, bool deep) archiverStats.stat_reset_timestamp = globalStats.stat_reset_timestamp; /* + * Set the same reset timestamp for all SLRU items too. + */ + for (i = 0; i < SLRU_NUM_ELEMENTS; i++) + slruStats[i].stat_reset_timestamp = globalStats.stat_reset_timestamp; + + /* * Try to open the stats file. 
If it doesn't exist, the backends simply * return zero for anything and the collector simply starts from scratch * with empty counters. @@ -5110,6 +5241,17 @@ pgstat_read_statsfiles(Oid onlydb, bool permanent, bool deep) } /* + * Read SLRU stats struct + */ + if (fread(slruStats, 1, sizeof(slruStats), fpin) != sizeof(slruStats)) + { + ereport(pgStatRunningInCollector ? LOG : WARNING, + (errmsg("corrupted statistics file \"%s\"", statfile))); + memset(&slruStats, 0, sizeof(slruStats)); + goto done; + } + + /* * We found an existing collector stats file. Read it and put all the * hashtable entries into place. */ @@ -5407,6 +5549,7 @@ pgstat_read_db_statsfile_timestamp(Oid databaseid, bool permanent, PgStat_StatDBEntry dbentry; PgStat_GlobalStats myGlobalStats; PgStat_ArchiverStats myArchiverStats; + PgStat_SLRUStats mySLRUStats[SLRU_NUM_ELEMENTS]; FILE *fpin; int32 format_id; const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename; @@ -5461,6 +5604,17 @@ pgstat_read_db_statsfile_timestamp(Oid databaseid, bool permanent, return false; } + /* + * Read SLRU stats struct + */ + if (fread(mySLRUStats, 1, sizeof(mySLRUStats), fpin) != sizeof(mySLRUStats)) + { + ereport(pgStatRunningInCollector ? LOG : WARNING, + (errmsg("corrupted statistics file \"%s\"", statfile))); + FreeFile(fpin); + return false; + } + /* By default, we're going to return the timestamp of the global file. */ *ts = myGlobalStats.stats_timestamp; @@ -6062,6 +6216,33 @@ pgstat_recv_resetsinglecounter(PgStat_MsgResetsinglecounter *msg, int len) } /* ---------- + * pgstat_recv_resetslrucounter() - + * + * Reset some SLRU statistics of the cluster. 
+ * ---------- + */ +static void +pgstat_recv_resetslrucounter(PgStat_MsgResetslrucounter *msg, int len) +{ + int i; + TimestampTz ts = GetCurrentTimestamp(); + + memset(&slruStats, 0, sizeof(slruStats)); + + elog(LOG, "msg->m_index = %d", msg->m_index); + + for (i = 0; i < SLRU_NUM_ELEMENTS; i++) + { + /* reset entry with the given index, or all entries (index is -1) */ + if ((msg->m_index == -1) || (msg->m_index == i)) + { + memset(&slruStats[i], 0, sizeof(slruStats[i])); + slruStats[i].stat_reset_timestamp = ts; + } + } +} + +/* ---------- * pgstat_recv_autovac() - * * Process an autovacuum signalling message. @@ -6218,6 +6399,24 @@ pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len) } /* ---------- + * pgstat_recv_slru() - + * + * Process a SLRU message. + * ---------- + */ +static void +pgstat_recv_slru(PgStat_MsgSLRU *msg, int len) +{ + slruStats[msg->m_index].blocks_zeroed += msg->m_blocks_zeroed; + slruStats[msg->m_index].blocks_hit += msg->m_blocks_hit; + slruStats[msg->m_index].blocks_read += msg->m_blocks_read; + slruStats[msg->m_index].blocks_written += msg->m_blocks_written; + slruStats[msg->m_index].blocks_exists += msg->m_blocks_exists; + slruStats[msg->m_index].flush += msg->m_flush; + slruStats[msg->m_index].truncate += msg->m_truncate; +} + +/* ---------- * pgstat_recv_recoveryconflict() - * * Process a RECOVERYCONFLICT message. @@ -6471,3 +6670,101 @@ pgstat_clip_activity(const char *raw_activity) return activity; } + +/* + * pgstat_slru_index + * + * Determine index of entry for a SLRU with a given name. If there's no exact + * match, returns index of the last "other" entry used for SLRUs defined in + * external proejcts. 
+ */ +int +pgstat_slru_index(const char *name) +{ + int i; + + for (i = 0; i < SLRU_NUM_ELEMENTS; i++) + { + if (strcmp(slru_names[i], name) == 0) + return i; + } + + /* return index of the last entry (which is the "other" one) */ + return (SLRU_NUM_ELEMENTS - 1); +} + +/* + * pgstat_slru_name + * + * Returns SLRU name for an index. The index may be above SLRU_NUM_ELEMENTS, + * in which case this returns NULL. This allows writing code that does not + * know the number of entries in advance. + */ +char * +pgstat_slru_name(int idx) +{ + Assert(idx >= 0); + + if (idx >= SLRU_NUM_ELEMENTS) + return NULL; + + return slru_names[idx]; +} + +/* + * slru_entry + * + * Returns pointer to entry with counters for given SLRU (based on the name + * stored in SlruCtl as lwlock tranche name). + */ +static PgStat_MsgSLRU * +slru_entry(SlruCtl ctl) +{ + int idx = pgstat_slru_index(ctl->shared->lwlock_tranche_name); + + Assert((idx >= 0) && (idx < SLRU_NUM_ELEMENTS)); + + return &SLRUStats[idx]; +} + +void +pgstat_count_slru_page_zeroed(SlruCtl ctl) +{ + slru_entry(ctl)->m_blocks_zeroed += 1; +} + +void +pgstat_count_slru_page_hit(SlruCtl ctl) +{ + slru_entry(ctl)->m_blocks_hit += 1; +} + +void +pgstat_count_slru_page_exists(SlruCtl ctl) +{ + slru_entry(ctl)->m_blocks_exists += 1; +} + +void +pgstat_count_slru_page_read(SlruCtl ctl) +{ + slru_entry(ctl)->m_blocks_read += 1; +} + +void +pgstat_count_slru_page_written(SlruCtl ctl) +{ + slru_entry(ctl)->m_blocks_written += 1; +} + +void +pgstat_count_slru_flush(SlruCtl ctl) +{ + slru_entry(ctl)->m_flush += 1; +} + +void +pgstat_count_slru_truncate(SlruCtl ctl) +{ + slru_entry(ctl)->m_truncate += 1; +} |
