Fix performance regression from session statistics.
authorAndres Freund <andres@anarazel.de>
Thu, 16 Sep 2021 09:02:40 +0000 (02:02 -0700)
committerAndres Freund <andres@anarazel.de>
Thu, 16 Sep 2021 09:05:50 +0000 (02:05 -0700)
Session statistics, as introduced by 960869da08, had several shortcomings:

- an additional GetCurrentTimestamp() call that also impaired the accuracy of
  the data collected

  This can be avoided by passing the current timestamp we already have in
  pgstat_report_stat().

- an additional statistics UDP packet sent every 500ms

  This is solved by adding the new statistics to PgStat_MsgTabstat.
  This is conceptually ugly, because session statistics are not
  table statistics.  But the struct already contains data unrelated
  to tables, so there is not much damage done.

  Connection and disconnection are reported in separate messages, which
  reduces the number of additional messages to two messages per session and a
  slight increase in PgStat_MsgTabstat size (but the same number of table
  stats fit).

- Session time computation could overflow on systems where long is 32 bit.

Reported-By: Andres Freund <andres@anarazel.de>
Author: Andres Freund <andres@anarazel.de>
Author: Laurenz Albe <laurenz.albe@cybertec.at>
Discussion: https://postgr.es/m/20210801205501.nyxzxoelqoo4x2qc%40alap3.anarazel.de
Backpatch: 14-, where the feature was introduced.

src/backend/postmaster/pgstat.c
src/backend/tcop/postgres.c
src/backend/utils/activity/backend_status.c
src/include/pgstat.h
src/tools/pgindent/typedefs.list

index 3450a10129bbf0e7bedcb19be3d0383ff5cc38a5..ab9c37c64f0d6e4f741e61224e6457cc43142ce3 100644 (file)
@@ -246,6 +246,7 @@ static int  pgStatXactCommit = 0;
 static int pgStatXactRollback = 0;
 PgStat_Counter pgStatBlockReadTime = 0;
 PgStat_Counter pgStatBlockWriteTime = 0;
+static PgStat_Counter pgLastSessionReportTime = 0;
 PgStat_Counter pgStatActiveTime = 0;
 PgStat_Counter pgStatTransactionIdleTime = 0;
 SessionEndType pgStatSessionEndCause = DISCONNECT_NORMAL;
@@ -330,11 +331,12 @@ static bool pgstat_db_requested(Oid databaseid);
 static PgStat_StatReplSlotEntry *pgstat_get_replslot_entry(NameData name, bool create_it);
 static void pgstat_reset_replslot(PgStat_StatReplSlotEntry *slotstats, TimestampTz ts);
 
-static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg);
+static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg, TimestampTz now);
 static void pgstat_send_funcstats(void);
 static void pgstat_send_slru(void);
 static HTAB *pgstat_collect_oids(Oid catalogid, AttrNumber anum_oid);
-static void pgstat_send_connstats(bool disconnect, TimestampTz last_report);
+static bool pgstat_should_report_connstat(void);
+static void pgstat_report_disconnect(Oid dboid);
 
 static PgStat_TableStatus *get_tabstat_entry(Oid rel_id, bool isshared);
 
@@ -366,7 +368,8 @@ static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len);
 static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len);
 static void pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len);
 static void pgstat_recv_checksum_failure(PgStat_MsgChecksumFailure *msg, int len);
-static void pgstat_recv_connstat(PgStat_MsgConn *msg, int len);
+static void pgstat_recv_connect(PgStat_MsgConnect *msg, int len);
+static void pgstat_recv_disconnect(PgStat_MsgDisconnect *msg, int len);
 static void pgstat_recv_replslot(PgStat_MsgReplSlot *msg, int len);
 static void pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len);
 
@@ -890,12 +893,11 @@ pgstat_report_stat(bool disconnect)
        !TimestampDifferenceExceeds(last_report, now, PGSTAT_STAT_INTERVAL))
        return;
 
-   /* for backends, send connection statistics */
-   if (MyBackendType == B_BACKEND)
-       pgstat_send_connstats(disconnect, last_report);
-
    last_report = now;
 
+   if (disconnect)
+       pgstat_report_disconnect(MyDatabaseId);
+
    /*
     * Destroy pgStatTabHash before we start invalidating PgStat_TableEntry
     * entries it points to.  (Should we fail partway through the loop below,
@@ -947,7 +949,7 @@ pgstat_report_stat(bool disconnect)
                   sizeof(PgStat_TableCounts));
            if (++this_msg->m_nentries >= PGSTAT_NUM_TABENTRIES)
            {
-               pgstat_send_tabstat(this_msg);
+               pgstat_send_tabstat(this_msg, now);
                this_msg->m_nentries = 0;
            }
        }
@@ -959,13 +961,14 @@ pgstat_report_stat(bool disconnect)
 
    /*
     * Send partial messages.  Make sure that any pending xact commit/abort
-    * gets counted, even if there are no table stats to send.
+    * and connection stats get counted, even if there are no table stats to
+    * send.
     */
    if (regular_msg.m_nentries > 0 ||
-       pgStatXactCommit > 0 || pgStatXactRollback > 0)
-       pgstat_send_tabstat(&regular_msg);
+       pgStatXactCommit > 0 || pgStatXactRollback > 0 || disconnect)
+       pgstat_send_tabstat(&regular_msg, now);
    if (shared_msg.m_nentries > 0)
-       pgstat_send_tabstat(&shared_msg);
+       pgstat_send_tabstat(&shared_msg, now);
 
    /* Now, send function statistics */
    pgstat_send_funcstats();
@@ -981,7 +984,7 @@ pgstat_report_stat(bool disconnect)
  * Subroutine for pgstat_report_stat: finish and send a tabstat message
  */
 static void
-pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg)
+pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg, TimestampTz now)
 {
    int         n;
    int         len;
@@ -1000,10 +1003,34 @@ pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg)
        tsmsg->m_xact_rollback = pgStatXactRollback;
        tsmsg->m_block_read_time = pgStatBlockReadTime;
        tsmsg->m_block_write_time = pgStatBlockWriteTime;
+
+       if (pgstat_should_report_connstat())
+       {
+           long        secs;
+           int         usecs;
+
+           /*
+            * pgLastSessionReportTime is initialized to MyStartTimestamp by
+            * pgstat_report_connect().
+            */
+           TimestampDifference(pgLastSessionReportTime, now, &secs, &usecs);
+           pgLastSessionReportTime = now;
+           tsmsg->m_session_time = (PgStat_Counter) secs * 1000000 + usecs;
+           tsmsg->m_active_time = pgStatActiveTime;
+           tsmsg->m_idle_in_xact_time = pgStatTransactionIdleTime;
+       }
+       else
+       {
+           tsmsg->m_session_time = 0;
+           tsmsg->m_active_time = 0;
+           tsmsg->m_idle_in_xact_time = 0;
+       }
        pgStatXactCommit = 0;
        pgStatXactRollback = 0;
        pgStatBlockReadTime = 0;
        pgStatBlockWriteTime = 0;
+       pgStatActiveTime = 0;
+       pgStatTransactionIdleTime = 0;
    }
    else
    {
@@ -1011,6 +1038,9 @@ pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg)
        tsmsg->m_xact_rollback = 0;
        tsmsg->m_block_read_time = 0;
        tsmsg->m_block_write_time = 0;
+       tsmsg->m_session_time = 0;
+       tsmsg->m_active_time = 0;
+       tsmsg->m_idle_in_xact_time = 0;
    }
 
    n = tsmsg->m_nentries;
@@ -1378,49 +1408,6 @@ pgstat_drop_relation(Oid relid)
 }
 #endif                         /* NOT_USED */
 
-
-/* ----------
- * pgstat_send_connstats() -
- *
- * Tell the collector about session statistics.
- * The parameter "disconnect" will be true when the backend exits.
- * "last_report" is the last time we were called (0 if never).
- * ----------
- */
-static void
-pgstat_send_connstats(bool disconnect, TimestampTz last_report)
-{
-   PgStat_MsgConn msg;
-   long        secs;
-   int         usecs;
-
-   if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
-       return;
-
-   pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_CONNECTION);
-   msg.m_databaseid = MyDatabaseId;
-
-   /* session time since the last report */
-   TimestampDifference(((last_report == 0) ? MyStartTimestamp : last_report),
-                       GetCurrentTimestamp(),
-                       &secs, &usecs);
-   msg.m_session_time = secs * 1000000 + usecs;
-
-   msg.m_disconnect = disconnect ? pgStatSessionEndCause : DISCONNECT_NOT_YET;
-
-   msg.m_active_time = pgStatActiveTime;
-   pgStatActiveTime = 0;
-
-   msg.m_idle_in_xact_time = pgStatTransactionIdleTime;
-   pgStatTransactionIdleTime = 0;
-
-   /* report a new session only the first time */
-   msg.m_count = (last_report == 0) ? 1 : 0;
-
-   pgstat_send(&msg, sizeof(PgStat_MsgConn));
-}
-
-
 /* ----------
  * pgstat_reset_counters() -
  *
@@ -1759,6 +1746,63 @@ pgstat_report_tempfile(size_t filesize)
    pgstat_send(&msg, sizeof(msg));
 }
 
+/* --------
+ * pgstat_report_connect() -
+ *
+ * Tell the collector about a new connection.
+ * --------
+ */
+void
+pgstat_report_connect(Oid dboid)
+{
+   PgStat_MsgConnect msg;
+
+   if (!pgstat_should_report_connstat())
+       return;
+
+   pgLastSessionReportTime = MyStartTimestamp;
+
+   pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_CONNECT);
+   msg.m_databaseid = MyDatabaseId;
+   pgstat_send(&msg, sizeof(PgStat_MsgConnect));
+}
+
+/* --------
+ * pgstat_report_disconnect() -
+ *
+ * Tell the collector about a disconnect.
+ * --------
+ */
+static void
+pgstat_report_disconnect(Oid dboid)
+{
+   PgStat_MsgDisconnect msg;
+
+   if (!pgstat_should_report_connstat())
+       return;
+
+   pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_DISCONNECT);
+   msg.m_databaseid = MyDatabaseId;
+   msg.m_cause = pgStatSessionEndCause;
+   pgstat_send(&msg, sizeof(PgStat_MsgDisconnect));
+}
+
+/* --------
+ * pgstat_should_report_connstats() -
+ *
+ * We report session statistics only for normal backend processes.  Parallel
+ * workers run in parallel, so they don't contribute to session times, even
+ * though they use CPU time. Walsender processes could be considered here,
+ * but they have different session characteristics from normal backends (for
+ * example, they are always "active"), so they would skew session statistics.
+ * ----------
+ */
+static bool
+pgstat_should_report_connstat(void)
+{
+   return MyBackendType == B_BACKEND;
+}
+
 /* ----------
  * pgstat_report_replslot() -
  *
@@ -3465,8 +3509,12 @@ PgstatCollectorMain(int argc, char *argv[])
                    pgstat_recv_replslot(&msg.msg_replslot, len);
                    break;
 
-               case PGSTAT_MTYPE_CONNECTION:
-                   pgstat_recv_connstat(&msg.msg_conn, len);
+               case PGSTAT_MTYPE_CONNECT:
+                   pgstat_recv_connect(&msg.msg_connect, len);
+                   break;
+
+               case PGSTAT_MTYPE_DISCONNECT:
+                   pgstat_recv_disconnect(&msg.msg_disconnect, len);
                    break;
 
                default:
@@ -4904,6 +4952,10 @@ pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len)
    dbentry->n_block_read_time += msg->m_block_read_time;
    dbentry->n_block_write_time += msg->m_block_write_time;
 
+   dbentry->total_session_time += msg->m_session_time;
+   dbentry->total_active_time += msg->m_active_time;
+   dbentry->total_idle_in_xact_time += msg->m_idle_in_xact_time;
+
    /*
     * Process all table entries in the message.
     */
@@ -5568,23 +5620,34 @@ pgstat_recv_replslot(PgStat_MsgReplSlot *msg, int len)
 }
 
 /* ----------
- * pgstat_recv_connstat() -
+ * pgstat_recv_connect() -
  *
- *  Process connection information.
+ * Process a CONNECT message.
  * ----------
  */
 static void
-pgstat_recv_connstat(PgStat_MsgConn *msg, int len)
+pgstat_recv_connect(PgStat_MsgConnect *msg, int len)
 {
    PgStat_StatDBEntry *dbentry;
 
    dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
+   dbentry->n_sessions++;
+}
 
-   dbentry->n_sessions += msg->m_count;
-   dbentry->total_session_time += msg->m_session_time;
-   dbentry->total_active_time += msg->m_active_time;
-   dbentry->total_idle_in_xact_time += msg->m_idle_in_xact_time;
-   switch (msg->m_disconnect)
+/* ----------
+ * pgstat_recv_disconnect() -
+ *
+ * Process a DISCONNECT message.
+ * ----------
+ */
+static void
+pgstat_recv_disconnect(PgStat_MsgDisconnect *msg, int len)
+{
+   PgStat_StatDBEntry *dbentry;
+
+   dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
+
+   switch (msg->m_cause)
    {
        case DISCONNECT_NOT_YET:
        case DISCONNECT_NORMAL:
index 82d22e4eed76754f1bb7beca7152a367c08bd2a5..489e27f99aef6c511d718c17289b2612ab01bfb1 100644 (file)
@@ -4116,6 +4116,8 @@ PostgresMain(int argc, char *argv[],
    if (IsUnderPostmaster && Log_disconnections)
        on_proc_exit(log_disconnections, 0);
 
+   pgstat_report_connect(MyDatabaseId);
+
    /* Perform initialization specific to a WAL sender process. */
    if (am_walsender)
        InitWalSender();
index e19c4506efa4851e6104fe0d272d3d7aa39ff206..72295988226389b7eb671a5d47b6d628e779cb64 100644 (file)
@@ -590,9 +590,9 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
 
        if (beentry->st_state == STATE_RUNNING ||
            beentry->st_state == STATE_FASTPATH)
-           pgstat_count_conn_active_time(secs * 1000000 + usecs);
+           pgstat_count_conn_active_time((PgStat_Counter) secs * 1000000 + usecs);
        else
-           pgstat_count_conn_txn_idle_time(secs * 1000000 + usecs);
+           pgstat_count_conn_txn_idle_time((PgStat_Counter) secs * 1000000 + usecs);
    }
 
    /*
index 509849c7ff4ae9df91f98089d6cfc4a2cea492d9..a7b386821f6d603038383354ac715032bf9147e6 100644 (file)
@@ -81,7 +81,8 @@ typedef enum StatMsgType
    PGSTAT_MTYPE_DEADLOCK,
    PGSTAT_MTYPE_CHECKSUMFAILURE,
    PGSTAT_MTYPE_REPLSLOT,
-   PGSTAT_MTYPE_CONNECTION,
+   PGSTAT_MTYPE_CONNECT,
+   PGSTAT_MTYPE_DISCONNECT,
 } StatMsgType;
 
 /* ----------
@@ -279,7 +280,7 @@ typedef struct PgStat_TableEntry
  * ----------
  */
 #define PGSTAT_NUM_TABENTRIES  \
-   ((PGSTAT_MSG_PAYLOAD - sizeof(Oid) - 3 * sizeof(int) - 2 * sizeof(PgStat_Counter))  \
+   ((PGSTAT_MSG_PAYLOAD - sizeof(Oid) - 3 * sizeof(int) - 5 * sizeof(PgStat_Counter)) \
     / sizeof(PgStat_TableEntry))
 
 typedef struct PgStat_MsgTabstat
@@ -291,6 +292,9 @@ typedef struct PgStat_MsgTabstat
    int         m_xact_rollback;
    PgStat_Counter m_block_read_time;   /* times in microseconds */
    PgStat_Counter m_block_write_time;
+   PgStat_Counter m_session_time;
+   PgStat_Counter m_active_time;
+   PgStat_Counter m_idle_in_xact_time;
    PgStat_TableEntry m_entry[PGSTAT_NUM_TABENTRIES];
 } PgStat_MsgTabstat;
 
@@ -653,20 +657,26 @@ typedef struct PgStat_MsgChecksumFailure
 } PgStat_MsgChecksumFailure;
 
 /* ----------
- * PgStat_MsgConn          Sent by the backend to update connection statistics.
+ * PgStat_MsgConnect           Sent by the backend upon connection
+ *                             establishment
  * ----------
  */
-typedef struct PgStat_MsgConn
+typedef struct PgStat_MsgConnect
 {
    PgStat_MsgHdr m_hdr;
    Oid         m_databaseid;
-   PgStat_Counter m_count;
-   PgStat_Counter m_session_time;
-   PgStat_Counter m_active_time;
-   PgStat_Counter m_idle_in_xact_time;
-   SessionEndType m_disconnect;
-} PgStat_MsgConn;
+} PgStat_MsgConnect;
 
+/* ----------
+ * PgStat_MsgDisconnect            Sent by the backend when disconnecting
+ * ----------
+ */
+typedef struct PgStat_MsgDisconnect
+{
+   PgStat_MsgHdr m_hdr;
+   Oid         m_databaseid;
+   SessionEndType m_cause;
+} PgStat_MsgDisconnect;
 
 /* ----------
  * PgStat_Msg                  Union over all possible messages.
@@ -700,7 +710,8 @@ typedef union PgStat_Msg
    PgStat_MsgTempFile msg_tempfile;
    PgStat_MsgChecksumFailure msg_checksumfailure;
    PgStat_MsgReplSlot msg_replslot;
-   PgStat_MsgConn msg_conn;
+   PgStat_MsgConnect msg_connect;
+   PgStat_MsgDisconnect msg_disconnect;
 } PgStat_Msg;
 
 
@@ -1010,6 +1021,7 @@ extern void pgstat_reset_single_counter(Oid objectid, PgStat_Single_Reset_Type t
 extern void pgstat_reset_slru_counter(const char *);
 extern void pgstat_reset_replslot_counter(const char *name);
 
+extern void pgstat_report_connect(Oid dboid);
 extern void pgstat_report_autovac(Oid dboid);
 extern void pgstat_report_vacuum(Oid tableoid, bool shared,
                                 PgStat_Counter livetuples, PgStat_Counter deadtuples);
index 423780652fbef5f05e0cd3a4a6e36b28118c9b1b..402a6617a9850ba33deea2202a6578da54f5f01d 100644 (file)
@@ -1923,8 +1923,9 @@ PgStat_MsgArchiver
 PgStat_MsgAutovacStart
 PgStat_MsgBgWriter
 PgStat_MsgChecksumFailure
-PgStat_MsgConn
+PgStat_MsgConnect
 PgStat_MsgDeadlock
+PgStat_MsgDisconnect
 PgStat_MsgDropdb
 PgStat_MsgDummy
 PgStat_MsgFuncpurge