pg_stat_statements: Widen query IDs from 32 bits to 64 bits.
author Robert Haas <rhaas@postgresql.org>
Wed, 11 Oct 2017 23:52:46 +0000 (19:52 -0400)
committer Robert Haas <rhaas@postgresql.org>
Wed, 11 Oct 2017 23:52:46 +0000 (19:52 -0400)
This takes advantage of the infrastructure introduced by commit
81c5e46c490e2426db243eada186995da5bb0ba7 to greatly reduce the
likelihood that two different queries will end up with the same query
ID.  It's still possible, of course, but whereas previously the chances
of a collision reached 25% at around 50,000 queries, it will now take
more than 3 billion queries to reach that same collision probability.

Backward incompatibility: Because the type exposed at the SQL level is
int8, users may now see negative query IDs in the pg_stat_statements
view (and also query IDs greater than 4 billion, which was the old
limit).

Patch by me, reviewed by Michael Paquier and Peter Geoghegan.

Discussion: http://postgr.es/m/CA+TgmobG_Kp4cBKFmsznUAaM1GWW6hhRNiZC0KjRMOOeYnz5Yw@mail.gmail.com

contrib/pg_stat_statements/pg_stat_statements.c
src/backend/executor/execParallel.c
src/backend/nodes/outfuncs.c
src/backend/nodes/readfuncs.c
src/backend/rewrite/rewriteHandler.c
src/include/nodes/parsenodes.h
src/include/nodes/plannodes.h

index a0e7a46871dcfa6f98d4e026f4cd1d77d4b0ed0c..b04b4d6ce11a798667a3d4c2ceb08139a5682695 100644 (file)
@@ -21,7 +21,7 @@
  * as the collations of Vars and, most notably, the values of constants.
  *
  * This jumble is acquired at the end of parse analysis of each query, and
- * a 32-bit hash of it is stored into the query's Query.queryId field.
+ * a 64-bit hash of it is stored into the query's Query.queryId field.
  * The server then copies this value around, making it available in plan
  * tree(s) generated from the query.  The executor can then use this value
  * to blame query costs on the proper queryId.
@@ -95,7 +95,7 @@ PG_MODULE_MAGIC;
 #define PGSS_TEXT_FILE PG_STAT_TMP_DIR "/pgss_query_texts.stat"
 
 /* Magic number identifying the stats file format */
-static const uint32 PGSS_FILE_HEADER = 0x20140125;
+static const uint32 PGSS_FILE_HEADER = 0x20171004;
 
 /* PostgreSQL major version number, changes in which invalidate all entries */
 static const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100;
@@ -130,7 +130,7 @@ typedef struct pgssHashKey
 {
        Oid                     userid;                 /* user OID */
        Oid                     dbid;                   /* database OID */
-       uint32          queryid;                /* query identifier */
+       uint64          queryid;                /* query identifier */
 } pgssHashKey;
 
 /*
@@ -301,10 +301,8 @@ static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
                                        ProcessUtilityContext context, ParamListInfo params,
                                        QueryEnvironment *queryEnv,
                                        DestReceiver *dest, char *completionTag);
-static uint32 pgss_hash_fn(const void *key, Size keysize);
-static int     pgss_match_fn(const void *key1, const void *key2, Size keysize);
-static uint32 pgss_hash_string(const char *str, int len);
-static void pgss_store(const char *query, uint32 queryId,
+static uint64 pgss_hash_string(const char *str, int len);
+static void pgss_store(const char *query, uint64 queryId,
                   int query_location, int query_len,
                   double total_time, uint64 rows,
                   const BufferUsage *bufusage,
@@ -500,12 +498,10 @@ pgss_shmem_startup(void)
        memset(&info, 0, sizeof(info));
        info.keysize = sizeof(pgssHashKey);
        info.entrysize = sizeof(pgssEntry);
-       info.hash = pgss_hash_fn;
-       info.match = pgss_match_fn;
        pgss_hash = ShmemInitHash("pg_stat_statements hash",
                                                          pgss_max, pgss_max,
                                                          &info,
-                                                         HASH_ELEM | HASH_FUNCTION | HASH_COMPARE);
+                                                         HASH_ELEM | HASH_BLOBS);
 
        LWLockRelease(AddinShmemInitLock);
 
@@ -781,7 +777,7 @@ pgss_post_parse_analyze(ParseState *pstate, Query *query)
                prev_post_parse_analyze_hook(pstate, query);
 
        /* Assert we didn't do this already */
-       Assert(query->queryId == 0);
+       Assert(query->queryId == UINT64CONST(0));
 
        /* Safety check... */
        if (!pgss || !pgss_hash)
@@ -797,7 +793,7 @@ pgss_post_parse_analyze(ParseState *pstate, Query *query)
         */
        if (query->utilityStmt)
        {
-               query->queryId = 0;
+               query->queryId = UINT64CONST(0);
                return;
        }
 
@@ -812,14 +808,15 @@ pgss_post_parse_analyze(ParseState *pstate, Query *query)
 
        /* Compute query ID and mark the Query node with it */
        JumbleQuery(&jstate, query);
-       query->queryId = hash_any(jstate.jumble, jstate.jumble_len);
+       query->queryId =
+               DatumGetUInt64(hash_any_extended(jstate.jumble, jstate.jumble_len, 0));
 
        /*
         * If we are unlucky enough to get a hash of zero, use 1 instead, to
         * prevent confusion with the utility-statement case.
         */
-       if (query->queryId == 0)
-               query->queryId = 1;
+       if (query->queryId == UINT64CONST(0))
+               query->queryId = UINT64CONST(1);
 
        /*
         * If we were able to identify any ignorable constants, we immediately
@@ -855,7 +852,7 @@ pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
         * counting of optimizable statements that are directly contained in
         * utility statements.
         */
-       if (pgss_enabled() && queryDesc->plannedstmt->queryId != 0)
+       if (pgss_enabled() && queryDesc->plannedstmt->queryId != UINT64CONST(0))
        {
                /*
                 * Set up to track total elapsed time in ExecutorRun.  Make sure the
@@ -926,9 +923,9 @@ pgss_ExecutorFinish(QueryDesc *queryDesc)
 static void
 pgss_ExecutorEnd(QueryDesc *queryDesc)
 {
-       uint32          queryId = queryDesc->plannedstmt->queryId;
+       uint64          queryId = queryDesc->plannedstmt->queryId;
 
-       if (queryId != 0 && queryDesc->totaltime && pgss_enabled())
+       if (queryId != UINT64CONST(0) && queryDesc->totaltime && pgss_enabled())
        {
                /*
                 * Make sure stats accumulation is done.  (Note: it's okay if several
@@ -1069,45 +1066,16 @@ pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
        }
 }
 
-/*
- * Calculate hash value for a key
- */
-static uint32
-pgss_hash_fn(const void *key, Size keysize)
-{
-       const pgssHashKey *k = (const pgssHashKey *) key;
-
-       return hash_uint32((uint32) k->userid) ^
-               hash_uint32((uint32) k->dbid) ^
-               hash_uint32((uint32) k->queryid);
-}
-
-/*
- * Compare two keys - zero means match
- */
-static int
-pgss_match_fn(const void *key1, const void *key2, Size keysize)
-{
-       const pgssHashKey *k1 = (const pgssHashKey *) key1;
-       const pgssHashKey *k2 = (const pgssHashKey *) key2;
-
-       if (k1->userid == k2->userid &&
-               k1->dbid == k2->dbid &&
-               k1->queryid == k2->queryid)
-               return 0;
-       else
-               return 1;
-}
-
 /*
  * Given an arbitrarily long query string, produce a hash for the purposes of
  * identifying the query, without normalizing constants.  Used when hashing
  * utility statements.
  */
-static uint32
+static uint64
 pgss_hash_string(const char *str, int len)
 {
-       return hash_any((const unsigned char *) str, len);
+       return DatumGetUInt64(hash_any_extended((const unsigned char *) str,
+                                                                                       len, 0));
 }
 
 /*
@@ -1121,7 +1089,7 @@ pgss_hash_string(const char *str, int len)
  * query string.  total_time, rows, bufusage are ignored in this case.
  */
 static void
-pgss_store(const char *query, uint32 queryId,
+pgss_store(const char *query, uint64 queryId,
                   int query_location, int query_len,
                   double total_time, uint64 rows,
                   const BufferUsage *bufusage,
@@ -1173,7 +1141,7 @@ pgss_store(const char *query, uint32 queryId,
        /*
         * For utility statements, we just hash the query string to get an ID.
         */
-       if (queryId == 0)
+       if (queryId == UINT64CONST(0))
                queryId = pgss_hash_string(query, query_len);
 
        /* Set up key for hashtable search */
@@ -2324,8 +2292,10 @@ AppendJumble(pgssJumbleState *jstate, const unsigned char *item, Size size)
 
                if (jumble_len >= JUMBLE_SIZE)
                {
-                       uint32          start_hash = hash_any(jumble, JUMBLE_SIZE);
+                       uint64          start_hash;
 
+                       start_hash = DatumGetUInt64(hash_any_extended(jumble,
+                                                                                                                 JUMBLE_SIZE, 0));
                        memcpy(jumble, &start_hash, sizeof(start_hash));
                        jumble_len = sizeof(start_hash);
                }
index 5dc26ed17ab64223b20a317e31b973a3db3ff563..1b477baecb89e1b891ffca4287f94596292b135e 100644 (file)
@@ -162,7 +162,7 @@ ExecSerializePlan(Plan *plan, EState *estate)
         */
        pstmt = makeNode(PlannedStmt);
        pstmt->commandType = CMD_SELECT;
-       pstmt->queryId = 0;
+       pstmt->queryId = UINT64CONST(0);
        pstmt->hasReturning = false;
        pstmt->hasModifyingCTE = false;
        pstmt->canSetTag = true;
index 2532edc94a2b260949be08e333f5317e5251717f..43d62062bc03c7525d815502bd3be3bf06650884 100644 (file)
@@ -54,6 +54,11 @@ static void outChar(StringInfo str, char c);
 #define WRITE_UINT_FIELD(fldname) \
        appendStringInfo(str, " :" CppAsString(fldname) " %u", node->fldname)
 
+/* Write an unsigned integer field (anything written with UINT64_FORMAT) */
+#define WRITE_UINT64_FIELD(fldname) \
+       appendStringInfo(str, " :" CppAsString(fldname) " " UINT64_FORMAT, \
+                                        node->fldname)
+
 /* Write an OID field (don't hard-wire assumption that OID is same as uint) */
 #define WRITE_OID_FIELD(fldname) \
        appendStringInfo(str, " :" CppAsString(fldname) " %u", node->fldname)
@@ -260,7 +265,7 @@ _outPlannedStmt(StringInfo str, const PlannedStmt *node)
        WRITE_NODE_TYPE("PLANNEDSTMT");
 
        WRITE_ENUM_FIELD(commandType, CmdType);
-       WRITE_UINT_FIELD(queryId);
+       WRITE_UINT64_FIELD(queryId);
        WRITE_BOOL_FIELD(hasReturning);
        WRITE_BOOL_FIELD(hasModifyingCTE);
        WRITE_BOOL_FIELD(canSetTag);
index 07ba69178c84d30e49c41d9b9901acbe91c7d6ee..ccb6a1f4acbb7edde23593ebcb2b3d41fbed19cb 100644 (file)
@@ -33,6 +33,7 @@
 #include "nodes/parsenodes.h"
 #include "nodes/plannodes.h"
 #include "nodes/readfuncs.h"
+#include "utils/builtins.h"
 
 
 /*
        token = pg_strtok(&length);             /* get field value */ \
        local_node->fldname = atoui(token)
 
+/* Read an unsigned integer field (anything written using UINT64_FORMAT) */
+#define READ_UINT64_FIELD(fldname) \
+       token = pg_strtok(&length);             /* skip :fldname */ \
+       token = pg_strtok(&length);             /* get field value */ \
+       local_node->fldname = pg_strtouint64(token, NULL, 10)
+
 /* Read an long integer field (anything written as ":fldname %ld") */
 #define READ_LONG_FIELD(fldname) \
        token = pg_strtok(&length);             /* skip :fldname */ \
@@ -231,7 +238,7 @@ _readQuery(void)
 
        READ_ENUM_FIELD(commandType, CmdType);
        READ_ENUM_FIELD(querySource, QuerySource);
-       local_node->queryId = 0;        /* not saved in output format */
+       local_node->queryId = UINT64CONST(0);   /* not saved in output format */
        READ_BOOL_FIELD(canSetTag);
        READ_NODE_FIELD(utilityStmt);
        READ_INT_FIELD(resultRelation);
@@ -1456,7 +1463,7 @@ _readPlannedStmt(void)
        READ_LOCALS(PlannedStmt);
 
        READ_ENUM_FIELD(commandType, CmdType);
-       READ_UINT_FIELD(queryId);
+       READ_UINT64_FIELD(queryId);
        READ_BOOL_FIELD(hasReturning);
        READ_BOOL_FIELD(hasModifyingCTE);
        READ_BOOL_FIELD(canSetTag);
index 7054d4f77d994fc48fda704d97cb062b048ff88d..7a61af79059ea9bab5b08b8f0c8aae099c07062f 100644 (file)
@@ -3575,7 +3575,7 @@ RewriteQuery(Query *parsetree, List *rewrite_events)
 List *
 QueryRewrite(Query *parsetree)
 {
-       uint32          input_query_id = parsetree->queryId;
+       uint64          input_query_id = parsetree->queryId;
        List       *querylist;
        List       *results;
        ListCell   *l;
index 50eec730b3da4ca68e2e91f015d47b3303090719..732e5d6788334dd9606d1c16e2533d54dc444ac6 100644 (file)
@@ -111,7 +111,7 @@ typedef struct Query
 
        QuerySource querySource;        /* where did I come from? */
 
-       uint32          queryId;                /* query identifier (can be set by plugins) */
+       uint64          queryId;                /* query identifier (can be set by plugins) */
 
        bool            canSetTag;              /* do I set the command result tag? */
 
index a382331f41976152cec558a03f991a901dc1dbf7..dd74efa9a4138330c2e10785ba9526b0e43b35c7 100644 (file)
@@ -44,7 +44,7 @@ typedef struct PlannedStmt
 
        CmdType         commandType;    /* select|insert|update|delete|utility */
 
-       uint32          queryId;                /* query identifier (copied from Query) */
+       uint64          queryId;                /* query identifier (copied from Query) */
 
        bool            hasReturning;   /* is it insert|update|delete RETURNING? */