Refactor parse_filename_for_nontemp_relation to parse more.
authorRobert Haas <rhaas@postgresql.org>
Mon, 23 Oct 2023 19:08:53 +0000 (15:08 -0400)
committerRobert Haas <rhaas@postgresql.org>
Mon, 23 Oct 2023 19:08:53 +0000 (15:08 -0400)
Instead of returning the number of characters in the RelFileNumber,
return the RelFileNumber itself. Continue to return the fork number,
as before, and additionally return the segment number.

parse_filename_for_nontemp_relation now rejects a RelFileNumber or
segment number that begins with a leading zero. Before, we accepted
such cases as relation filenames, but if we continued to do so after
this change, the function might return the same values for two
different files (e.g. 1234.5 and 001234.5 or 1234.005) which could be
annoying for callers. Since we don't actually ever generate filenames
with leading zeroes in the names, any such files that we find must
have been created by something other than PostgreSQL, and it is
therefore reasonable to treat them as non-relation files.

Along the way, change unlogged_relation_entry to store a RelFileNumber
rather than an OID. This update should have been made in
851f4cc75cdd8c831f1baa9a7abf8c8248b65890, but it was overlooked.
It's trivial to make the update as part of this commit, perhaps more
trivial than it would have been without it, so do that.

Patch by me, reviewed by David Steele.

Discussion: http://postgr.es/m/CA+TgmoZNVeBzoqDL8xvr-nkaepq815jtDR4nJzPew7=3iEuM1g@mail.gmail.com

src/backend/backup/basebackup.c
src/backend/storage/file/reinit.c
src/include/storage/reinit.h

index 7d025bcf3822d5752f1171bcce511efefdf84bc9..b126d9c8907315bf6a9a5325f3cfebc05808f866 100644 (file)
@@ -1197,9 +1197,9 @@ sendDir(bbsink *sink, const char *path, int basepathlen, bool sizeonly,
        {
                int                     excludeIdx;
                bool            excludeFound;
-               ForkNumber      relForkNum; /* Type of fork if file is a relation */
-               int                     relnumchars;    /* Chars in filename that are the
-                                                                        * relnumber */
+               RelFileNumber relNumber;
+               ForkNumber      relForkNum;
+               unsigned        segno;
 
                /* Skip special stuff */
                if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
@@ -1249,23 +1249,20 @@ sendDir(bbsink *sink, const char *path, int basepathlen, bool sizeonly,
 
                /* Exclude all forks for unlogged tables except the init fork */
                if (isDbDir &&
-                       parse_filename_for_nontemp_relation(de->d_name, &relnumchars,
-                                                                                               &relForkNum))
+                       parse_filename_for_nontemp_relation(de->d_name, &relNumber,
+                                                                                               &relForkNum, &segno))
                {
                        /* Never exclude init forks */
                        if (relForkNum != INIT_FORKNUM)
                        {
                                char            initForkFile[MAXPGPATH];
-                               char            relNumber[OIDCHARS + 1];
 
                                /*
                                 * If any other type of fork, check if there is an init fork
                                 * with the same RelFileNumber. If so, the file can be
                                 * excluded.
                                 */
-                               memcpy(relNumber, de->d_name, relnumchars);
-                               relNumber[relnumchars] = '\0';
-                               snprintf(initForkFile, sizeof(initForkFile), "%s/%s_init",
+                               snprintf(initForkFile, sizeof(initForkFile), "%s/%u_init",
                                                 path, relNumber);
 
                                if (lstat(initForkFile, &statbuf) == 0)
index fb55371b1b51d35332d5975159768398b1f7adab..5df2517b46b6c2a12fdd0d779db514618a61fa34 100644 (file)
@@ -31,7 +31,7 @@ static void ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname,
 
 typedef struct
 {
-       Oid                     reloid;                 /* hash key */
+       RelFileNumber relnumber;        /* hash key */
 } unlogged_relation_entry;
 
 /*
@@ -195,12 +195,13 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
                while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
                {
                        ForkNumber      forkNum;
-                       int                     relnumchars;
+                       unsigned        segno;
                        unlogged_relation_entry ent;
 
                        /* Skip anything that doesn't look like a relation data file. */
-                       if (!parse_filename_for_nontemp_relation(de->d_name, &relnumchars,
-                                                                                                        &forkNum))
+                       if (!parse_filename_for_nontemp_relation(de->d_name,
+                                                                                                        &ent.relnumber,
+                                                                                                        &forkNum, &segno))
                                continue;
 
                        /* Also skip it unless this is the init fork. */
@@ -208,10 +209,8 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
                                continue;
 
                        /*
-                        * Put the OID portion of the name into the hash table, if it
-                        * isn't already.
+                        * Put the RelFileNumber into the hash table, if it isn't already.
                         */
-                       ent.reloid = atooid(de->d_name);
                        (void) hash_search(hash, &ent, HASH_ENTER, NULL);
                }
 
@@ -235,12 +234,13 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
                while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
                {
                        ForkNumber      forkNum;
-                       int                     relnumchars;
+                       unsigned        segno;
                        unlogged_relation_entry ent;
 
                        /* Skip anything that doesn't look like a relation data file. */
-                       if (!parse_filename_for_nontemp_relation(de->d_name, &relnumchars,
-                                                                                                        &forkNum))
+                       if (!parse_filename_for_nontemp_relation(de->d_name,
+                                                                                                        &ent.relnumber,
+                                                                                                        &forkNum, &segno))
                                continue;
 
                        /* We never remove the init fork. */
@@ -251,7 +251,6 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
                         * See whether the OID portion of the name shows up in the hash
                         * table.  If so, nuke it!
                         */
-                       ent.reloid = atooid(de->d_name);
                        if (hash_search(hash, &ent, HASH_FIND, NULL))
                        {
                                snprintf(rm_path, sizeof(rm_path), "%s/%s",
@@ -285,14 +284,14 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
                while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
                {
                        ForkNumber      forkNum;
-                       int                     relnumchars;
-                       char            relnumbuf[OIDCHARS + 1];
+                       RelFileNumber relNumber;
+                       unsigned        segno;
                        char            srcpath[MAXPGPATH * 2];
                        char            dstpath[MAXPGPATH];
 
                        /* Skip anything that doesn't look like a relation data file. */
-                       if (!parse_filename_for_nontemp_relation(de->d_name, &relnumchars,
-                                                                                                        &forkNum))
+                       if (!parse_filename_for_nontemp_relation(de->d_name, &relNumber,
+                                                                                                        &forkNum, &segno))
                                continue;
 
                        /* Also skip it unless this is the init fork. */
@@ -304,11 +303,12 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
                                         dbspacedirname, de->d_name);
 
                        /* Construct destination pathname. */
-                       memcpy(relnumbuf, de->d_name, relnumchars);
-                       relnumbuf[relnumchars] = '\0';
-                       snprintf(dstpath, sizeof(dstpath), "%s/%s%s",
-                                        dbspacedirname, relnumbuf, de->d_name + relnumchars + 1 +
-                                        strlen(forkNames[INIT_FORKNUM]));
+                       if (segno == 0)
+                               snprintf(dstpath, sizeof(dstpath), "%s/%u",
+                                                dbspacedirname, relNumber);
+                       else
+                               snprintf(dstpath, sizeof(dstpath), "%s/%u.%u",
+                                                dbspacedirname, relNumber, segno);
 
                        /* OK, we're ready to perform the actual copy. */
                        elog(DEBUG2, "copying %s to %s", srcpath, dstpath);
@@ -327,14 +327,14 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
                dbspace_dir = AllocateDir(dbspacedirname);
                while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
                {
+                       RelFileNumber relNumber;
                        ForkNumber      forkNum;
-                       int                     relnumchars;
-                       char            relnumbuf[OIDCHARS + 1];
+                       unsigned        segno;
                        char            mainpath[MAXPGPATH];
 
                        /* Skip anything that doesn't look like a relation data file. */
-                       if (!parse_filename_for_nontemp_relation(de->d_name, &relnumchars,
-                                                                                                        &forkNum))
+                       if (!parse_filename_for_nontemp_relation(de->d_name, &relNumber,
+                                                                                                        &forkNum, &segno))
                                continue;
 
                        /* Also skip it unless this is the init fork. */
@@ -342,11 +342,12 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
                                continue;
 
                        /* Construct main fork pathname. */
-                       memcpy(relnumbuf, de->d_name, relnumchars);
-                       relnumbuf[relnumchars] = '\0';
-                       snprintf(mainpath, sizeof(mainpath), "%s/%s%s",
-                                        dbspacedirname, relnumbuf, de->d_name + relnumchars + 1 +
-                                        strlen(forkNames[INIT_FORKNUM]));
+                       if (segno == 0)
+                               snprintf(mainpath, sizeof(mainpath), "%s/%u",
+                                                dbspacedirname, relNumber);
+                       else
+                               snprintf(mainpath, sizeof(mainpath), "%s/%u.%u",
+                                                dbspacedirname, relNumber, segno);
 
                        fsync_fname(mainpath, false);
                }
@@ -371,52 +372,82 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
  * This function returns true if the file appears to be in the correct format
  * for a non-temporary relation and false otherwise.
  *
- * NB: If this function returns true, the caller is entitled to assume that
- * *relnumchars has been set to a value no more than OIDCHARS, and thus
- * that a buffer of OIDCHARS+1 characters is sufficient to hold the
- * RelFileNumber portion of the filename.  This is critical to protect against
- * a possible buffer overrun.
+ * If it returns true, it sets *relnumber, *fork, and *segno to the values
+ * extracted from the filename. If it returns false, these values are set to
+ * InvalidRelFileNumber, InvalidForkNumber, and 0, respectively.
  */
 bool
-parse_filename_for_nontemp_relation(const char *name, int *relnumchars,
-                                                                       ForkNumber *fork)
+parse_filename_for_nontemp_relation(const char *name, RelFileNumber *relnumber,
+                                                                       ForkNumber *fork, unsigned *segno)
 {
-       int                     pos;
+       unsigned long n,
+                               s;
+       ForkNumber      f;
+       char       *endp;
 
-       /* Look for a non-empty string of digits (that isn't too long). */
-       for (pos = 0; isdigit((unsigned char) name[pos]); ++pos)
-               ;
-       if (pos == 0 || pos > OIDCHARS)
+       *relnumber = InvalidRelFileNumber;
+       *fork = InvalidForkNumber;
+       *segno = 0;
+
+       /*
+        * Relation filenames should begin with a digit that is not a zero. By
+        * rejecting cases involving leading zeroes, the caller can assume that
+        * there's only one possible string of characters that could have produced
+        * any given value for *relnumber.
+        *
+        * (To be clear, we don't expect files with names like 0017.3 to exist at
+        * all -- but if 0017.3 does exist, it's a non-relation file, not part of
+        * the main fork for relfilenode 17.)
+        */
+       if (name[0] < '1' || name[0] > '9')
+               return false;
+
+       /*
+        * Parse the leading digit string. If the value is out of range, we
+        * conclude that this isn't a relation file at all.
+        */
+       errno = 0;
+       n = strtoul(name, &endp, 10);
+       if (errno || name == endp || n <= 0 || n > PG_UINT32_MAX)
                return false;
-       *relnumchars = pos;
+       name = endp;
 
        /* Check for a fork name. */
-       if (name[pos] != '_')
-               *fork = MAIN_FORKNUM;
+       if (*name != '_')
+               f = MAIN_FORKNUM;
        else
        {
                int                     forkchar;
 
-               forkchar = forkname_chars(&name[pos + 1], fork);
+               forkchar = forkname_chars(name + 1, &f);
                if (forkchar <= 0)
                        return false;
-               pos += forkchar + 1;
+               name += forkchar + 1;
        }
 
        /* Check for a segment number. */
-       if (name[pos] == '.')
+       if (*name != '.')
+               s = 0;
+       else
        {
-               int                     segchar;
+               /* Reject leading zeroes, just like we do for RelFileNumber. */
+               if (name[0] < '1' || name[0] > '9')
+                       return false;
 
-               for (segchar = 1; isdigit((unsigned char) name[pos + segchar]); ++segchar)
-                       ;
-               if (segchar <= 1)
+               errno = 0;
+               s = strtoul(name + 1, &endp, 10);
+               if (errno || name + 1 == endp || s <= 0 || s > PG_UINT32_MAX)
                        return false;
-               pos += segchar;
+               name = endp;
        }
 
        /* Now we should be at the end. */
-       if (name[pos] != '\0')
+       if (*name != '\0')
                return false;
+
+       /* Set out parameters and return. */
+       *relnumber = (RelFileNumber) n;
+       *fork = f;
+       *segno = (unsigned) s;
        return true;
 }
index e2bbb5abe9f3312bf2f59a0903773b4a777b575f..f8eb7ce23404b7540d115746b270a547eba761df 100644 (file)
@@ -20,8 +20,9 @@
 
 extern void ResetUnloggedRelations(int op);
 extern bool parse_filename_for_nontemp_relation(const char *name,
-                                                                                               int *relnumchars,
-                                                                                               ForkNumber *fork);
+                                                                                               RelFileNumber *relnumber,
+                                                                                               ForkNumber *fork,
+                                                                                               unsigned *segno);
 
 #define UNLOGGED_RELATION_CLEANUP              0x0001
 #define UNLOGGED_RELATION_INIT                 0x0002