Skip unnecessary stat() calls in walkdir().
authorThomas Munro <tmunro@postgresql.org>
Mon, 7 Sep 2020 06:11:46 +0000 (18:11 +1200)
committerThomas Munro <tmunro@postgresql.org>
Mon, 7 Sep 2020 06:28:06 +0000 (18:28 +1200)
Some kernels can tell us the type of a "dirent", so we can avoid a call
to stat() or lstat() in many cases.  Define a new function
get_dirent_type() to contain that logic, for use by the backend and
frontend versions of walkdir(), and perhaps other callers in future.

Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Reviewed-by: Juan José Santamaría Flecha <juanjo.santamaria@gmail.com>
Discussion: https://postgr.es/m/CA%2BhUKG%2BFzxupGGN4GpUdbzZN%2Btn6FQPHo8w0Q%2BAPH5Wz8RG%2Bww%40mail.gmail.com

src/backend/storage/file/fd.c
src/common/Makefile
src/common/file_utils.c
src/include/common/file_utils.h
src/tools/msvc/Mkvcbuild.pm
src/tools/pgindent/typedefs.list

index f376a97ed6771b10e5ad0bfa6cae7c69a9f1c16b..bd72a87ee30713d082d3c6c85b40d940c4bac903 100644 (file)
@@ -89,6 +89,7 @@
 #include "access/xlog.h"
 #include "catalog/pg_tablespace.h"
 #include "common/file_perm.h"
+#include "common/file_utils.h"
 #include "miscadmin.h"
 #include "pgstat.h"
 #include "portability/mem.h"
@@ -3340,8 +3341,6 @@ walkdir(const char *path,
    while ((de = ReadDirExtended(dir, path, elevel)) != NULL)
    {
        char        subpath[MAXPGPATH * 2];
-       struct stat fst;
-       int         sret;
 
        CHECK_FOR_INTERRUPTS();
 
@@ -3351,23 +3350,23 @@ walkdir(const char *path,
 
        snprintf(subpath, sizeof(subpath), "%s/%s", path, de->d_name);
 
-       if (process_symlinks)
-           sret = stat(subpath, &fst);
-       else
-           sret = lstat(subpath, &fst);
-
-       if (sret < 0)
+       switch (get_dirent_type(subpath, de, process_symlinks, elevel))
        {
-           ereport(elevel,
-                   (errcode_for_file_access(),
-                    errmsg("could not stat file \"%s\": %m", subpath)));
-           continue;
-       }
+           case PGFILETYPE_REG:
+               (*action) (subpath, false, elevel);
+               break;
+           case PGFILETYPE_DIR:
+               walkdir(subpath, action, false, elevel);
+               break;
+           default:
 
-       if (S_ISREG(fst.st_mode))
-           (*action) (subpath, false, elevel);
-       else if (S_ISDIR(fst.st_mode))
-           walkdir(subpath, action, false, elevel);
+               /*
+                * Errors are already reported directly by get_dirent_type(),
+                * and any remaining symlinks and unknown file types are
+                * ignored.
+                */
+               break;
+       }
    }
 
    FreeDir(dir);               /* we ignore any error here */
index ad8fd9e41c47adc820c3b3c92dfffbdbba1ed909..f2817628851ea012d70e3df4faaa790b9c372235 100644 (file)
@@ -56,6 +56,7 @@ OBJS_COMMON = \
    exec.o \
    f2s.o \
    file_perm.o \
+   file_utils.o \
    hashfn.o \
    ip.o \
    jsonapi.o \
@@ -91,7 +92,6 @@ endif
 OBJS_FRONTEND = \
    $(OBJS_COMMON) \
    fe_memutils.o \
-   file_utils.o \
    logging.o \
    restricted_token.o \
    sprompt.o
index a2faafdf13a35ff337f24acd17bda75e8545a742..12474730905ca2845daad65ca9c88589c9c01422 100644 (file)
  */
 
 #ifndef FRONTEND
-#error "This file is not expected to be compiled for backend code"
-#endif
-
+#include "postgres.h"
+#else
 #include "postgres_fe.h"
+#endif
 
 #include <dirent.h>
 #include <fcntl.h>
 #include <unistd.h>
 
 #include "common/file_utils.h"
+#ifdef FRONTEND
 #include "common/logging.h"
+#endif
 
+#ifdef FRONTEND
 
 /* Define PG_FLUSH_DATA_WORKS if we have an implementation for pg_flush_data */
 #if defined(HAVE_SYNC_FILE_RANGE)
@@ -167,8 +170,6 @@ walkdir(const char *path,
    while (errno = 0, (de = readdir(dir)) != NULL)
    {
        char        subpath[MAXPGPATH * 2];
-       struct stat fst;
-       int         sret;
 
        if (strcmp(de->d_name, ".") == 0 ||
            strcmp(de->d_name, "..") == 0)
@@ -176,21 +177,23 @@ walkdir(const char *path,
 
        snprintf(subpath, sizeof(subpath), "%s/%s", path, de->d_name);
 
-       if (process_symlinks)
-           sret = stat(subpath, &fst);
-       else
-           sret = lstat(subpath, &fst);
-
-       if (sret < 0)
+       switch (get_dirent_type(subpath, de, process_symlinks, PG_LOG_ERROR))
        {
-           pg_log_error("could not stat file \"%s\": %m", subpath);
-           continue;
+           case PGFILETYPE_REG:
+               (*action) (subpath, false);
+               break;
+           case PGFILETYPE_DIR:
+               walkdir(subpath, action, false);
+               break;
+           default:
+
+               /*
+                * Errors are already reported directly by get_dirent_type(),
+                * and any remaining symlinks and unknown file types are
+                * ignored.
+                */
+               break;
        }
-
-       if (S_ISREG(fst.st_mode))
-           (*action) (subpath, false);
-       else if (S_ISDIR(fst.st_mode))
-           walkdir(subpath, action, false);
    }
 
    if (errno)
@@ -394,3 +397,73 @@ durable_rename(const char *oldfile, const char *newfile)
 
    return 0;
 }
+
+#endif                         /* FRONTEND */
+
+/*
+ * Return the type of a directory entry.
+ *
+ * In frontend code, elevel should be a level from logging.h; in backend code
+ * it should be a level from elog.h.
+ */
+PGFileType
+get_dirent_type(const char *path,
+               const struct dirent *de,
+               bool look_through_symlinks,
+               int elevel)
+{
+   PGFileType  result;
+
+   /*
+    * Some systems tell us the type directly in the dirent struct, but that's
+    * a BSD and Linux extension not required by POSIX.  Even when the
+    * interface is present, sometimes the type is unknown, depending on the
+    * filesystem.
+    */
+#if defined(DT_REG) && defined(DT_DIR) && defined(DT_LNK)
+   if (de->d_type == DT_REG)
+       result = PGFILETYPE_REG;
+   else if (de->d_type == DT_DIR)
+       result = PGFILETYPE_DIR;
+   else if (de->d_type == DT_LNK && !look_through_symlinks)
+       result = PGFILETYPE_LNK;
+   else
+       result = PGFILETYPE_UNKNOWN;
+#else
+   result = PGFILETYPE_UNKNOWN;
+#endif
+
+   if (result == PGFILETYPE_UNKNOWN)
+   {
+       struct stat fst;
+       int         sret;
+
+
+       if (look_through_symlinks)
+           sret = stat(path, &fst);
+       else
+           sret = lstat(path, &fst);
+
+       if (sret < 0)
+       {
+           result = PGFILETYPE_ERROR;
+#ifdef FRONTEND
+           pg_log_generic(elevel, "could not stat file \"%s\": %m", path);
+#else
+           ereport(elevel,
+                   (errcode_for_file_access(),
+                    errmsg("could not stat file \"%s\": %m", path)));
+#endif
+       }
+       else if (S_ISREG(fst.st_mode))
+           result = PGFILETYPE_REG;
+       else if (S_ISDIR(fst.st_mode))
+           result = PGFILETYPE_DIR;
+#ifdef S_ISLNK
+       else if (S_ISLNK(fst.st_mode))
+           result = PGFILETYPE_LNK;
+#endif
+   }
+
+   return result;
+}
index a7add75efa1dca7e6da396b7f2726928f9f370a3..fef846485f8364c944a500890701e432fe955eeb 100644 (file)
@@ -1,6 +1,4 @@
 /*-------------------------------------------------------------------------
- *
- * File-processing utility routines for frontend code
  *
  * Assorted utility functions to work on files.
  *
 #ifndef FILE_UTILS_H
 #define FILE_UTILS_H
 
+#include <dirent.h>
+
+typedef enum PGFileType
+{
+   PGFILETYPE_ERROR,
+   PGFILETYPE_UNKNOWN,
+   PGFILETYPE_REG,
+   PGFILETYPE_DIR,
+   PGFILETYPE_LNK
+} PGFileType;
+
+#ifdef FRONTEND
 extern int fsync_fname(const char *fname, bool isdir);
 extern void fsync_pgdata(const char *pg_data, int serverVersion);
 extern void fsync_dir_recurse(const char *dir);
 extern int durable_rename(const char *oldfile, const char *newfile);
 extern int fsync_parent_path(const char *fname);
+#endif
+
+extern PGFileType get_dirent_type(const char *path,
+                                 const struct dirent *de,
+                                 bool look_through_symlinks,
+                                 int elevel);
 
 #endif                         /* FILE_UTILS_H */
index 536df5a92e75ff02ccd4874c68af402c44e5bb38..89e1b39036568d0b8c6a99613b97cea788d347d3 100644 (file)
@@ -121,7 +121,7 @@ sub mkvcbuild
    our @pgcommonallfiles = qw(
      archive.c base64.c checksum_helper.c
      config_info.c controldata_utils.c d2s.c encnames.c exec.c
-     f2s.c file_perm.c hashfn.c ip.c jsonapi.c
+     f2s.c file_perm.c file_utils.c hashfn.c ip.c jsonapi.c
      keywords.c kwlookup.c link-canary.c md5.c
      pg_get_line.c pg_lzcompress.c pgfnames.c psprintf.c relpath.c rmtree.c
      saslprep.c scram-common.c string.c stringinfo.c unicode_norm.c username.c
@@ -138,7 +138,7 @@ sub mkvcbuild
    }
 
    our @pgcommonfrontendfiles = (
-       @pgcommonallfiles, qw(fe_memutils.c file_utils.c
+       @pgcommonallfiles, qw(fe_memutils.c
          logging.c restricted_token.c sprompt.c));
 
    our @pgcommonbkndfiles = @pgcommonallfiles;
index 500623e230199d2ffb01301046b26e073fd2d9bc..391c204f72191a1861fe08bd330274ccd26f9f1e 100644 (file)
@@ -1515,6 +1515,7 @@ PGEventResultCopy
 PGEventResultCreate
 PGEventResultDestroy
 PGFInfoFunction
+PGFileType
 PGFunction
 PGLZ_HistEntry
 PGLZ_Strategy