Check for STATUS_DELETE_PENDING on Windows.
author: Thomas Munro <tmunro@postgresql.org>
Fri, 10 Dec 2021 03:13:14 +0000 (16:13 +1300)
committer: Thomas Munro <tmunro@postgresql.org>
Fri, 10 Dec 2021 03:19:43 +0000 (16:19 +1300)
1.  Update our open() wrapper to check for NT's STATUS_DELETE_PENDING
and translate it to Unix-like errors.  This is done with
RtlGetLastNtStatus(), which is dynamically loaded from ntdll.  A new
file win32ntdll.c centralizes lookup of NT functions, in case we decide
to add more in the future.

2.  Remove non-working code that was trying to do something similar for
stat(), and just reuse the open() wrapper code.  As a side effect,
stat() also gains resilience against "sharing violation" errors.

3.  Since stat() is used very early in process startup, remove the
requirement that the Win32 signal event has been created before
pgwin32_open_handle() is reached.  Instead, teach pg_usleep() to fall
back to a non-interruptible sleep if reached before the signal event is
available.

This could be back-patched, but for now it's in master only.  The
problem has apparently been with us for a long time and generated only a
few complaints.  Proposed patches trigger it more often, which led to
this investigation and fix.

Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Alexander Lakhin <exclusion@gmail.com>
Reviewed-by: Juan José Santamaría Flecha <juanjo.santamaria@gmail.com>
Discussion: https://postgr.es/m/CA%2BhUKGJz_pZTF9mckn6XgSv69%2BjGwdgLkxZ6b3NWGLBCVjqUZA%40mail.gmail.com

configure
configure.ac
src/backend/port/win32/signal.c
src/include/port.h
src/include/port/win32ntdll.h [new file with mode: 0644]
src/port/open.c
src/port/win32ntdll.c [new file with mode: 0644]
src/port/win32stat.c
src/tools/msvc/Mkvcbuild.pm

index 5f842a86b27b0ab0fe7181cb50dc0f6883332496..3b19105328d8cc6e7ab8018c6dfb8815b3527d01 100755 (executable)
--- a/configure
+++ b/configure
@@ -16738,6 +16738,12 @@ esac
  ;;
 esac
 
+  case " $LIBOBJS " in
+  *" win32ntdll.$ac_objext "* ) ;;
+  *) LIBOBJS="$LIBOBJS win32ntdll.$ac_objext"
+ ;;
+esac
+
   case " $LIBOBJS " in
   *" win32security.$ac_objext "* ) ;;
   *) LIBOBJS="$LIBOBJS win32security.$ac_objext"
index 566a6010dd7a4895dbe0d205c5737d2c396f63b2..e77d4dcf2d2759804f7dd7f51b661b250125c616 100644 (file)
--- a/configure.ac
+++ b/configure.ac
@@ -1932,6 +1932,7 @@ if test "$PORTNAME" = "win32"; then
   AC_LIBOBJ(system)
   AC_LIBOBJ(win32env)
   AC_LIBOBJ(win32error)
+  AC_LIBOBJ(win32ntdll)
   AC_LIBOBJ(win32security)
   AC_LIBOBJ(win32setlocale)
   AC_LIBOBJ(win32stat)
index 580a517f3f56f90680bb33d4bafc89b39508bd41..61f06a29f6d749b8f29fb92f22ae975a390591bd 100644 (file)
--- a/src/backend/port/win32/signal.c
+++ b/src/backend/port/win32/signal.c
@@ -52,7 +52,17 @@ static BOOL WINAPI pg_console_handler(DWORD dwCtrlType);
 void
 pg_usleep(long microsec)
 {
-       Assert(pgwin32_signal_event != NULL);
+       if (unlikely(pgwin32_signal_event == NULL))
+       {
+               /*
+                * If we're reached by pgwin32_open_handle() early in startup before
+                * the signal event is set up, just fall back to a regular
+                * non-interruptible sleep.
+                */
+               SleepEx((microsec < 500 ? 1 : (microsec + 500) / 1000), FALSE);
+               return;
+       }
+
        if (WaitForSingleObject(pgwin32_signal_event,
                                                        (microsec < 500 ? 1 : (microsec + 500) / 1000))
                == WAIT_OBJECT_0)
index 806fb795edc217f13cf71e9baa57db97d2d6158c..fd9c9d6f9483a645e12d1b7024e602f3233dd77d 100644 (file)
--- a/src/include/port.h
+++ b/src/include/port.h
@@ -296,6 +296,7 @@ extern bool rmtree(const char *path, bool rmtopdir);
  * passing of other special options.
  */
 #define                O_DIRECT        0x80000000
+extern HANDLE pgwin32_open_handle(const char *, int, bool);
 extern int     pgwin32_open(const char *, int,...);
 extern FILE *pgwin32_fopen(const char *, const char *);
 #define                open(a,b,c) pgwin32_open(a,b,c)
diff --git a/src/include/port/win32ntdll.h b/src/include/port/win32ntdll.h
new file mode 100644 (file)
index 0000000..4d8808b
--- /dev/null
+++ b/src/include/port/win32ntdll.h
@@ -0,0 +1,27 @@
+/*-------------------------------------------------------------------------
+ *
+ * win32ntdll.h
+ *       Dynamically loaded Windows NT functions.
+ *
+ * Portions Copyright (c) 2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/port/win32ntdll.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/*
+ * Because this includes NT headers that normally conflict with Win32 headers,
+ * any translation unit that includes it should #define UMDF_USING_NTSTATUS
+ * before including <windows.h>.
+ */
+
+#include <ntstatus.h>
+#include <winternl.h>
+
+typedef NTSTATUS (__stdcall *RtlGetLastNtStatus_t) (void);
+
+extern RtlGetLastNtStatus_t pg_RtlGetLastNtStatus;
+
+extern int     initialize_ntdll(void);
index 14c6debba915cff31219758e0d228588db999766..7b52bd83c283f1d2181a80f9741cc45e97dbdb54 100644 (file)
--- a/src/port/open.c
+++ b/src/port/open.c
 
 #ifdef WIN32
 
+#define UMDF_USING_NTSTATUS
+
 #ifndef FRONTEND
 #include "postgres.h"
 #else
 #include "postgres_fe.h"
 #endif
 
+#include "port/win32ntdll.h"
+
 #include <fcntl.h>
 #include <assert.h>
 #include <sys/stat.h>
 
-
 static int
 openFlagsToCreateFileFlags(int openFlags)
 {
@@ -56,38 +59,25 @@ openFlagsToCreateFileFlags(int openFlags)
 }
 
 /*
- *      - file attribute setting, based on fileMode?
+ * Internal function used by pgwin32_open() and _pgstat64().  When
+ * backup_semantics is true, directories may be opened (for limited uses).  On
+ * failure, INVALID_HANDLE_VALUE is returned and errno is set.
  */
-int
-pgwin32_open(const char *fileName, int fileFlags,...)
+HANDLE
+pgwin32_open_handle(const char *fileName, int fileFlags, bool backup_semantics)
 {
-       int                     fd;
-       HANDLE          h = INVALID_HANDLE_VALUE;
+       HANDLE          h;
        SECURITY_ATTRIBUTES sa;
        int                     loops = 0;
 
+       if (initialize_ntdll() < 0)
+               return INVALID_HANDLE_VALUE;
+
        /* Check that we can handle the request */
        assert((fileFlags & ((O_RDONLY | O_WRONLY | O_RDWR) | O_APPEND |
                                                 (O_RANDOM | O_SEQUENTIAL | O_TEMPORARY) |
                                                 _O_SHORT_LIVED | O_DSYNC | O_DIRECT |
                                                 (O_CREAT | O_TRUNC | O_EXCL) | (O_TEXT | O_BINARY))) == fileFlags);
-#ifndef FRONTEND
-       Assert(pgwin32_signal_event != NULL);   /* small chance of pg_usleep() */
-#endif
-
-#ifdef FRONTEND
-
-       /*
-        * Since PostgreSQL 12, those concurrent-safe versions of open() and
-        * fopen() can be used by frontends, having as side-effect to switch the
-        * file-translation mode from O_TEXT to O_BINARY if none is specified.
-        * Caller may want to enforce the binary or text mode, but if nothing is
-        * defined make sure that the default mode maps with what versions older
-        * than 12 have been doing.
-        */
-       if ((fileFlags & O_BINARY) == 0)
-               fileFlags |= O_TEXT;
-#endif
 
        sa.nLength = sizeof(sa);
        sa.bInheritHandle = TRUE;
@@ -102,6 +92,7 @@ pgwin32_open(const char *fileName, int fileFlags,...)
                                                   &sa,
                                                   openFlagsToCreateFileFlags(fileFlags),
                                                   FILE_ATTRIBUTE_NORMAL |
+                                                  (backup_semantics ? FILE_FLAG_BACKUP_SEMANTICS : 0) |
                                                   ((fileFlags & O_RANDOM) ? FILE_FLAG_RANDOM_ACCESS : 0) |
                                                   ((fileFlags & O_SEQUENTIAL) ? FILE_FLAG_SEQUENTIAL_SCAN : 0) |
                                                   ((fileFlags & _O_SHORT_LIVED) ? FILE_ATTRIBUTE_TEMPORARY : 0) |
@@ -140,38 +131,55 @@ pgwin32_open(const char *fileName, int fileFlags,...)
                /*
                 * ERROR_ACCESS_DENIED is returned if the file is deleted but not yet
                 * gone (Windows NT status code is STATUS_DELETE_PENDING).  In that
-                * case we want to wait a bit and try again, giving up after 1 second
-                * (since this condition should never persist very long).  However,
-                * there are other commonly-hit cases that return ERROR_ACCESS_DENIED,
-                * so care is needed.  In particular that happens if we try to open a
-                * directory, or of course if there's an actual file-permissions
-                * problem.  To distinguish these cases, try a stat().  In the
-                * delete-pending case, it will either also get STATUS_DELETE_PENDING,
-                * or it will see the file as gone and fail with ENOENT.  In other
-                * cases it will usually succeed.  The only somewhat-likely case where
-                * this coding will uselessly wait is if there's a permissions problem
-                * with a containing directory, which we hope will never happen in any
-                * performance-critical code paths.
+                * case, we'd better ask for the NT status too so we can translate it
+                * to a more Unix-like error.  We hope that nothing clobbers the NT
+                * status in between the internal NtCreateFile() call and CreateFile()
+                * returning.
+                *
+                * If there's no O_CREAT flag, then we'll pretend the file is
+                * invisible.  With O_CREAT, we have no choice but to report that
+                * there's a file in the way (which wouldn't happen on Unix).
                 */
-               if (err == ERROR_ACCESS_DENIED)
+               if (err == ERROR_ACCESS_DENIED &&
+                       pg_RtlGetLastNtStatus() == STATUS_DELETE_PENDING)
                {
-                       if (loops < 10)
-                       {
-                               struct stat st;
-
-                               if (stat(fileName, &st) != 0)
-                               {
-                                       pg_usleep(100000);
-                                       loops++;
-                                       continue;
-                               }
-                       }
+                       if (fileFlags & O_CREAT)
+                               err = ERROR_FILE_EXISTS;
+                       else
+                               err = ERROR_FILE_NOT_FOUND;
                }
 
                _dosmaperr(err);
-               return -1;
+               return INVALID_HANDLE_VALUE;
        }
 
+       return h;
+}
+
+int
+pgwin32_open(const char *fileName, int fileFlags,...)
+{
+       HANDLE          h;
+       int                     fd;
+
+       h = pgwin32_open_handle(fileName, fileFlags, false);
+       if (h == INVALID_HANDLE_VALUE)
+               return -1;
+
+#ifdef FRONTEND
+
+       /*
+        * Since PostgreSQL 12, those concurrent-safe versions of open() and
+        * fopen() can be used by frontends, having as side-effect to switch the
+        * file-translation mode from O_TEXT to O_BINARY if none is specified.
+        * Caller may want to enforce the binary or text mode, but if nothing is
+        * defined make sure that the default mode maps with what versions older
+        * than 12 have been doing.
+        */
+       if ((fileFlags & O_BINARY) == 0)
+               fileFlags |= O_TEXT;
+#endif
+
        /* _open_osfhandle will, on error, set errno accordingly */
        if ((fd = _open_osfhandle((intptr_t) h, fileFlags & O_APPEND)) < 0)
                CloseHandle(h);                 /* will not affect errno */
diff --git a/src/port/win32ntdll.c b/src/port/win32ntdll.c
new file mode 100644 (file)
index 0000000..aa3d37c
--- /dev/null
+++ b/src/port/win32ntdll.c
@@ -0,0 +1,69 @@
+/*-------------------------------------------------------------------------
+ *
+ * win32ntdll.c
+ *       Dynamically loaded Windows NT functions.
+ *
+ * Portions Copyright (c) 2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *       src/port/win32ntdll.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#define UMDF_USING_NTSTATUS
+
+#include "c.h"
+
+#include "port/win32ntdll.h"
+
+RtlGetLastNtStatus_t pg_RtlGetLastNtStatus;
+
+typedef struct NtDllRoutine
+{
+       const char *name;
+       pg_funcptr_t *address;
+} NtDllRoutine;
+
+static const NtDllRoutine routines[] = {
+       {"RtlGetLastNtStatus", (pg_funcptr_t *) &pg_RtlGetLastNtStatus}
+};
+
+static bool initialized;
+
+int
+initialize_ntdll(void)
+{
+       HMODULE         module;
+
+       if (initialized)
+               return 0;
+
+       if (!(module = LoadLibraryEx("ntdll.dll", NULL, 0)))
+       {
+               _dosmaperr(GetLastError());
+               return -1;
+       }
+
+       for (int i = 0; i < lengthof(routines); ++i)
+       {
+               pg_funcptr_t address;
+
+               address = (pg_funcptr_t) GetProcAddress(module, routines[i].name);
+               if (!address)
+               {
+                       _dosmaperr(GetLastError());
+                       FreeLibrary(module);
+
+                       return -1;
+               }
+
+               *(pg_funcptr_t *) routines[i].address = address;
+       }
+
+       initialized = true;
+
+       return 0;
+}
index 426e01f0efab4188c1be287b492bf7eef2e78adf..29e13409e6c6bb6b5c9f4072c9d3489d4fb31c0f 100644 (file)
--- a/src/port/win32stat.c
+++ b/src/port/win32stat.c
 #include "c.h"
 #include <windows.h>
 
-/*
- * In order to support MinGW and MSVC2013 we use NtQueryInformationFile as an
- * alternative for GetFileInformationByHandleEx. It is loaded from the ntdll
- * library.
- */
-#if _WIN32_WINNT < 0x0600
-#include <winternl.h>
-
-#if !defined(__MINGW32__) && !defined(__MINGW64__)
-/* MinGW includes this in <winternl.h>, but it is missing in MSVC */
-typedef struct _FILE_STANDARD_INFORMATION
-{
-       LARGE_INTEGER AllocationSize;
-       LARGE_INTEGER EndOfFile;
-       ULONG           NumberOfLinks;
-       BOOLEAN         DeletePending;
-       BOOLEAN         Directory;
-} FILE_STANDARD_INFORMATION;
-#define FileStandardInformation 5
-#endif                                                 /* !defined(__MINGW32__) &&
-                                                                * !defined(__MINGW64__) */
-
-typedef NTSTATUS (NTAPI * PFN_NTQUERYINFORMATIONFILE)
-                       (IN HANDLE FileHandle,
-                        OUT PIO_STATUS_BLOCK IoStatusBlock,
-                        OUT PVOID FileInformation,
-                        IN ULONG Length,
-                        IN FILE_INFORMATION_CLASS FileInformationClass);
-
-static PFN_NTQUERYINFORMATIONFILE _NtQueryInformationFile = NULL;
-
-static HMODULE ntdll = NULL;
-
-/*
- * Load DLL file just once regardless of how many functions we load/call in it.
- */
-static void
-LoadNtdll(void)
-{
-       if (ntdll != NULL)
-               return;
-       ntdll = LoadLibraryEx("ntdll.dll", NULL, 0);
-}
-
-#endif                                                 /* _WIN32_WINNT < 0x0600 */
-
-
 /*
  * Convert a FILETIME struct into a 64 bit time_t.
  */
@@ -162,120 +115,18 @@ int
 _pgstat64(const char *name, struct stat *buf)
 {
        /*
-        * We must use a handle so lstat() returns the information of the target
-        * file.  To have a reliable test for ERROR_DELETE_PENDING, we use
-        * NtQueryInformationFile from Windows 2000 or
-        * GetFileInformationByHandleEx from Server 2008 / Vista.
+        * Our open wrapper will report STATUS_DELETE_PENDING as ENOENT.  We
+        * request FILE_FLAG_BACKUP_SEMANTICS so that we can open directories too,
+        * for limited purposes.  We use the private handle-based version, so we
+        * don't risk running out of fds.
         */
-       SECURITY_ATTRIBUTES sa;
        HANDLE          hFile;
        int                     ret;
-#if _WIN32_WINNT < 0x0600
-       IO_STATUS_BLOCK ioStatus;
-       FILE_STANDARD_INFORMATION standardInfo;
-#else
-       FILE_STANDARD_INFO standardInfo;
-#endif
-
-       if (name == NULL || buf == NULL)
-       {
-               errno = EINVAL;
-               return -1;
-       }
 
-       /* fast not-exists check */
-       if (GetFileAttributes(name) == INVALID_FILE_ATTRIBUTES)
-       {
-               _dosmaperr(GetLastError());
-               return -1;
-       }
-
-       /* get a file handle as lightweight as we can */
-       sa.nLength = sizeof(SECURITY_ATTRIBUTES);
-       sa.bInheritHandle = TRUE;
-       sa.lpSecurityDescriptor = NULL;
-       hFile = CreateFile(name,
-                                          GENERIC_READ,
-                                          (FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE),
-                                          &sa,
-                                          OPEN_EXISTING,
-                                          (FILE_FLAG_NO_BUFFERING | FILE_FLAG_BACKUP_SEMANTICS |
-                                               FILE_FLAG_OVERLAPPED),
-                                          NULL);
+       hFile = pgwin32_open_handle(name, O_RDONLY, true);
        if (hFile == INVALID_HANDLE_VALUE)
-       {
-               DWORD           err = GetLastError();
-
-               CloseHandle(hFile);
-               _dosmaperr(err);
                return -1;
-       }
-
-       memset(&standardInfo, 0, sizeof(standardInfo));
-
-#if _WIN32_WINNT < 0x0600
-       if (_NtQueryInformationFile == NULL)
-       {
-               /* First time through: load ntdll.dll and find NtQueryInformationFile */
-               LoadNtdll();
-               if (ntdll == NULL)
-               {
-                       DWORD           err = GetLastError();
-
-                       CloseHandle(hFile);
-                       _dosmaperr(err);
-                       return -1;
-               }
-
-               _NtQueryInformationFile = (PFN_NTQUERYINFORMATIONFILE) (pg_funcptr_t)
-                       GetProcAddress(ntdll, "NtQueryInformationFile");
-               if (_NtQueryInformationFile == NULL)
-               {
-                       DWORD           err = GetLastError();
 
-                       CloseHandle(hFile);
-                       _dosmaperr(err);
-                       return -1;
-               }
-       }
-
-       if (!NT_SUCCESS(_NtQueryInformationFile(hFile, &ioStatus, &standardInfo,
-                                                                                       sizeof(standardInfo),
-                                                                                       FileStandardInformation)))
-       {
-               DWORD           err = GetLastError();
-
-               CloseHandle(hFile);
-               _dosmaperr(err);
-               return -1;
-       }
-#else
-       if (!GetFileInformationByHandleEx(hFile, FileStandardInfo, &standardInfo,
-                                                                         sizeof(standardInfo)))
-       {
-               DWORD           err = GetLastError();
-
-               CloseHandle(hFile);
-               _dosmaperr(err);
-               return -1;
-       }
-#endif                                                 /* _WIN32_WINNT < 0x0600 */
-
-       if (standardInfo.DeletePending)
-       {
-               /*
-                * File has been deleted, but is not gone from the filesystem yet.
-                * This can happen when some process with FILE_SHARE_DELETE has it
-                * open, and it will be fully removed once that handle is closed.
-                * Meanwhile, we can't open it, so indicate that the file just doesn't
-                * exist.
-                */
-               CloseHandle(hFile);
-               errno = ENOENT;
-               return -1;
-       }
-
-       /* At last we can invoke fileinfo_to_stat */
        ret = fileinfo_to_stat(hFile, buf);
 
        CloseHandle(hFile);
@@ -316,11 +167,6 @@ _pgfstat64(int fileno, struct stat *buf)
                return 0;
        }
 
-       /*
-        * Since we already have a file handle there is no need to check for
-        * ERROR_DELETE_PENDING.
-        */
-
        return fileinfo_to_stat(hFile, buf);
 }
 
index 5a374a4727403ae08a12c1d304bc750c458a500a..404c45a6f30140d811c71846f3a3aca0b99fd320 100644 (file)
--- a/src/tools/msvc/Mkvcbuild.pm
+++ b/src/tools/msvc/Mkvcbuild.pm
@@ -107,7 +107,8 @@ sub mkvcbuild
          pg_strong_random.c pgcheckdir.c pgmkdirp.c pgsleep.c pgstrcasecmp.c
          pqsignal.c mkdtemp.c qsort.c qsort_arg.c bsearch_arg.c quotes.c system.c
          strerror.c tar.c thread.c
-         win32env.c win32error.c win32security.c win32setlocale.c win32stat.c);
+         win32env.c win32error.c win32ntdll.c
+         win32security.c win32setlocale.c win32stat.c);
 
        push(@pgportfiles, 'strtof.c') if ($vsVersion < '14.00');