Add prefetching support on macOS
authorPeter Eisentraut <peter@eisentraut.org>
Wed, 28 Aug 2024 05:26:48 +0000 (07:26 +0200)
committerPeter Eisentraut <peter@eisentraut.org>
Wed, 28 Aug 2024 05:28:27 +0000 (07:28 +0200)
macOS doesn't have posix_fadvise(), but fcntl() with the F_RDADVISE
command does the same thing.

Some related documentation has been generalized to not mention
posix_fadvise() specifically anymore.

Reviewed-by: Thomas Munro <thomas.munro@gmail.com>
Discussion: https://www.postgresql.org/message-id/flat/0827edec-1317-4917-a186-035eb1e3241d%40eisentraut.org

doc/src/sgml/config.sgml
doc/src/sgml/wal.sgml
src/backend/commands/variable.c
src/backend/storage/file/fd.c
src/include/pg_config_manual.h
src/include/port/darwin.h

index 2937384b0012d221de50596a9a80785f960d4ae6..12feac608743099fa63f6f8bd06e3d6275d87d48 100644 (file)
@@ -2679,11 +2679,9 @@ include_dir 'conf.d'
         </para>
 
         <para>
-         Asynchronous I/O depends on an effective <function>posix_fadvise</function>
-         function, which some operating systems lack.  If the function is not
-         present then setting this parameter to anything but zero will result
-         in an error.  On some operating systems (e.g., Solaris), the function
-         is present but does not actually do anything.
+         Asynchronous I/O requires that the operating system supports issuing
+         read-ahead advice.  If there is no operating system support then
+         setting this parameter to anything but zero will result in an error.
         </para>
 
         <para>
@@ -3852,10 +3850,8 @@ include_dir 'conf.d'
         <literal>off</literal>, <literal>on</literal> and
         <literal>try</literal> (the default).  The setting
         <literal>try</literal> enables
-        prefetching only if the operating system provides the
-        <function>posix_fadvise</function> function, which is currently used
-        to implement prefetching.  Note that some operating systems provide the
-        function, but it doesn't do anything.
+        prefetching only if the operating system provides support for issuing
+        read-ahead advice.
        </para>
        <para>
         Prefetching blocks that will soon be needed can reduce I/O wait times
index d5df65bc693cbe87dd79cd3204c62698ae03157b..0ba0c930b7823fd6f76459ed36e362d5c5e9e86a 100644 (file)
    The <xref linkend="guc-maintenance-io-concurrency"/> and
    <xref linkend="guc-wal-decode-buffer-size"/> settings limit prefetching
    concurrency and distance, respectively.  By default, it is set to
-   <literal>try</literal>, which enables the feature on systems where
-   <function>posix_fadvise</function> is available.
+   <literal>try</literal>, which enables the feature on systems that support
+   issuing read-ahead advice.
   </para>
  </sect1>
 
index 6202c5ebe44b3d4476dc2f1a1d1bc4bebb5c4e94..136c584305ae954bf23d58a42e2a583c8a37be88 100644 (file)
@@ -1212,7 +1212,7 @@ check_effective_io_concurrency(int *newval, void **extra, GucSource source)
 #ifndef USE_PREFETCH
    if (*newval != 0)
    {
-       GUC_check_errdetail("\"effective_io_concurrency\" must be set to 0 on platforms that lack posix_fadvise().");
+       GUC_check_errdetail("\"effective_io_concurrency\" must be set to 0 on platforms that lack support for issuing read-ahead advice.");
        return false;
    }
 #endif                         /* USE_PREFETCH */
@@ -1225,7 +1225,7 @@ check_maintenance_io_concurrency(int *newval, void **extra, GucSource source)
 #ifndef USE_PREFETCH
    if (*newval != 0)
    {
-       GUC_check_errdetail("\"maintenance_io_concurrency\" must be set to 0 on platforms that lack posix_fadvise().");
+       GUC_check_errdetail("\"maintenance_io_concurrency\" must be set to 0 on platforms that lack support for issuing read-ahead advice.");
        return false;
    }
 #endif                         /* USE_PREFETCH */
index 3944321ff37766ae7bff3068e54b2f55e39b2565..c84f089665c92229b06e03459d9c76622bcd063a 100644 (file)
@@ -2068,40 +2068,59 @@ FileClose(File file)
 /*
  * FilePrefetch - initiate asynchronous read of a given range of the file.
  *
- * Currently the only implementation of this function is using posix_fadvise
- * which is the simplest standardized interface that accomplishes this.
- * We could add an implementation using libaio in the future; but note that
- * this API is inappropriate for libaio, which wants to have a buffer provided
- * to read into.
+ * Returns 0 on success, otherwise an errno error code (like posix_fadvise()).
+ *
+ * posix_fadvise() is the simplest standardized interface that accomplishes
+ * this.
  */
 int
 FilePrefetch(File file, off_t offset, off_t amount, uint32 wait_event_info)
 {
-#if defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_WILLNEED)
-   int         returnCode;
-
    Assert(FileIsValid(file));
 
    DO_DB(elog(LOG, "FilePrefetch: %d (%s) " INT64_FORMAT " " INT64_FORMAT,
               file, VfdCache[file].fileName,
               (int64) offset, (int64) amount));
 
-   returnCode = FileAccess(file);
-   if (returnCode < 0)
-       return returnCode;
+#if defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_WILLNEED)
+   {
+       int         returnCode;
+
+       returnCode = FileAccess(file);
+       if (returnCode < 0)
+           return returnCode;
 
 retry:
-   pgstat_report_wait_start(wait_event_info);
-   returnCode = posix_fadvise(VfdCache[file].fd, offset, amount,
-                              POSIX_FADV_WILLNEED);
-   pgstat_report_wait_end();
+       pgstat_report_wait_start(wait_event_info);
+       returnCode = posix_fadvise(VfdCache[file].fd, offset, amount,
+                                  POSIX_FADV_WILLNEED);
+       pgstat_report_wait_end();
 
-   if (returnCode == EINTR)
-       goto retry;
+       if (returnCode == EINTR)
+           goto retry;
 
-   return returnCode;
+       return returnCode;
+   }
+#elif defined(__darwin__)
+   {
+       struct radvisory
+       {
+           off_t       ra_offset;  /* offset into the file */
+           int         ra_count;   /* size of the read     */
+       }           ra;
+       int         returnCode;
+
+       ra.ra_offset = offset;
+       ra.ra_count = amount;
+       pgstat_report_wait_start(wait_event_info);
+       returnCode = fcntl(VfdCache[file].fd, F_RDADVISE, &ra);
+       pgstat_report_wait_end();
+       if (returnCode != -1)
+           return 0;
+       else
+           return errno;
+   }
 #else
-   Assert(FileIsValid(file));
    return 0;
 #endif
 }
index e799c2989b82a68bdc790fad2af454afd324dcaa..e49eb13e43c193a00fa02769a6a7330a2ca8ff75 100644 (file)
 /*
  * USE_PREFETCH code should be compiled only if we have a way to implement
  * prefetching.  (This is decoupled from USE_POSIX_FADVISE because there
- * might in future be support for alternative low-level prefetch APIs.
- * If you change this, you probably need to adjust the error message in
- * check_effective_io_concurrency.)
+ * might in future be support for alternative low-level prefetch APIs,
+ * as well as platform-specific APIs defined elsewhere.)
  */
 #ifdef USE_POSIX_FADVISE
 #define USE_PREFETCH
index 15fb69d6dbb4bcf60b26640146f6f850296bcc63..6aa2ea70f6b0d805ca9e29409066cfbdfa911882 100644 (file)
@@ -6,3 +6,8 @@
 #define HAVE_FSYNC_WRITETHROUGH
 
 #endif
+
+/*
+ * macOS has a platform-specific implementation of prefetching.
+ */
+#define USE_PREFETCH