Provide helper for retrying partial vectored I/O.
author: Thomas Munro <tmunro@postgresql.org>
Mon, 11 Dec 2023 21:28:46 +0000 (10:28 +1300)
committer: Thomas Munro <tmunro@postgresql.org>
Mon, 11 Dec 2023 21:57:18 +0000 (10:57 +1300)
compute_remaining_iovec() is a re-usable routine for retrying after
pg_readv() or pg_writev() reports a short transfer.  This will gain new
users in a later commit, but can already replace the open-coded
equivalent code in the existing pg_pwritev_with_retry() function.

Reviewed-by: Heikki Linnakangas <hlinnaka@iki.fi>
Discussion: https://postgr.es/m/CA+hUKGJkOiOCa+mag4BF+zHo7qo=o9CFheB8=g6uT5TUm2gkvA@mail.gmail.com

src/common/file_utils.c
src/include/common/file_utils.h

index abe5129412db5c65075fb5db849bd4442e17804d..5380299f35f7cf16fb313592c8e7d67cc939f9e4 100644 (file)
@@ -581,6 +581,52 @@ get_dirent_type(const char *path,
        return result;
 }
 
+/*
+ * Compute what remains to be done after a possibly partial vectored read or
+ * write.  The part of 'source' beginning after 'transferred' bytes is copied
+ * to 'destination', and its length is returned.  'source' and 'destination'
+ * may point to the same array, for in-place adjustment.  A return value of
+ * zero indicates completion (for callers without a cheaper way to know that).
+ *
+ * 'iovcnt' is the number of entries in 'source' and must be greater than
+ * zero.  'transferred' is the number of bytes already handled by the
+ * transfer that used 'source'; it is expected not to exceed the total length
+ * described by 'source'.
+ *
+ * The return value is the number of entries written to 'destination', which
+ * is never more than 'iovcnt'.  When zero is returned, 'destination' is not
+ * written at all.
+ *
+ * Leading entries with an iov_len of zero are skipped together with the
+ * wholly transferred ones; zero-length entries that follow the first entry
+ * with outstanding bytes are copied unchanged.
+ */
+int
+compute_remaining_iovec(struct iovec *destination,
+                                               const struct iovec *source,
+                                               int iovcnt,
+                                               size_t transferred)
+{
+       /* The skip loop below reads source[0] unconditionally. */
+       Assert(iovcnt > 0);
+
+       /*
+        * Skip wholly transferred iovecs.  On each iteration 'transferred' is
+        * reduced by the length of the entry being skipped, so after the loop it
+        * holds the number of bytes consumed from the first remaining entry.
+        */
+       while (source->iov_len <= transferred)
+       {
+               transferred -= source->iov_len;
+               source++;
+               iovcnt--;
+
+               /*
+                * All iovecs transferred?  This check must precede the next loop
+                * test, which would otherwise read one entry past the end.
+                */
+               if (iovcnt == 0)
+               {
+                       /*
+                        * We don't expect the kernel to transfer more than we asked it
+                        * to, or something is out of sync.
+                        */
+                       Assert(transferred == 0);
+                       return 0;
+               }
+       }
+
+       /*
+        * Copy the remaining iovecs to the front of the array.  The copy is
+        * skipped when nothing was stepped over during in-place use.  memmove()
+        * is used because, for in-place adjustment, 'source' has been advanced
+        * into the same array that 'destination' points to, so the two regions
+        * can overlap.
+        */
+       if (source != destination)
+               memmove(destination, source, sizeof(*source) * iovcnt);
+
+       /*
+        * Adjust leading iovec, which may have been partially transferred.  The
+        * loop above exited because this entry is longer than 'transferred', so
+        * the subtraction cannot wrap.
+        */
+       Assert(destination->iov_len > transferred);
+       destination->iov_base = (char *) destination->iov_base + transferred;
+       destination->iov_len -= transferred;
+
+       return iovcnt;
+}
+
 /*
  * pg_pwritev_with_retry
  *
@@ -601,7 +647,7 @@ pg_pwritev_with_retry(int fd, const struct iovec *iov, int iovcnt, off_t offset)
                return -1;
        }
 
-       for (;;)
+       do
        {
                /* Write as much as we can. */
                part = pg_pwritev(fd, iov, iovcnt, offset);
@@ -616,33 +662,14 @@ pg_pwritev_with_retry(int fd, const struct iovec *iov, int iovcnt, off_t offset)
                sum += part;
                offset += part;
 
-               /* Step over iovecs that are done. */
-               while (iovcnt > 0 && iov->iov_len <= part)
-               {
-                       part -= iov->iov_len;
-                       ++iov;
-                       --iovcnt;
-               }
-
-               /* Are they all done? */
-               if (iovcnt == 0)
-               {
-                       /* We don't expect the kernel to write more than requested. */
-                       Assert(part == 0);
-                       break;
-               }
-
                /*
-                * Move whatever's left to the front of our mutable copy and adjust
-                * the leading iovec.
+                * See what is left.  On the first loop we used the caller's array,
+                * but in later loops we'll use our local copy that we are allowed to
+                * mutate.
                 */
-               Assert(iovcnt > 0);
-               memmove(iov_copy, iov, sizeof(*iov) * iovcnt);
-               Assert(iov->iov_len > part);
-               iov_copy[0].iov_base = (char *) iov_copy[0].iov_base + part;
-               iov_copy[0].iov_len -= part;
+               iovcnt = compute_remaining_iovec(iov_copy, iov, iovcnt, part);
                iov = iov_copy;
-       }
+       } while (iovcnt > 0);
 
        return sum;
 }
index 3bb20170cbd5ce93d0f39ec18e317e1eaa145ab9..02a940e31029421cdb5dfe493ea20b1c9cca6c89 100644 (file)
@@ -46,6 +46,11 @@ extern PGFileType get_dirent_type(const char *path,
                                                                  bool look_through_symlinks,
                                                                  int elevel);
 
+extern int     compute_remaining_iovec(struct iovec *destination,
+                                                                       const struct iovec *source,
+                                                                       int iovcnt,
+                                                                       size_t transferred);
+
 extern ssize_t pg_pwritev_with_retry(int fd,
                                                                         const struct iovec *iov,
                                                                         int iovcnt,