Use pg_pread() and pg_pwrite() for data files and WAL.
author: Thomas Munro <tmunro@postgresql.org>
Tue, 6 Nov 2018 20:51:50 +0000 (09:51 +1300)
committer: Thomas Munro <tmunro@postgresql.org>
Tue, 6 Nov 2018 20:51:50 +0000 (09:51 +1300)
Cut down on system calls by doing random I/O using offset-based OS
routines where available.  Remove the code for tracking the 'virtual'
seek position.  The only reason left to call FileSeek() was to get
the file's size, so provide a new function FileSize() instead.

Author: Oskari Saarenmaa, Thomas Munro
Reviewed-by: Thomas Munro, Jesper Pedersen, Tom Lane, Alvaro Herrera
Discussion: https://postgr.es/m/CAEepm=02rapCpPR3ZGF2vW=SBHSdFYO_bz_f-wwWJonmA3APgw@mail.gmail.com
Discussion: https://postgr.es/m/b8748d39-0b19-0514-a1b9-4e5a28e6a208%40gmail.com
Discussion: https://postgr.es/m/a86bd200-ebbe-d829-e3ca-0c4474b2fcb7%40ohmu.fi

src/backend/access/heap/rewriteheap.c
src/backend/access/transam/xlog.c
src/backend/storage/file/buffile.c
src/backend/storage/file/fd.c
src/backend/storage/smgr/md.c
src/include/storage/fd.h

index 71277889649938ad2330fa35339880adc60da065..c5db75afa1fe02d75b513596c044c97d339bef7f 100644 (file)
@@ -935,7 +935,7 @@ logical_heap_rewrite_flush_mappings(RewriteState state)
                 * Note that we deviate from the usual WAL coding practices here,
                 * check the above "Logical rewrite support" comment for reasoning.
                 */
-               written = FileWrite(src->vfd, waldata_start, len,
+               written = FileWrite(src->vfd, waldata_start, len, src->off,
                                                        WAIT_EVENT_LOGICAL_REWRITE_WRITE);
                if (written != len)
                        ereport(ERROR,
index 246869bba2982e089aaf10729ea97e117e014988..7eed5866d2e5a87dc93a192dfb9242078cfa0e3b 100644 (file)
@@ -2478,18 +2478,6 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
                        Size            nleft;
                        int                     written;
 
-                       /* Need to seek in the file? */
-                       if (openLogOff != startoffset)
-                       {
-                               if (lseek(openLogFile, (off_t) startoffset, SEEK_SET) < 0)
-                                       ereport(PANIC,
-                                                       (errcode_for_file_access(),
-                                                        errmsg("could not seek in log file %s to offset %u: %m",
-                                                                       XLogFileNameP(ThisTimeLineID, openLogSegNo),
-                                                                       startoffset)));
-                               openLogOff = startoffset;
-                       }
-
                        /* OK to write the page(s) */
                        from = XLogCtl->pages + startidx * (Size) XLOG_BLCKSZ;
                        nbytes = npages * (Size) XLOG_BLCKSZ;
@@ -2498,7 +2486,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
                        {
                                errno = 0;
                                pgstat_report_wait_start(WAIT_EVENT_WAL_WRITE);
-                               written = write(openLogFile, from, nleft);
+                               written = pg_pwrite(openLogFile, from, nleft, startoffset);
                                pgstat_report_wait_end();
                                if (written <= 0)
                                {
@@ -2513,6 +2501,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
                                }
                                nleft -= written;
                                from += written;
+                               startoffset += written;
                        } while (nleft > 0);
 
                        /* Update state for write */
@@ -11821,22 +11810,9 @@ retry:
 
        /* Read the requested page */
        readOff = targetPageOff;
-       if (lseek(readFile, (off_t) readOff, SEEK_SET) < 0)
-       {
-               char            fname[MAXFNAMELEN];
-               int                     save_errno = errno;
-
-               XLogFileName(fname, curFileTLI, readSegNo, wal_segment_size);
-               errno = save_errno;
-               ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
-                               (errcode_for_file_access(),
-                                errmsg("could not seek in log segment %s to offset %u: %m",
-                                               fname, readOff)));
-               goto next_record_is_invalid;
-       }
 
        pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
-       r = read(readFile, readBuf, XLOG_BLCKSZ);
+       r = pg_pread(readFile, readBuf, XLOG_BLCKSZ, (off_t) readOff);
        if (r != XLOG_BLCKSZ)
        {
                char            fname[MAXFNAMELEN];
index e93813d97371f32aa0f6cbe839fdbd34b7dfacbc..dd687dfe71fc6617c2649ccb75f3f718b17c7324 100644 (file)
@@ -67,12 +67,6 @@ struct BufFile
        int                     numFiles;               /* number of physical files in set */
        /* all files except the last have length exactly MAX_PHYSICAL_FILESIZE */
        File       *files;                      /* palloc'd array with numFiles entries */
-       off_t      *offsets;            /* palloc'd array with numFiles entries */
-
-       /*
-        * offsets[i] is the current seek position of files[i].  We use this to
-        * avoid making redundant FileSeek calls.
-        */
 
        bool            isInterXact;    /* keep open over transactions? */
        bool            dirty;                  /* does buffer need to be written? */
@@ -116,7 +110,6 @@ makeBufFileCommon(int nfiles)
        BufFile    *file = (BufFile *) palloc(sizeof(BufFile));
 
        file->numFiles = nfiles;
-       file->offsets = (off_t *) palloc0(sizeof(off_t) * nfiles);
        file->isInterXact = false;
        file->dirty = false;
        file->resowner = CurrentResourceOwner;
@@ -170,10 +163,7 @@ extendBufFile(BufFile *file)
 
        file->files = (File *) repalloc(file->files,
                                                                        (file->numFiles + 1) * sizeof(File));
-       file->offsets = (off_t *) repalloc(file->offsets,
-                                                                          (file->numFiles + 1) * sizeof(off_t));
        file->files[file->numFiles] = pfile;
-       file->offsets[file->numFiles] = 0L;
        file->numFiles++;
 }
 
@@ -396,7 +386,6 @@ BufFileClose(BufFile *file)
                FileClose(file->files[i]);
        /* release the buffer space */
        pfree(file->files);
-       pfree(file->offsets);
        pfree(file);
 }
 
@@ -422,27 +411,17 @@ BufFileLoadBuffer(BufFile *file)
                file->curOffset = 0L;
        }
 
-       /*
-        * May need to reposition physical file.
-        */
-       thisfile = file->files[file->curFile];
-       if (file->curOffset != file->offsets[file->curFile])
-       {
-               if (FileSeek(thisfile, file->curOffset, SEEK_SET) != file->curOffset)
-                       return;                         /* seek failed, read nothing */
-               file->offsets[file->curFile] = file->curOffset;
-       }
-
        /*
         * Read whatever we can get, up to a full bufferload.
         */
+       thisfile = file->files[file->curFile];
        file->nbytes = FileRead(thisfile,
                                                        file->buffer.data,
                                                        sizeof(file->buffer),
+                                                       file->curOffset,
                                                        WAIT_EVENT_BUFFILE_READ);
        if (file->nbytes < 0)
                file->nbytes = 0;
-       file->offsets[file->curFile] += file->nbytes;
        /* we choose not to advance curOffset here */
 
        if (file->nbytes > 0)
@@ -491,23 +470,14 @@ BufFileDumpBuffer(BufFile *file)
                if ((off_t) bytestowrite > availbytes)
                        bytestowrite = (int) availbytes;
 
-               /*
-                * May need to reposition physical file.
-                */
                thisfile = file->files[file->curFile];
-               if (file->curOffset != file->offsets[file->curFile])
-               {
-                       if (FileSeek(thisfile, file->curOffset, SEEK_SET) != file->curOffset)
-                               return;                 /* seek failed, give up */
-                       file->offsets[file->curFile] = file->curOffset;
-               }
                bytestowrite = FileWrite(thisfile,
                                                                 file->buffer.data + wpos,
                                                                 bytestowrite,
+                                                                file->curOffset,
                                                                 WAIT_EVENT_BUFFILE_WRITE);
                if (bytestowrite <= 0)
                        return;                         /* failed to write */
-               file->offsets[file->curFile] += bytestowrite;
                file->curOffset += bytestowrite;
                wpos += bytestowrite;
 
@@ -803,11 +773,10 @@ BufFileSize(BufFile *file)
 {
        off_t           lastFileSize;
 
-       /* Get the size of the last physical file by seeking to end. */
-       lastFileSize = FileSeek(file->files[file->numFiles - 1], 0, SEEK_END);
+       /* Get the size of the last physical file. */
+       lastFileSize = FileSize(file->files[file->numFiles - 1]);
        if (lastFileSize < 0)
                return -1;
-       file->offsets[file->numFiles - 1] = lastFileSize;
 
        return ((file->numFiles - 1) * (off_t) MAX_PHYSICAL_FILESIZE) +
                lastFileSize;
@@ -849,13 +818,8 @@ BufFileAppend(BufFile *target, BufFile *source)
 
        target->files = (File *)
                repalloc(target->files, sizeof(File) * newNumFiles);
-       target->offsets = (off_t *)
-               repalloc(target->offsets, sizeof(off_t) * newNumFiles);
        for (i = target->numFiles; i < newNumFiles; i++)
-       {
                target->files[i] = source->files[i - target->numFiles];
-               target->offsets[i] = source->offsets[i - target->numFiles];
-       }
        target->numFiles = newNumFiles;
 
        return startBlock;
index 8dd51f176749c5d1b2adc9993b5a9a3571ee566c..6611edbbd2c206e803d6e23d3c4717de37e9e706 100644 (file)
@@ -16,8 +16,8 @@
  * including base tables, scratch files (e.g., sort and hash spool
  * files), and random calls to C library routines like system(3); it
  * is quite easy to exceed system limits on the number of open files a
- * single process can have.  (This is around 256 on many modern
- * operating systems, but can be as low as 32 on others.)
+ * single process can have.  (This is around 1024 on many modern
+ * operating systems, but may be lower on others.)
  *
  * VFDs are managed as an LRU pool, with actual OS file descriptors
  * being opened and closed as needed.  Obviously, if a routine is
@@ -167,15 +167,6 @@ int                        max_safe_fds = 32;      /* default if not changed */
 
 #define FileIsNotOpen(file) (VfdCache[file].fd == VFD_CLOSED)
 
-/*
- * Note: a VFD's seekPos is normally always valid, but if for some reason
- * an lseek() fails, it might become set to FileUnknownPos.  We can struggle
- * along without knowing the seek position in many cases, but in some places
- * we have to fail if we don't have it.
- */
-#define FileUnknownPos ((off_t) -1)
-#define FilePosIsUnknown(pos) ((pos) < 0)
-
 /* these are the assigned bits in fdstate below: */
 #define FD_DELETE_AT_CLOSE     (1 << 0)        /* T = delete when closed */
 #define FD_CLOSE_AT_EOXACT     (1 << 1)        /* T = close at eoXact */
@@ -189,7 +180,6 @@ typedef struct vfd
        File            nextFree;               /* link to next free VFD, if in freelist */
        File            lruMoreRecently;        /* doubly linked recency-of-use list */
        File            lruLessRecently;
-       off_t           seekPos;                /* current logical file position, or -1 */
        off_t           fileSize;               /* current size of file (0 if not temporary) */
        char       *fileName;           /* name of file, or NULL for unused VFD */
        /* NB: fileName is malloc'd, and must be free'd when closing the VFD */
@@ -407,9 +397,7 @@ pg_fdatasync(int fd)
 /*
  * pg_flush_data --- advise OS that the described dirty data should be flushed
  *
- * offset of 0 with nbytes 0 means that the entire file should be flushed;
- * in this case, this function may have side-effects on the file's
- * seek position!
+ * offset of 0 with nbytes 0 means that the entire file should be flushed
  */
 void
 pg_flush_data(int fd, off_t offset, off_t nbytes)
@@ -1029,22 +1017,6 @@ LruDelete(File file)
 
        vfdP = &VfdCache[file];
 
-       /*
-        * Normally we should know the seek position, but if for some reason we
-        * have lost track of it, try again to get it.  If we still can't get it,
-        * we have a problem: we will be unable to restore the file seek position
-        * when and if the file is re-opened.  But we can't really throw an error
-        * and refuse to close the file, or activities such as transaction cleanup
-        * will be broken.
-        */
-       if (FilePosIsUnknown(vfdP->seekPos))
-       {
-               vfdP->seekPos = lseek(vfdP->fd, (off_t) 0, SEEK_CUR);
-               if (FilePosIsUnknown(vfdP->seekPos))
-                       elog(LOG, "could not seek file \"%s\" before closing: %m",
-                                vfdP->fileName);
-       }
-
        /*
         * Close the file.  We aren't expecting this to fail; if it does, better
         * to leak the FD than to mess up our internal state.
@@ -1113,33 +1085,6 @@ LruInsert(File file)
                {
                        ++nfile;
                }
-
-               /*
-                * Seek to the right position.  We need no special case for seekPos
-                * equal to FileUnknownPos, as lseek() will certainly reject that
-                * (thus completing the logic noted in LruDelete() that we will fail
-                * to re-open a file if we couldn't get its seek position before
-                * closing).
-                */
-               if (vfdP->seekPos != (off_t) 0)
-               {
-                       if (lseek(vfdP->fd, vfdP->seekPos, SEEK_SET) < 0)
-                       {
-                               /*
-                                * If we fail to restore the seek position, treat it like an
-                                * open() failure.
-                                */
-                               int                     save_errno = errno;
-
-                               elog(LOG, "could not seek file \"%s\" after re-opening: %m",
-                                        vfdP->fileName);
-                               (void) close(vfdP->fd);
-                               vfdP->fd = VFD_CLOSED;
-                               --nfile;
-                               errno = save_errno;
-                               return -1;
-                       }
-               }
        }
 
        /*
@@ -1406,7 +1351,6 @@ PathNameOpenFilePerm(const char *fileName, int fileFlags, mode_t fileMode)
        /* Saved flags are adjusted to be OK for re-opening file */
        vfdP->fileFlags = fileFlags & ~(O_CREAT | O_TRUNC | O_EXCL);
        vfdP->fileMode = fileMode;
-       vfdP->seekPos = 0;
        vfdP->fileSize = 0;
        vfdP->fdstate = 0x0;
        vfdP->resowner = NULL;
@@ -1820,7 +1764,6 @@ FileClose(File file)
 
 /*
  * FilePrefetch - initiate asynchronous read of a given range of the file.
- * The logical seek position is unaffected.
  *
  * Currently the only implementation of this function is using posix_fadvise
  * which is the simplest standardized interface that accomplishes this.
@@ -1867,10 +1810,6 @@ FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info)
                           file, VfdCache[file].fileName,
                           (int64) offset, (int64) nbytes));
 
-       /*
-        * Caution: do not call pg_flush_data with nbytes = 0, it could trash the
-        * file's seek position.  We prefer to define that as a no-op here.
-        */
        if (nbytes <= 0)
                return;
 
@@ -1884,7 +1823,8 @@ FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info)
 }
 
 int
-FileRead(File file, char *buffer, int amount, uint32 wait_event_info)
+FileRead(File file, char *buffer, int amount, off_t offset,
+                uint32 wait_event_info)
 {
        int                     returnCode;
        Vfd                *vfdP;
@@ -1893,7 +1833,7 @@ FileRead(File file, char *buffer, int amount, uint32 wait_event_info)
 
        DO_DB(elog(LOG, "FileRead: %d (%s) " INT64_FORMAT " %d %p",
                           file, VfdCache[file].fileName,
-                          (int64) VfdCache[file].seekPos,
+                          (int64) offset,
                           amount, buffer));
 
        returnCode = FileAccess(file);
@@ -1904,16 +1844,10 @@ FileRead(File file, char *buffer, int amount, uint32 wait_event_info)
 
 retry:
        pgstat_report_wait_start(wait_event_info);
-       returnCode = read(vfdP->fd, buffer, amount);
+       returnCode = pg_pread(vfdP->fd, buffer, amount, offset);
        pgstat_report_wait_end();
 
-       if (returnCode >= 0)
-       {
-               /* if seekPos is unknown, leave it that way */
-               if (!FilePosIsUnknown(vfdP->seekPos))
-                       vfdP->seekPos += returnCode;
-       }
-       else
+       if (returnCode < 0)
        {
                /*
                 * Windows may run out of kernel buffers and return "Insufficient
@@ -1939,16 +1873,14 @@ retry:
                /* OK to retry if interrupted */
                if (errno == EINTR)
                        goto retry;
-
-               /* Trouble, so assume we don't know the file position anymore */
-               vfdP->seekPos = FileUnknownPos;
        }
 
        return returnCode;
 }
 
 int
-FileWrite(File file, char *buffer, int amount, uint32 wait_event_info)
+FileWrite(File file, char *buffer, int amount, off_t offset,
+                 uint32 wait_event_info)
 {
        int                     returnCode;
        Vfd                *vfdP;
@@ -1957,7 +1889,7 @@ FileWrite(File file, char *buffer, int amount, uint32 wait_event_info)
 
        DO_DB(elog(LOG, "FileWrite: %d (%s) " INT64_FORMAT " %d %p",
                           file, VfdCache[file].fileName,
-                          (int64) VfdCache[file].seekPos,
+                          (int64) offset,
                           amount, buffer));
 
        returnCode = FileAccess(file);
@@ -1976,26 +1908,13 @@ FileWrite(File file, char *buffer, int amount, uint32 wait_event_info)
         */
        if (temp_file_limit >= 0 && (vfdP->fdstate & FD_TEMP_FILE_LIMIT))
        {
-               off_t           newPos;
+               off_t           past_write = offset + amount;
 
-               /*
-                * Normally we should know the seek position, but if for some reason
-                * we have lost track of it, try again to get it.  Here, it's fine to
-                * throw an error if we still can't get it.
-                */
-               if (FilePosIsUnknown(vfdP->seekPos))
-               {
-                       vfdP->seekPos = lseek(vfdP->fd, (off_t) 0, SEEK_CUR);
-                       if (FilePosIsUnknown(vfdP->seekPos))
-                               elog(ERROR, "could not seek file \"%s\": %m", vfdP->fileName);
-               }
-
-               newPos = vfdP->seekPos + amount;
-               if (newPos > vfdP->fileSize)
+               if (past_write > vfdP->fileSize)
                {
                        uint64          newTotal = temporary_files_size;
 
-                       newTotal += newPos - vfdP->fileSize;
+                       newTotal += past_write - vfdP->fileSize;
                        if (newTotal > (uint64) temp_file_limit * (uint64) 1024)
                                ereport(ERROR,
                                                (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
@@ -2007,7 +1926,7 @@ FileWrite(File file, char *buffer, int amount, uint32 wait_event_info)
 retry:
        errno = 0;
        pgstat_report_wait_start(wait_event_info);
-       returnCode = write(vfdP->fd, buffer, amount);
+       returnCode = pg_pwrite(VfdCache[file].fd, buffer, amount, offset);
        pgstat_report_wait_end();
 
        /* if write didn't set errno, assume problem is no disk space */
@@ -2016,10 +1935,6 @@ retry:
 
        if (returnCode >= 0)
        {
-               /* if seekPos is unknown, leave it that way */
-               if (!FilePosIsUnknown(vfdP->seekPos))
-                       vfdP->seekPos += returnCode;
-
                /*
                 * Maintain fileSize and temporary_files_size if it's a temp file.
                 *
@@ -2029,12 +1944,12 @@ retry:
                 */
                if (vfdP->fdstate & FD_TEMP_FILE_LIMIT)
                {
-                       off_t           newPos = vfdP->seekPos;
+                       off_t           past_write = offset + amount;
 
-                       if (newPos > vfdP->fileSize)
+                       if (past_write > vfdP->fileSize)
                        {
-                               temporary_files_size += newPos - vfdP->fileSize;
-                               vfdP->fileSize = newPos;
+                               temporary_files_size += past_write - vfdP->fileSize;
+                               vfdP->fileSize = past_write;
                        }
                }
        }
@@ -2060,9 +1975,6 @@ retry:
                /* OK to retry if interrupted */
                if (errno == EINTR)
                        goto retry;
-
-               /* Trouble, so assume we don't know the file position anymore */
-               vfdP->seekPos = FileUnknownPos;
        }
 
        return returnCode;
@@ -2090,92 +2002,25 @@ FileSync(File file, uint32 wait_event_info)
 }
 
 off_t
-FileSeek(File file, off_t offset, int whence)
+FileSize(File file)
 {
        Vfd                *vfdP;
 
        Assert(FileIsValid(file));
 
-       DO_DB(elog(LOG, "FileSeek: %d (%s) " INT64_FORMAT " " INT64_FORMAT " %d",
-                          file, VfdCache[file].fileName,
-                          (int64) VfdCache[file].seekPos,
-                          (int64) offset, whence));
+       DO_DB(elog(LOG, "FileSize %d (%s)",
+                          file, VfdCache[file].fileName));
 
        vfdP = &VfdCache[file];
 
        if (FileIsNotOpen(file))
        {
-               switch (whence)
-               {
-                       case SEEK_SET:
-                               if (offset < 0)
-                               {
-                                       errno = EINVAL;
-                                       return (off_t) -1;
-                               }
-                               vfdP->seekPos = offset;
-                               break;
-                       case SEEK_CUR:
-                               if (FilePosIsUnknown(vfdP->seekPos) ||
-                                       vfdP->seekPos + offset < 0)
-                               {
-                                       errno = EINVAL;
-                                       return (off_t) -1;
-                               }
-                               vfdP->seekPos += offset;
-                               break;
-                       case SEEK_END:
-                               if (FileAccess(file) < 0)
-                                       return (off_t) -1;
-                               vfdP->seekPos = lseek(vfdP->fd, offset, whence);
-                               break;
-                       default:
-                               elog(ERROR, "invalid whence: %d", whence);
-                               break;
-               }
+               if (FileAccess(file) < 0)
+                       return (off_t) -1;
        }
-       else
-       {
-               switch (whence)
-               {
-                       case SEEK_SET:
-                               if (offset < 0)
-                               {
-                                       errno = EINVAL;
-                                       return (off_t) -1;
-                               }
-                               if (vfdP->seekPos != offset)
-                                       vfdP->seekPos = lseek(vfdP->fd, offset, whence);
-                               break;
-                       case SEEK_CUR:
-                               if (offset != 0 || FilePosIsUnknown(vfdP->seekPos))
-                                       vfdP->seekPos = lseek(vfdP->fd, offset, whence);
-                               break;
-                       case SEEK_END:
-                               vfdP->seekPos = lseek(vfdP->fd, offset, whence);
-                               break;
-                       default:
-                               elog(ERROR, "invalid whence: %d", whence);
-                               break;
-               }
-       }
-
-       return vfdP->seekPos;
-}
 
-/*
- * XXX not actually used but here for completeness
- */
-#ifdef NOT_USED
-off_t
-FileTell(File file)
-{
-       Assert(FileIsValid(file));
-       DO_DB(elog(LOG, "FileTell %d (%s)",
-                          file, VfdCache[file].fileName));
-       return VfdCache[file].seekPos;
+       return lseek(VfdCache[file].fd, 0, SEEK_END);
 }
-#endif
 
 int
 FileTruncate(File file, off_t offset, uint32 wait_event_info)
index f4374d077be981af072366fda757e7d2a9972de2..86013a5c8b2f093724d40d05c82d6d315bf04fde 100644 (file)
@@ -522,22 +522,7 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 
        Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
 
-       /*
-        * Note: because caller usually obtained blocknum by calling mdnblocks,
-        * which did a seek(SEEK_END), this seek is often redundant and will be
-        * optimized away by fd.c.  It's not redundant, however, if there is a
-        * partial page at the end of the file. In that case we want to try to
-        * overwrite the partial page with a full page.  It's also not redundant
-        * if bufmgr.c had to dump another buffer of the same file to make room
-        * for the new page's buffer.
-        */
-       if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
-               ereport(ERROR,
-                               (errcode_for_file_access(),
-                                errmsg("could not seek to block %u in file \"%s\": %m",
-                                               blocknum, FilePathName(v->mdfd_vfd))));
-
-       if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, WAIT_EVENT_DATA_FILE_EXTEND)) != BLCKSZ)
+       if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_EXTEND)) != BLCKSZ)
        {
                if (nbytes < 0)
                        ereport(ERROR,
@@ -748,13 +733,7 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 
        Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
 
-       if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
-               ereport(ERROR,
-                               (errcode_for_file_access(),
-                                errmsg("could not seek to block %u in file \"%s\": %m",
-                                               blocknum, FilePathName(v->mdfd_vfd))));
-
-       nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ, WAIT_EVENT_DATA_FILE_READ);
+       nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_READ);
 
        TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum,
                                                                           reln->smgr_rnode.node.spcNode,
@@ -824,13 +803,7 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 
        Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
 
-       if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
-               ereport(ERROR,
-                               (errcode_for_file_access(),
-                                errmsg("could not seek to block %u in file \"%s\": %m",
-                                               blocknum, FilePathName(v->mdfd_vfd))));
-
-       nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, WAIT_EVENT_DATA_FILE_WRITE);
+       nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_WRITE);
 
        TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum,
                                                                                reln->smgr_rnode.node.spcNode,
@@ -1979,7 +1952,7 @@ _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
 {
        off_t           len;
 
-       len = FileSeek(seg->mdfd_vfd, 0L, SEEK_END);
+       len = FileSize(seg->mdfd_vfd);
        if (len < 0)
                ereport(ERROR,
                                (errcode_for_file_access(),
index 8e7c9728f4b3c3c969d8ec15617854e51df4bb12..1289589a46b4a5c776eb421b0ddf86ba0f3f2e79 100644 (file)
@@ -15,7 +15,7 @@
 /*
  * calls:
  *
- *     File {Close, Read, Write, Seek, Tell, Sync}
+ *     File {Close, Read, Write, Size, Sync}
  *     {Path Name Open, Allocate, Free} File
  *
  * These are NOT JUST RENAMINGS OF THE UNIX ROUTINES.
 #include <dirent.h>
 
 
-/*
- * FileSeek uses the standard UNIX lseek(2) flags.
- */
-
 typedef int File;
 
 
@@ -68,10 +64,10 @@ extern File PathNameOpenFilePerm(const char *fileName, int fileFlags, mode_t fil
 extern File OpenTemporaryFile(bool interXact);
 extern void FileClose(File file);
 extern int     FilePrefetch(File file, off_t offset, int amount, uint32 wait_event_info);
-extern int     FileRead(File file, char *buffer, int amount, uint32 wait_event_info);
-extern int     FileWrite(File file, char *buffer, int amount, uint32 wait_event_info);
+extern int     FileRead(File file, char *buffer, int amount, off_t offset, uint32 wait_event_info);
+extern int     FileWrite(File file, char *buffer, int amount, off_t offset, uint32 wait_event_info);
 extern int     FileSync(File file, uint32 wait_event_info);
-extern off_t FileSeek(File file, off_t offset, int whence);
+extern off_t FileSize(File file);
 extern int     FileTruncate(File file, off_t offset, uint32 wait_event_info);
 extern void FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info);
 extern char *FilePathName(File file);