diff options
author | Tom Lane | 2012-07-17 20:55:39 +0000 |
---|---|---|
committer | Tom Lane | 2012-07-17 20:56:54 +0000 |
commit | 73b796a52c50d6f44400c99eff1a01c89d08782f (patch) | |
tree | d7f4cfff90c2d4412f4db45adb15ba957a56255e /src/include | |
parent | 71f2dd23210f9607d1584fad89e0f8df9750e921 (diff) |
Improve coding around the fsync request queue.
In all branches back to 8.3, this patch fixes a questionable assumption in
CompactCheckpointerRequestQueue/CompactBgwriterRequestQueue that there are
no uninitialized pad bytes in the request queue structs. This would only
cause trouble if (a) there were such pad bytes, which could happen in 8.4
and up if the compiler makes enum ForkNumber narrower than 32 bits, but
otherwise would require not-currently-planned changes in the widths of
other typedefs; and (b) the kernel has not uniformly initialized the
contents of shared memory to zeroes. Still, it seems a tad risky, and we
can easily remove any risk by pre-zeroing the request array for ourselves.
In addition to that, we need to establish a coding rule that struct
RelFileNode can't contain any padding bytes, since such structs are copied
into the request array verbatim. (There are other places that are assuming
this anyway, it turns out.)
In 9.1 and up, the risk was a bit larger because we were also effectively
assuming that struct RelFileNodeBackend contained no pad bytes, and with
fields of different types in there, that would be much easier to break.
However, there is no good reason to ever transmit fsync or delete requests
for temp files to the bgwriter/checkpointer, so we can revert the request
structs to plain RelFileNode, getting rid of the padding risk and saving
some marginal number of bytes and cycles in fsync queue manipulation while
we are at it. The savings might be more than marginal during deletion of
a temp relation, because the old code transmitted an entirely useless but
nonetheless expensive-to-process ForgetRelationFsync request to the
background process, and also had the background process perform the file
deletion even though that can safely be done immediately.
In addition, make some cleanup of nearby comments and small improvements to
the code in CompactCheckpointerRequestQueue/CompactBgwriterRequestQueue.
Diffstat (limited to 'src/include')
-rw-r--r-- | src/include/postmaster/bgwriter.h | 2 | ||||
-rw-r--r-- | src/include/storage/relfilenode.h | 16 | ||||
-rw-r--r-- | src/include/storage/smgr.h | 7 |
3 files changed, 18 insertions, 7 deletions
diff --git a/src/include/postmaster/bgwriter.h b/src/include/postmaster/bgwriter.h index 996065c2edf..2e97e6aea55 100644 --- a/src/include/postmaster/bgwriter.h +++ b/src/include/postmaster/bgwriter.h @@ -31,7 +31,7 @@ extern void CheckpointerMain(void) __attribute__((noreturn)); extern void RequestCheckpoint(int flags); extern void CheckpointWriteDelay(int flags, double progress); -extern bool ForwardFsyncRequest(RelFileNodeBackend rnode, ForkNumber forknum, +extern bool ForwardFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno); extern void AbsorbFsyncRequests(void); diff --git a/src/include/storage/relfilenode.h b/src/include/storage/relfilenode.h index 60c38295375..5ec1d8f7177 100644 --- a/src/include/storage/relfilenode.h +++ b/src/include/storage/relfilenode.h @@ -69,6 +69,10 @@ typedef enum ForkNumber * Note: in pg_class, relfilenode can be zero to denote that the relation * is a "mapped" relation, whose current true filenode number is available * from relmapper.c. Again, this case is NOT allowed in RelFileNodes. + * + * Note: various places use RelFileNode in hashtable keys. Therefore, + * there *must not* be any unused padding bytes in this struct. That + * should be safe as long as all the fields are of type Oid. */ typedef struct RelFileNode { @@ -79,7 +83,11 @@ typedef struct RelFileNode /* * Augmenting a relfilenode with the backend ID provides all the information - * we need to locate the physical storage. + * we need to locate the physical storage. The backend ID is InvalidBackendId + * for regular relations (those accessible to more than one backend), or the + * owning backend's ID for backend-local relations. Backend-local relations + * are always transient and removed in case of a database crash; they are + * never WAL-logged or fsync'd. */ typedef struct RelFileNodeBackend { @@ -87,11 +95,15 @@ typedef struct RelFileNodeBackend BackendId backend; } RelFileNodeBackend; +#define RelFileNodeBackendIsTemp(rnode) \ + ((rnode).backend != InvalidBackendId) + /* * Note: RelFileNodeEquals and RelFileNodeBackendEquals compare relNode first * since that is most likely to be different in two unequal RelFileNodes. It * is probably redundant to compare spcNode if the other fields are found equal, - * but do it anyway to be sure. + * but do it anyway to be sure. Likewise for checking the backend ID in + * RelFileNodeBackendEquals. */ #define RelFileNodeEquals(node1, node2) \ ((node1).relNode == (node2).relNode && \ diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h index f8fc2b2d6e8..3560d539076 100644 --- a/src/include/storage/smgr.h +++ b/src/include/storage/smgr.h @@ -69,7 +69,7 @@ typedef struct SMgrRelationData typedef SMgrRelationData *SMgrRelation; #define SmgrIsTemp(smgr) \ - ((smgr)->smgr_rnode.backend != InvalidBackendId) + RelFileNodeBackendIsTemp((smgr)->smgr_rnode) extern void smgrinit(void); extern SMgrRelation smgropen(RelFileNode rnode, BackendId backend); @@ -124,10 +124,9 @@ extern void mdsync(void); extern void mdpostckpt(void); extern void SetForwardFsyncRequests(void); -extern void RememberFsyncRequest(RelFileNodeBackend rnode, ForkNumber forknum, +extern void RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno); -extern void ForgetRelationFsyncRequests(RelFileNodeBackend rnode, - ForkNumber forknum); +extern void ForgetRelationFsyncRequests(RelFileNode rnode, ForkNumber forknum); extern void ForgetDatabaseFsyncRequests(Oid dbid); /* smgrtype.c */ |