Track I/O timing for temporary file blocks in EXPLAIN (BUFFERS)
authorMichael Paquier <michael@paquier.xyz>
Fri, 8 Apr 2022 02:27:21 +0000 (11:27 +0900)
committerMichael Paquier <michael@paquier.xyz>
Fri, 8 Apr 2022 02:27:21 +0000 (11:27 +0900)
Previously, the output of EXPLAIN (BUFFERS) option showed only the I/O
timing spent reading and writing shared and local buffers.  This commit
adds on top of that the I/O timing for temporary buffers in the output
of EXPLAIN (for spilled external sorts, hashes, materialization. etc).
This can be helpful for users in cases where the I/O related to
temporary buffers is the bottleneck.

Like its cousin, this information is available only when track_io_timing
is enabled.  Playing the patch, this is showing an extra overhead of up
to 1% even when using gettimeofday() as implementation for interval
timings, which is slightly within the usual range noise still that's
measurable.

Author: Masahiko Sawada
Reviewed-by: Georgios Kokolatos, Melanie Plageman, Julien Rouhaud,
Ranier Vilela
Discussion: https://postgr.es/m/CAD21AoAJgotTeP83p6HiAGDhs_9Fw9pZ2J=_tYTsiO5Ob-V5GQ@mail.gmail.com

doc/src/sgml/ref/explain.sgml
src/backend/commands/explain.c
src/backend/executor/instrument.c
src/backend/storage/file/buffile.c
src/include/executor/instrument.h
src/test/regress/expected/explain.out
src/test/regress/sql/explain.sql

index 4d758fb237e3994d56bedcbcb5642837953fa0bf..857c4e6e867e8e606b076ad1e6be684352979334 100644 (file)
@@ -174,14 +174,15 @@ ROLLBACK;
       Include information on buffer usage. Specifically, include the number of
       shared blocks hit, read, dirtied, and written, the number of local blocks
       hit, read, dirtied, and written, the number of temp blocks read and
-      written, and the time spent reading and writing data file blocks
-      (in milliseconds) if <xref linkend="guc-track-io-timing"/> is enabled.
-      A <emphasis>hit</emphasis> means that a read was avoided because the block was
-      found already in cache when needed.
+      written, and the time spent reading and writing data file blocks and
+      temporary file blocks (in milliseconds) if
+      <xref linkend="guc-track-io-timing"/> is enabled.  A
+      <emphasis>hit</emphasis> means that a read was avoided because the block
+      was found already in cache when needed.
       Shared blocks contain data from regular tables and indexes;
       local blocks contain data from temporary tables and indexes;
-      while temp blocks contain short-term working data used in sorts, hashes,
-      Materialize plan nodes, and similar cases.
+      while temporary blocks contain short-term working data used in sorts,
+      hashes, Materialize plan nodes, and similar cases.
       The number of blocks <emphasis>dirtied</emphasis> indicates the number of
       previously unmodified blocks that were changed by this query; while the
       number of blocks <emphasis>written</emphasis> indicates the number of
index 512b41dc3a2da8263b1b228c3269a27a00314abd..d2a24798220f0e9d0c7d0524f2ed1cc604a41f68 100644 (file)
@@ -3521,8 +3521,11 @@ show_buffer_usage(ExplainState *es, const BufferUsage *usage, bool planning)
                                                                usage->temp_blks_written > 0);
                bool            has_timing = (!INSTR_TIME_IS_ZERO(usage->blk_read_time) ||
                                                                  !INSTR_TIME_IS_ZERO(usage->blk_write_time));
+               bool            has_temp_timing = (!INSTR_TIME_IS_ZERO(usage->temp_blk_read_time) ||
+                                                                          !INSTR_TIME_IS_ZERO(usage->temp_blk_write_time));
                bool            show_planning = (planning && (has_shared ||
-                                                                                                 has_local || has_temp || has_timing));
+                                                                                                 has_local || has_temp || has_timing ||
+                                                                                                 has_temp_timing));
 
                if (show_planning)
                {
@@ -3587,16 +3590,33 @@ show_buffer_usage(ExplainState *es, const BufferUsage *usage, bool planning)
                }
 
                /* As above, show only positive counter values. */
-               if (has_timing)
+               if (has_timing || has_temp_timing)
                {
                        ExplainIndentText(es);
                        appendStringInfoString(es->str, "I/O Timings:");
-                       if (!INSTR_TIME_IS_ZERO(usage->blk_read_time))
-                               appendStringInfo(es->str, " read=%0.3f",
-                                                                INSTR_TIME_GET_MILLISEC(usage->blk_read_time));
-                       if (!INSTR_TIME_IS_ZERO(usage->blk_write_time))
-                               appendStringInfo(es->str, " write=%0.3f",
-                                                                INSTR_TIME_GET_MILLISEC(usage->blk_write_time));
+
+                       if (has_timing)
+                       {
+                               appendStringInfoString(es->str, " shared/local");
+                               if (!INSTR_TIME_IS_ZERO(usage->blk_read_time))
+                                       appendStringInfo(es->str, " read=%0.3f",
+                                                                        INSTR_TIME_GET_MILLISEC(usage->blk_read_time));
+                               if (!INSTR_TIME_IS_ZERO(usage->blk_write_time))
+                                       appendStringInfo(es->str, " write=%0.3f",
+                                                                        INSTR_TIME_GET_MILLISEC(usage->blk_write_time));
+                               if (has_temp_timing)
+                                       appendStringInfoChar(es->str, ',');
+                       }
+                       if (has_temp_timing)
+                       {
+                               appendStringInfoString(es->str, " temp");
+                               if (!INSTR_TIME_IS_ZERO(usage->temp_blk_read_time))
+                                       appendStringInfo(es->str, " read=%0.3f",
+                                                                        INSTR_TIME_GET_MILLISEC(usage->temp_blk_read_time));
+                               if (!INSTR_TIME_IS_ZERO(usage->temp_blk_write_time))
+                                       appendStringInfo(es->str, " write=%0.3f",
+                                                                        INSTR_TIME_GET_MILLISEC(usage->temp_blk_write_time));
+                       }
                        appendStringInfoChar(es->str, '\n');
                }
 
@@ -3633,6 +3653,12 @@ show_buffer_usage(ExplainState *es, const BufferUsage *usage, bool planning)
                        ExplainPropertyFloat("I/O Write Time", "ms",
                                                                 INSTR_TIME_GET_MILLISEC(usage->blk_write_time),
                                                                 3, es);
+                       ExplainPropertyFloat("Temp I/O Read Time", "ms",
+                                                                INSTR_TIME_GET_MILLISEC(usage->temp_blk_read_time),
+                                                                3, es);
+                       ExplainPropertyFloat("Temp I/O Write Time", "ms",
+                                                                INSTR_TIME_GET_MILLISEC(usage->temp_blk_write_time),
+                                                                3, es);
                }
        }
 }
index c5ff02a84247975942335c15403f92401017ff67..ceff4727d4ae1e5181c6ec8108e34fe0c77309cc 100644 (file)
@@ -237,6 +237,8 @@ BufferUsageAdd(BufferUsage *dst, const BufferUsage *add)
        dst->temp_blks_written += add->temp_blks_written;
        INSTR_TIME_ADD(dst->blk_read_time, add->blk_read_time);
        INSTR_TIME_ADD(dst->blk_write_time, add->blk_write_time);
+       INSTR_TIME_ADD(dst->temp_blk_read_time, add->temp_blk_read_time);
+       INSTR_TIME_ADD(dst->temp_blk_write_time, add->temp_blk_write_time);
 }
 
 /* dst += add - sub */
@@ -259,6 +261,10 @@ BufferUsageAccumDiff(BufferUsage *dst,
                                                  add->blk_read_time, sub->blk_read_time);
        INSTR_TIME_ACCUM_DIFF(dst->blk_write_time,
                                                  add->blk_write_time, sub->blk_write_time);
+       INSTR_TIME_ACCUM_DIFF(dst->temp_blk_read_time,
+                                                 add->temp_blk_read_time, sub->temp_blk_read_time);
+       INSTR_TIME_ACCUM_DIFF(dst->temp_blk_write_time,
+                                                 add->temp_blk_write_time, sub->temp_blk_write_time);
 }
 
 /* helper functions for WAL usage accumulation */
index 3108e96dc927dd213924a6728d7f084bc11a76af..56b88594cc8dbf8a00069069bfd6024bc3318b1f 100644 (file)
@@ -429,6 +429,8 @@ static void
 BufFileLoadBuffer(BufFile *file)
 {
        File            thisfile;
+       instr_time      io_start;
+       instr_time      io_time;
 
        /*
         * Advance to next component file if necessary and possible.
@@ -440,10 +442,14 @@ BufFileLoadBuffer(BufFile *file)
                file->curOffset = 0L;
        }
 
+       thisfile = file->files[file->curFile];
+
+       if (track_io_timing)
+               INSTR_TIME_SET_CURRENT(io_start);
+
        /*
         * Read whatever we can get, up to a full bufferload.
         */
-       thisfile = file->files[file->curFile];
        file->nbytes = FileRead(thisfile,
                                                        file->buffer.data,
                                                        sizeof(file->buffer),
@@ -458,6 +464,13 @@ BufFileLoadBuffer(BufFile *file)
                                                FilePathName(thisfile))));
        }
 
+       if (track_io_timing)
+       {
+               INSTR_TIME_SET_CURRENT(io_time);
+               INSTR_TIME_SUBTRACT(io_time, io_start);
+               INSTR_TIME_ADD(pgBufferUsage.temp_blk_read_time, io_time);
+       }
+
        /* we choose not to advance curOffset here */
 
        if (file->nbytes > 0)
@@ -485,6 +498,8 @@ BufFileDumpBuffer(BufFile *file)
        while (wpos < file->nbytes)
        {
                off_t           availbytes;
+               instr_time      io_start;
+               instr_time      io_time;
 
                /*
                 * Advance to next component file if necessary and possible.
@@ -507,6 +522,10 @@ BufFileDumpBuffer(BufFile *file)
                        bytestowrite = (int) availbytes;
 
                thisfile = file->files[file->curFile];
+
+               if (track_io_timing)
+                       INSTR_TIME_SET_CURRENT(io_start);
+
                bytestowrite = FileWrite(thisfile,
                                                                 file->buffer.data + wpos,
                                                                 bytestowrite,
@@ -517,6 +536,14 @@ BufFileDumpBuffer(BufFile *file)
                                        (errcode_for_file_access(),
                                         errmsg("could not write to file \"%s\": %m",
                                                        FilePathName(thisfile))));
+
+               if (track_io_timing)
+               {
+                       INSTR_TIME_SET_CURRENT(io_time);
+                       INSTR_TIME_SUBTRACT(io_time, io_start);
+                       INSTR_TIME_ADD(pgBufferUsage.temp_blk_write_time, io_time);
+               }
+
                file->curOffset += bytestowrite;
                wpos += bytestowrite;
 
index 1b7157bdd152768762f2f9ea214accae2e704772..2945cce3a97f4e2722627c89f3fa244c2290c648 100644 (file)
@@ -33,8 +33,10 @@ typedef struct BufferUsage
        int64           local_blks_written; /* # of local disk blocks written */
        int64           temp_blks_read; /* # of temp blocks read */
        int64           temp_blks_written;      /* # of temp blocks written */
-       instr_time      blk_read_time;  /* time spent reading */
-       instr_time      blk_write_time; /* time spent writing */
+       instr_time      blk_read_time;  /* time spent reading blocks */
+       instr_time      blk_write_time; /* time spent writing blocks */
+       instr_time      temp_blk_read_time; /* time spent reading temp blocks */
+       instr_time      temp_blk_write_time;    /* time spent writing temp blocks */
 } BufferUsage;
 
 /*
index bc361759219a1dde6fce83b8baae76a74fca63b0..48620edbc2be5118ded9418cc938a849536ee7eb 100644 (file)
@@ -85,56 +85,6 @@ select explain_filter('explain (analyze, buffers, format text) select * from int
  Execution Time: N.N ms
 (3 rows)
 
-select explain_filter('explain (analyze, buffers, format json) select * from int8_tbl i8');
-           explain_filter           
-------------------------------------
- [                                 +
-   {                               +
-     "Plan": {                     +
-       "Node Type": "Seq Scan",    +
-       "Parallel Aware": false,    +
-       "Async Capable": false,     +
-       "Relation Name": "int8_tbl",+
-       "Alias": "i8",              +
-       "Startup Cost": N.N,        +
-       "Total Cost": N.N,          +
-       "Plan Rows": N,             +
-       "Plan Width": N,            +
-       "Actual Startup Time": N.N, +
-       "Actual Total Time": N.N,   +
-       "Actual Rows": N,           +
-       "Actual Loops": N,          +
-       "Shared Hit Blocks": N,     +
-       "Shared Read Blocks": N,    +
-       "Shared Dirtied Blocks": N, +
-       "Shared Written Blocks": N, +
-       "Local Hit Blocks": N,      +
-       "Local Read Blocks": N,     +
-       "Local Dirtied Blocks": N,  +
-       "Local Written Blocks": N,  +
-       "Temp Read Blocks": N,      +
-       "Temp Written Blocks": N    +
-     },                            +
-     "Planning": {                 +
-       "Shared Hit Blocks": N,     +
-       "Shared Read Blocks": N,    +
-       "Shared Dirtied Blocks": N, +
-       "Shared Written Blocks": N, +
-       "Local Hit Blocks": N,      +
-       "Local Read Blocks": N,     +
-       "Local Dirtied Blocks": N,  +
-       "Local Written Blocks": N,  +
-       "Temp Read Blocks": N,      +
-       "Temp Written Blocks": N    +
-     },                            +
-     "Planning Time": N.N,         +
-     "Triggers": [                 +
-     ],                            +
-     "Execution Time": N.N         +
-   }                               +
- ]
-(1 row)
-
 select explain_filter('explain (analyze, buffers, format xml) select * from int8_tbl i8');
                      explain_filter                     
 --------------------------------------------------------
@@ -276,6 +226,68 @@ select explain_filter('explain (buffers, format json) select * from int8_tbl i8'
  ]
 (1 row)
 
+-- Check output including I/O timings.  These fields are conditional
+-- but always set in JSON format, so check them only in this case.
+set track_io_timing = on;
+select explain_filter('explain (analyze, buffers, format json) select * from int8_tbl i8');
+           explain_filter           
+------------------------------------
+ [                                 +
+   {                               +
+     "Plan": {                     +
+       "Node Type": "Seq Scan",    +
+       "Parallel Aware": false,    +
+       "Async Capable": false,     +
+       "Relation Name": "int8_tbl",+
+       "Alias": "i8",              +
+       "Startup Cost": N.N,        +
+       "Total Cost": N.N,          +
+       "Plan Rows": N,             +
+       "Plan Width": N,            +
+       "Actual Startup Time": N.N, +
+       "Actual Total Time": N.N,   +
+       "Actual Rows": N,           +
+       "Actual Loops": N,          +
+       "Shared Hit Blocks": N,     +
+       "Shared Read Blocks": N,    +
+       "Shared Dirtied Blocks": N, +
+       "Shared Written Blocks": N, +
+       "Local Hit Blocks": N,      +
+       "Local Read Blocks": N,     +
+       "Local Dirtied Blocks": N,  +
+       "Local Written Blocks": N,  +
+       "Temp Read Blocks": N,      +
+       "Temp Written Blocks": N,   +
+       "I/O Read Time": N.N,       +
+       "I/O Write Time": N.N,      +
+       "Temp I/O Read Time": N.N,  +
+       "Temp I/O Write Time": N.N  +
+     },                            +
+     "Planning": {                 +
+       "Shared Hit Blocks": N,     +
+       "Shared Read Blocks": N,    +
+       "Shared Dirtied Blocks": N, +
+       "Shared Written Blocks": N, +
+       "Local Hit Blocks": N,      +
+       "Local Read Blocks": N,     +
+       "Local Dirtied Blocks": N,  +
+       "Local Written Blocks": N,  +
+       "Temp Read Blocks": N,      +
+       "Temp Written Blocks": N,   +
+       "I/O Read Time": N.N,       +
+       "I/O Write Time": N.N,      +
+       "Temp I/O Read Time": N.N,  +
+       "Temp I/O Write Time": N.N  +
+     },                            +
+     "Planning Time": N.N,         +
+     "Triggers": [                 +
+     ],                            +
+     "Execution Time": N.N         +
+   }                               +
+ ]
+(1 row)
+
+set track_io_timing = off;
 -- SETTINGS option
 -- We have to ignore other settings that might be imposed by the environment,
 -- so printing the whole Settings field unfortunately won't do.
index 5069fa87957a4d4ea76ae191a6d5ef1b7c3d99f6..ae3f7a308d7dde933d4fa7b90b4c2be38aaaa64a 100644 (file)
@@ -65,12 +65,17 @@ select explain_filter('explain select * from int8_tbl i8');
 select explain_filter('explain (analyze) select * from int8_tbl i8');
 select explain_filter('explain (analyze, verbose) select * from int8_tbl i8');
 select explain_filter('explain (analyze, buffers, format text) select * from int8_tbl i8');
-select explain_filter('explain (analyze, buffers, format json) select * from int8_tbl i8');
 select explain_filter('explain (analyze, buffers, format xml) select * from int8_tbl i8');
 select explain_filter('explain (analyze, buffers, format yaml) select * from int8_tbl i8');
 select explain_filter('explain (buffers, format text) select * from int8_tbl i8');
 select explain_filter('explain (buffers, format json) select * from int8_tbl i8');
 
+-- Check output including I/O timings.  These fields are conditional
+-- but always set in JSON format, so check them only in this case.
+set track_io_timing = on;
+select explain_filter('explain (analyze, buffers, format json) select * from int8_tbl i8');
+set track_io_timing = off;
+
 -- SETTINGS option
 -- We have to ignore other settings that might be imposed by the environment,
 -- so printing the whole Settings field unfortunately won't do.