Add contrib/pg_walinspect.
authorJeff Davis <jdavis@postgresql.org>
Fri, 8 Apr 2022 07:02:10 +0000 (00:02 -0700)
committerJeff Davis <jdavis@postgresql.org>
Fri, 8 Apr 2022 07:26:44 +0000 (00:26 -0700)
Provides similar functionality to pg_waldump, but from a SQL interface
rather than a separate utility.

Author: Bharath Rupireddy
Reviewed-by: Greg Stark, Kyotaro Horiguchi, Andres Freund, Ashutosh Sharma, Nitin Jadhav, RKN Sai Krishna
Discussion: https://postgr.es/m/CALj2ACUGUYXsEQdKhEdsBzhGEyF3xggvLdD8C0VT72TNEfOiog%40mail.gmail.com

25 files changed:
contrib/Makefile
contrib/pg_walinspect/.gitignore [new file with mode: 0644]
contrib/pg_walinspect/Makefile [new file with mode: 0644]
contrib/pg_walinspect/expected/pg_walinspect.out [new file with mode: 0644]
contrib/pg_walinspect/pg_walinspect--1.0.sql [new file with mode: 0644]
contrib/pg_walinspect/pg_walinspect.c [new file with mode: 0644]
contrib/pg_walinspect/pg_walinspect.control [new file with mode: 0644]
contrib/pg_walinspect/sql/pg_walinspect.sql [new file with mode: 0644]
doc/src/sgml/contrib.sgml
doc/src/sgml/filelist.sgml
doc/src/sgml/func.sgml
doc/src/sgml/pgwalinspect.sgml [new file with mode: 0644]
src/backend/access/rmgrdesc/xlogdesc.c
src/backend/access/transam/Makefile
src/backend/access/transam/xlogreader.c
src/backend/access/transam/xlogstats.c [new file with mode: 0644]
src/backend/access/transam/xlogutils.c
src/bin/pg_waldump/.gitignore
src/bin/pg_waldump/Makefile
src/bin/pg_waldump/pg_waldump.c
src/include/access/xlog.h
src/include/access/xlog_internal.h
src/include/access/xlogreader.h
src/include/access/xlogstats.h [new file with mode: 0644]
src/include/access/xlogutils.h

index 332b486ecc2026267f0fe83d582ee66762f86b77..bbf220407b0bfc79f1b64b5eca07fae5b86b186e 100644 (file)
@@ -41,6 +41,7 @@ SUBDIRS = \
                pgrowlocks      \
                pgstattuple     \
                pg_visibility   \
+               pg_walinspect   \
                postgres_fdw    \
                seg             \
                spi             \
diff --git a/contrib/pg_walinspect/.gitignore b/contrib/pg_walinspect/.gitignore
new file mode 100644 (file)
index 0000000..5dcb3ff
--- /dev/null
@@ -0,0 +1,4 @@
+# Generated subdirectories
+/log/
+/results/
+/tmp_check/
diff --git a/contrib/pg_walinspect/Makefile b/contrib/pg_walinspect/Makefile
new file mode 100644 (file)
index 0000000..350845c
--- /dev/null
@@ -0,0 +1,23 @@
+# contrib/pg_walinspect/Makefile
+
+MODULE_big = pg_walinspect
+OBJS = \
+       $(WIN32RES) \
+       pg_walinspect.o
+PGFILEDESC = "pg_walinspect - functions to inspect contents of PostgreSQL Write-Ahead Log"
+
+EXTENSION = pg_walinspect
+DATA = pg_walinspect--1.0.sql
+
+REGRESS = pg_walinspect
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/pg_walinspect
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/pg_walinspect/expected/pg_walinspect.out b/contrib/pg_walinspect/expected/pg_walinspect.out
new file mode 100644 (file)
index 0000000..634b1bb
--- /dev/null
@@ -0,0 +1,165 @@
+CREATE EXTENSION pg_walinspect;
+CREATE TABLE sample_tbl(col1 int, col2 int);
+-- Make sure checkpoints don't interfere with the test.
+SELECT lsn as wal_lsn1 FROM
+  pg_create_physical_replication_slot('regress_pg_walinspect_slot', true, false)
+  \gset
+INSERT INTO sample_tbl SELECT * FROM generate_series(1, 2);
+SELECT pg_current_wal_lsn() AS wal_lsn2 \gset
+INSERT INTO sample_tbl SELECT * FROM generate_series(1, 2);
+-- ===================================================================
+-- Tests for input validation
+-- ===================================================================
+SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_records_info(:'wal_lsn2', :'wal_lsn1'); -- ERROR
+ERROR:  WAL start LSN must be less than end LSN
+SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_stats(:'wal_lsn2', :'wal_lsn1'); -- ERROR
+ERROR:  WAL start LSN must be less than end LSN
+-- ===================================================================
+-- Tests for all function executions
+-- ===================================================================
+SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_record_info(:'wal_lsn1');
+ ok 
+----
+ t
+(1 row)
+
+SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_records_info(:'wal_lsn1', :'wal_lsn2');
+ ok 
+----
+ t
+(1 row)
+
+SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_records_info_till_end_of_wal(:'wal_lsn1');
+ ok 
+----
+ t
+(1 row)
+
+SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_stats(:'wal_lsn1', :'wal_lsn2');
+ ok 
+----
+ t
+(1 row)
+
+SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_stats_till_end_of_wal(:'wal_lsn1');
+ ok 
+----
+ t
+(1 row)
+
+-- ===================================================================
+-- Test for filtering out WAL records of a particular table
+-- ===================================================================
+SELECT oid AS sample_tbl_oid FROM pg_class WHERE relname = 'sample_tbl' \gset
+SELECT COUNT(*) >= 1 AS ok FROM pg_get_wal_records_info(:'wal_lsn1', :'wal_lsn2')
+                       WHERE block_ref LIKE concat('%', :'sample_tbl_oid', '%') AND resource_manager = 'Heap';
+ ok 
+----
+ t
+(1 row)
+
+-- ===================================================================
+-- Test for filtering out WAL records based on resource_manager and
+-- record_type
+-- ===================================================================
+SELECT COUNT(*) >= 1 AS ok FROM pg_get_wal_records_info(:'wal_lsn1', :'wal_lsn2')
+                       WHERE resource_manager = 'Heap' AND record_type = 'INSERT';
+ ok 
+----
+ t
+(1 row)
+
+-- ===================================================================
+-- Tests for permissions
+-- ===================================================================
+CREATE ROLE regress_pg_walinspect;
+SELECT has_function_privilege('regress_pg_walinspect',
+  'pg_get_wal_record_info(pg_lsn)', 'EXECUTE'); -- no
+ has_function_privilege 
+------------------------
+ f
+(1 row)
+
+SELECT has_function_privilege('regress_pg_walinspect',
+  'pg_get_wal_records_info(pg_lsn, pg_lsn) ', 'EXECUTE'); -- no
+ has_function_privilege 
+------------------------
+ f
+(1 row)
+
+SELECT has_function_privilege('regress_pg_walinspect',
+  'pg_get_wal_stats(pg_lsn, pg_lsn, boolean) ', 'EXECUTE'); -- no
+ has_function_privilege 
+------------------------
+ f
+(1 row)
+
+-- Functions accessible by users with role pg_read_server_files
+GRANT pg_read_server_files TO regress_pg_walinspect;
+SELECT has_function_privilege('regress_pg_walinspect',
+  'pg_get_wal_record_info(pg_lsn)', 'EXECUTE'); -- yes
+ has_function_privilege 
+------------------------
+ t
+(1 row)
+
+SELECT has_function_privilege('regress_pg_walinspect',
+  'pg_get_wal_records_info(pg_lsn, pg_lsn) ', 'EXECUTE'); -- yes
+ has_function_privilege 
+------------------------
+ t
+(1 row)
+
+SELECT has_function_privilege('regress_pg_walinspect',
+  'pg_get_wal_stats(pg_lsn, pg_lsn, boolean) ', 'EXECUTE'); -- yes
+ has_function_privilege 
+------------------------
+ t
+(1 row)
+
+REVOKE pg_read_server_files FROM regress_pg_walinspect;
+-- Superuser can grant execute to other users
+GRANT EXECUTE ON FUNCTION pg_get_wal_record_info(pg_lsn)
+  TO regress_pg_walinspect;
+GRANT EXECUTE ON FUNCTION pg_get_wal_records_info(pg_lsn, pg_lsn)
+  TO regress_pg_walinspect;
+GRANT EXECUTE ON FUNCTION pg_get_wal_stats(pg_lsn, pg_lsn, boolean)
+  TO regress_pg_walinspect;
+SELECT has_function_privilege('regress_pg_walinspect',
+  'pg_get_wal_record_info(pg_lsn)', 'EXECUTE'); -- yes
+ has_function_privilege 
+------------------------
+ t
+(1 row)
+
+SELECT has_function_privilege('regress_pg_walinspect',
+  'pg_get_wal_records_info(pg_lsn, pg_lsn) ', 'EXECUTE'); -- yes
+ has_function_privilege 
+------------------------
+ t
+(1 row)
+
+SELECT has_function_privilege('regress_pg_walinspect',
+  'pg_get_wal_stats(pg_lsn, pg_lsn, boolean) ', 'EXECUTE'); -- yes
+ has_function_privilege 
+------------------------
+ t
+(1 row)
+
+REVOKE EXECUTE ON FUNCTION pg_get_wal_record_info(pg_lsn)
+  FROM regress_pg_walinspect;
+REVOKE EXECUTE ON FUNCTION pg_get_wal_records_info(pg_lsn, pg_lsn)
+  FROM regress_pg_walinspect;
+REVOKE EXECUTE ON FUNCTION pg_get_wal_stats(pg_lsn, pg_lsn, boolean)
+  FROM regress_pg_walinspect;
+-- ===================================================================
+-- Clean up
+-- ===================================================================
+DROP ROLE regress_pg_walinspect;
+SELECT pg_drop_replication_slot('regress_pg_walinspect_slot');
+ pg_drop_replication_slot 
+--------------------------
+(1 row)
+
+DROP TABLE sample_tbl;
diff --git a/contrib/pg_walinspect/pg_walinspect--1.0.sql b/contrib/pg_walinspect/pg_walinspect--1.0.sql
new file mode 100644 (file)
index 0000000..4243516
--- /dev/null
@@ -0,0 +1,118 @@
+/* contrib/pg_walinspect/pg_walinspect--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION pg_walinspect" to load this file. \quit
+
+--
+-- pg_get_wal_record_info()
+--
+CREATE FUNCTION pg_get_wal_record_info(IN in_lsn pg_lsn,
+    OUT start_lsn pg_lsn,
+    OUT end_lsn pg_lsn,
+    OUT prev_lsn pg_lsn,
+    OUT xid xid,
+    OUT resource_manager text,
+    OUT record_type text,
+    OUT record_length int4,
+    OUT main_data_length int4,
+    OUT fpi_length int4,
+    OUT description text,
+    OUT block_ref text
+)
+AS 'MODULE_PATHNAME', 'pg_get_wal_record_info'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+REVOKE EXECUTE ON FUNCTION pg_get_wal_record_info(pg_lsn) FROM PUBLIC;
+GRANT EXECUTE ON FUNCTION pg_get_wal_record_info(pg_lsn) TO pg_read_server_files;
+
+--
+-- pg_get_wal_records_info()
+--
+CREATE FUNCTION pg_get_wal_records_info(IN start_lsn pg_lsn,
+    IN end_lsn pg_lsn,
+    OUT start_lsn pg_lsn,
+    OUT end_lsn pg_lsn,
+    OUT prev_lsn pg_lsn,
+    OUT xid xid,
+    OUT resource_manager text,
+    OUT record_type text,
+    OUT record_length int4,
+    OUT main_data_length int4,
+    OUT fpi_length int4,
+    OUT description text,
+    OUT block_ref text
+)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'pg_get_wal_records_info'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+REVOKE EXECUTE ON FUNCTION pg_get_wal_records_info(pg_lsn, pg_lsn) FROM PUBLIC;
+GRANT EXECUTE ON FUNCTION pg_get_wal_records_info(pg_lsn, pg_lsn) TO pg_read_server_files;
+
+--
+-- pg_get_wal_records_info_till_end_of_wal()
+--
+CREATE FUNCTION pg_get_wal_records_info_till_end_of_wal(IN start_lsn pg_lsn,
+    OUT start_lsn pg_lsn,
+    OUT end_lsn pg_lsn,
+    OUT prev_lsn pg_lsn,
+    OUT xid xid,
+    OUT resource_manager text,
+    OUT record_type text,
+    OUT record_length int4,
+    OUT main_data_length int4,
+    OUT fpi_length int4,
+    OUT description text,
+    OUT block_ref text
+)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'pg_get_wal_records_info_till_end_of_wal'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+REVOKE EXECUTE ON FUNCTION pg_get_wal_records_info_till_end_of_wal(pg_lsn) FROM PUBLIC;
+GRANT EXECUTE ON FUNCTION pg_get_wal_records_info_till_end_of_wal(pg_lsn) TO pg_read_server_files;
+
+--
+-- pg_get_wal_stats()
+--
+CREATE FUNCTION pg_get_wal_stats(IN start_lsn pg_lsn,
+    IN end_lsn pg_lsn,
+    IN  per_record boolean DEFAULT false,
+    OUT "resource_manager/record_type" text,
+    OUT count int8,
+    OUT count_percentage float4,
+    OUT record_size int8,
+    OUT record_size_percentage float4,
+    OUT fpi_size int8,
+    OUT fpi_size_percentage float4,
+    OUT combined_size int8,
+    OUT combined_size_percentage float4
+)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'pg_get_wal_stats'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+REVOKE EXECUTE ON FUNCTION pg_get_wal_stats(pg_lsn, pg_lsn, boolean) FROM PUBLIC;
+GRANT EXECUTE ON FUNCTION pg_get_wal_stats(pg_lsn, pg_lsn, boolean) TO pg_read_server_files;
+
+--
+-- pg_get_wal_stats_till_end_of_wal()
+--
+CREATE FUNCTION pg_get_wal_stats_till_end_of_wal(IN start_lsn pg_lsn,
+    IN  per_record boolean DEFAULT false,
+    OUT "resource_manager/record_type" text,
+    OUT count int8,
+    OUT count_percentage float4,
+    OUT record_size int8,
+    OUT record_size_percentage float4,
+    OUT fpi_size int8,
+    OUT fpi_size_percentage float4,
+    OUT combined_size int8,
+    OUT combined_size_percentage float4
+)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'pg_get_wal_stats_till_end_of_wal'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+REVOKE EXECUTE ON FUNCTION pg_get_wal_stats_till_end_of_wal(pg_lsn, boolean) FROM PUBLIC;
+GRANT EXECUTE ON FUNCTION pg_get_wal_stats_till_end_of_wal(pg_lsn, boolean) TO pg_read_server_files;
diff --git a/contrib/pg_walinspect/pg_walinspect.c b/contrib/pg_walinspect/pg_walinspect.c
new file mode 100644 (file)
index 0000000..58afa1a
--- /dev/null
@@ -0,0 +1,629 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_walinspect.c
+ *               Functions to inspect contents of PostgreSQL Write-Ahead Log
+ *
+ * Copyright (c) 2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *               contrib/pg_walinspect/pg_walinspect.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/xlog.h"
+#include "access/xlog_internal.h"
+#include "access/xlogreader.h"
+#include "access/xlogrecovery.h"
+#include "access/xlogstats.h"
+#include "access/xlogutils.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "utils/builtins.h"
+#include "utils/pg_lsn.h"
+
+/*
+ * NOTE: For any code change or issue fix here, it is highly recommended to
+ * give a thought about doing the same in pg_waldump tool as well.
+ */
+
+PG_MODULE_MAGIC;
+
+PG_FUNCTION_INFO_V1(pg_get_wal_record_info);
+PG_FUNCTION_INFO_V1(pg_get_wal_records_info);
+PG_FUNCTION_INFO_V1(pg_get_wal_records_info_till_end_of_wal);
+PG_FUNCTION_INFO_V1(pg_get_wal_stats);
+PG_FUNCTION_INFO_V1(pg_get_wal_stats_till_end_of_wal);
+
+static bool IsFutureLSN(XLogRecPtr lsn, XLogRecPtr *curr_lsn);
+static XLogReaderState *InitXLogReaderState(XLogRecPtr lsn,
+                                                                                       XLogRecPtr *first_record);
+static XLogRecord *ReadNextXLogRecord(XLogReaderState *xlogreader,
+                                                                         XLogRecPtr first_record);
+static void GetWALRecordInfo(XLogReaderState *record, XLogRecPtr lsn,
+                                                        Datum *values, bool *nulls, uint32 ncols);
+static XLogRecPtr ValidateInputLSNs(bool till_end_of_wal,
+                                                                       XLogRecPtr start_lsn, XLogRecPtr end_lsn);
+static void GetWALRecordsInfo(FunctionCallInfo fcinfo, XLogRecPtr start_lsn,
+                                                         XLogRecPtr end_lsn);
+static void GetXLogSummaryStats(XLogStats * stats, ReturnSetInfo *rsinfo,
+                                                               Datum *values, bool *nulls, uint32 ncols,
+                                                               bool stats_per_record);
+static void FillXLogStatsRow(const char *name, uint64 n, uint64 total_count,
+                                                        uint64 rec_len, uint64 total_rec_len,
+                                                        uint64 fpi_len, uint64 total_fpi_len,
+                                                        uint64 tot_len, uint64 total_len,
+                                                        Datum *values, bool *nulls, uint32 ncols);
+static void GetWalStats(FunctionCallInfo fcinfo, XLogRecPtr start_lsn,
+                                               XLogRecPtr end_lsn, bool stats_per_record);
+
+/*
+ * Check if the given LSN is in future. Also, return the LSN up to which the
+ * server has WAL.
+ */
+static bool
+IsFutureLSN(XLogRecPtr lsn, XLogRecPtr *curr_lsn)
+{
+       /*
+        * We determine the current LSN of the server similar to how page_read
+        * callback read_local_xlog_page_no_wait does.
+        */
+       if (!RecoveryInProgress())
+               *curr_lsn = GetFlushRecPtr(NULL);
+       else
+               *curr_lsn = GetXLogReplayRecPtr(NULL);
+
+       Assert(!XLogRecPtrIsInvalid(*curr_lsn));
+
+       if (lsn >= *curr_lsn)
+               return true;
+
+       return false;
+}
+
+/*
+ * Intialize WAL reader and identify first valid LSN.
+ */
+static XLogReaderState *
+InitXLogReaderState(XLogRecPtr lsn, XLogRecPtr *first_record)
+{
+       XLogReaderState *xlogreader;
+
+       /*
+        * Reading WAL below the first page of the first sgements isn't allowed.
+        * This is a bootstrap WAL page and the page_read callback fails to read
+        * it.
+        */
+       if (lsn < XLOG_BLCKSZ)
+               ereport(ERROR,
+                               (errmsg("could not read WAL at LSN %X/%X",
+                                               LSN_FORMAT_ARGS(lsn))));
+
+       xlogreader = XLogReaderAllocate(wal_segment_size, NULL,
+                                                                       XL_ROUTINE(.page_read = &read_local_xlog_page_no_wait,
+                                                                                          .segment_open = &wal_segment_open,
+                                                                                          .segment_close = &wal_segment_close),
+                                                                       NULL);
+
+       if (xlogreader == NULL)
+               ereport(ERROR,
+                               (errcode(ERRCODE_OUT_OF_MEMORY),
+                                errmsg("out of memory"),
+                                errdetail("Failed while allocating a WAL reading processor.")));
+
+       /* first find a valid recptr to start from */
+       *first_record = XLogFindNextRecord(xlogreader, lsn);
+
+       if (XLogRecPtrIsInvalid(*first_record))
+               ereport(ERROR,
+                               (errmsg("could not find a valid record after %X/%X",
+                                               LSN_FORMAT_ARGS(lsn))));
+
+       return xlogreader;
+}
+
+/*
+ * Read next WAL record.
+ *
+ * By design, to be less intrusive in a running system, no slot is allocated
+ * to reserve the WAL we're about to read. Therefore this function can
+ * encounter read errors for historical WAL.
+ *
+ * We guard against ordinary errors trying to read WAL that hasn't been
+ * written yet by limiting end_lsn to the flushed WAL, but that can also
+ * encounter errors if the flush pointer falls in the middle of a record.
+ */
+static XLogRecord *
+ReadNextXLogRecord(XLogReaderState *xlogreader, XLogRecPtr first_record)
+{
+       XLogRecord *record;
+       char    *errormsg;
+
+       record = XLogReadRecord(xlogreader, &errormsg);
+
+       if (record == NULL)
+       {
+               if (errormsg)
+                       ereport(ERROR,
+                                       (errcode_for_file_access(),
+                                        errmsg("could not read WAL at %X/%X: %s",
+                                                       LSN_FORMAT_ARGS(first_record), errormsg)));
+               else
+                       ereport(ERROR,
+                                       (errcode_for_file_access(),
+                                        errmsg("could not read WAL at %X/%X",
+                                                       LSN_FORMAT_ARGS(first_record))));
+       }
+
+       return record;
+}
+
+/*
+ * Get a single WAL record info.
+ */
+static void
+GetWALRecordInfo(XLogReaderState *record, XLogRecPtr lsn,
+                                Datum *values, bool *nulls, uint32 ncols)
+{
+       const char *id;
+       RmgrData desc;
+       uint32  fpi_len = 0;
+       StringInfoData rec_desc;
+       StringInfoData rec_blk_ref;
+       uint32  main_data_len;
+       int     i = 0;
+
+       desc = GetRmgr(XLogRecGetRmid(record));
+       id = desc.rm_identify(XLogRecGetInfo(record));
+
+       if (id == NULL)
+               id = psprintf("UNKNOWN (%x)", XLogRecGetInfo(record) & ~XLR_INFO_MASK);
+
+       initStringInfo(&rec_desc);
+       desc.rm_desc(&rec_desc, record);
+
+       /* Block references. */
+       initStringInfo(&rec_blk_ref);
+       XLogRecGetBlockRefInfo(record, false, true, &rec_blk_ref, &fpi_len);
+
+       main_data_len = XLogRecGetDataLen(record);
+
+       values[i++] = LSNGetDatum(lsn);
+       values[i++] = LSNGetDatum(record->EndRecPtr);
+       values[i++] = LSNGetDatum(XLogRecGetPrev(record));
+       values[i++] = TransactionIdGetDatum(XLogRecGetXid(record));
+       values[i++] = CStringGetTextDatum(desc.rm_name);
+       values[i++] = CStringGetTextDatum(id);
+       values[i++] = UInt32GetDatum(XLogRecGetTotalLen(record));
+       values[i++] = UInt32GetDatum(main_data_len);
+       values[i++] = UInt32GetDatum(fpi_len);
+       values[i++] = CStringGetTextDatum(rec_desc.data);
+       values[i++] = CStringGetTextDatum(rec_blk_ref.data);
+
+       Assert(i == ncols);
+}
+
+/*
+ * Get WAL record info.
+ *
+ * This function emits an error if a future WAL LSN i.e. WAL LSN the database
+ * system doesn't know about is specified.
+ */
+Datum
+pg_get_wal_record_info(PG_FUNCTION_ARGS)
+{
+#define PG_GET_WAL_RECORD_INFO_COLS 11
+       Datum   result;
+       Datum   values[PG_GET_WAL_RECORD_INFO_COLS];
+       bool    nulls[PG_GET_WAL_RECORD_INFO_COLS];
+       XLogRecPtr      lsn;
+       XLogRecPtr      curr_lsn;
+       XLogRecPtr      first_record;
+       XLogReaderState *xlogreader;
+       TupleDesc       tupdesc;
+       HeapTuple       tuple;
+
+       lsn = PG_GETARG_LSN(0);
+
+       if (IsFutureLSN(lsn, &curr_lsn))
+       {
+               /*
+                * GetFlushRecPtr or GetXLogReplayRecPtr gives "end+1" LSN of the last
+                * record flushed or replayed respectively. But let's use the LSN up
+                * to "end" in user facing message.
+                */
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                errmsg("cannot accept future input LSN"),
+                                errdetail("Last known WAL LSN on the database system is at %X/%X.",
+                                                  LSN_FORMAT_ARGS(curr_lsn))));
+       }
+
+       /* Build a tuple descriptor for our result type. */
+       if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+               elog(ERROR, "return type must be a row type");
+
+       xlogreader = InitXLogReaderState(lsn, &first_record);
+
+       (void) ReadNextXLogRecord(xlogreader, first_record);
+
+       MemSet(values, 0, sizeof(values));
+       MemSet(nulls, 0, sizeof(nulls));
+
+       GetWALRecordInfo(xlogreader, first_record, values, nulls,
+                                        PG_GET_WAL_RECORD_INFO_COLS);
+
+       XLogReaderFree(xlogreader);
+
+       tuple = heap_form_tuple(tupdesc, values, nulls);
+       result = HeapTupleGetDatum(tuple);
+
+       PG_RETURN_DATUM(result);
+#undef PG_GET_WAL_RECORD_INFO_COLS
+}
+
+/*
+ * Validate the input LSNs and compute end LSN for till_end_of_wal versions.
+ */
+static XLogRecPtr
+ValidateInputLSNs(bool till_end_of_wal, XLogRecPtr start_lsn,
+                                 XLogRecPtr end_lsn)
+{
+       XLogRecPtr      curr_lsn;
+
+       if (IsFutureLSN(start_lsn, &curr_lsn))
+       {
+               /*
+                * GetFlushRecPtr or GetXLogReplayRecPtr gives "end+1" LSN of the last
+                * record flushed or replayed respectively. But let's use the LSN up
+                * to "end" in user facing message.
+                */
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                errmsg("cannot accept future start LSN"),
+                                errdetail("Last known WAL LSN on the database system is at %X/%X.",
+                                                  LSN_FORMAT_ARGS(curr_lsn))));
+       }
+
+       if (till_end_of_wal)
+               end_lsn = curr_lsn;
+
+       if (end_lsn > curr_lsn)
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                errmsg("cannot accept future end LSN"),
+                                errdetail("Last known WAL LSN on the database system is at %X/%X.",
+                                                  LSN_FORMAT_ARGS(curr_lsn))));
+
+       if (start_lsn >= end_lsn)
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                errmsg("WAL start LSN must be less than end LSN")));
+
+       return end_lsn;
+}
+
+/*
+ * Get info and data of all WAL records between start LSN and end LSN.
+ */
+static void
+GetWALRecordsInfo(FunctionCallInfo fcinfo, XLogRecPtr start_lsn,
+                                 XLogRecPtr end_lsn)
+{
+#define PG_GET_WAL_RECORDS_INFO_COLS 11
+       XLogRecPtr      first_record;
+       XLogReaderState *xlogreader;
+       ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+       Datum   values[PG_GET_WAL_RECORDS_INFO_COLS];
+       bool    nulls[PG_GET_WAL_RECORDS_INFO_COLS];
+
+       SetSingleFuncCall(fcinfo, 0);
+
+       xlogreader = InitXLogReaderState(start_lsn, &first_record);
+
+       Assert(xlogreader);
+
+       MemSet(values, 0, sizeof(values));
+       MemSet(nulls, 0, sizeof(nulls));
+
+       for (;;)
+       {
+               (void) ReadNextXLogRecord(xlogreader, first_record);
+
+               if (xlogreader->EndRecPtr <= end_lsn)
+               {
+                       GetWALRecordInfo(xlogreader, xlogreader->currRecPtr, values, nulls,
+                                                        PG_GET_WAL_RECORDS_INFO_COLS);
+
+                       tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
+                                                                values, nulls);
+               }
+
+               /* if we read up to end_lsn, we're done */
+               if (xlogreader->EndRecPtr >= end_lsn)
+                       break;
+
+               CHECK_FOR_INTERRUPTS();
+       }
+
+       XLogReaderFree(xlogreader);
+
+#undef PG_GET_WAL_RECORDS_INFO_COLS
+}
+
+/*
+ * Get info and data of all WAL records between start LSN and end LSN.
+ *
+ * This function emits an error if a future start or end WAL LSN i.e. WAL LSN
+ * the database system doesn't know about is specified.
+ */
+Datum
+pg_get_wal_records_info(PG_FUNCTION_ARGS)
+{
+       XLogRecPtr      start_lsn;
+       XLogRecPtr      end_lsn;
+
+       start_lsn = PG_GETARG_LSN(0);
+       end_lsn = PG_GETARG_LSN(1);
+
+       end_lsn = ValidateInputLSNs(false, start_lsn, end_lsn);
+
+       GetWALRecordsInfo(fcinfo, start_lsn, end_lsn);
+
+       PG_RETURN_VOID();
+}
+
+/*
+ * Get info and data of all WAL records from start LSN till end of WAL.
+ *
+ * This function emits an error if a future start i.e. WAL LSN the database
+ * system doesn't know about is specified.
+ */
+Datum
+pg_get_wal_records_info_till_end_of_wal(PG_FUNCTION_ARGS)
+{
+       XLogRecPtr      start_lsn;
+       XLogRecPtr      end_lsn = InvalidXLogRecPtr;
+
+       start_lsn = PG_GETARG_LSN(0);
+
+       end_lsn = ValidateInputLSNs(true, start_lsn, end_lsn);
+
+       GetWALRecordsInfo(fcinfo, start_lsn, end_lsn);
+
+       PG_RETURN_VOID();
+}
+
+/*
+ * Fill single row of record counts and sizes for an rmgr or record.
+ */
+static void
+FillXLogStatsRow(const char *name,
+                                uint64 n, uint64 total_count,
+                                uint64 rec_len, uint64 total_rec_len,
+                                uint64 fpi_len, uint64 total_fpi_len,
+                                uint64 tot_len, uint64 total_len,
+                                Datum *values, bool *nulls, uint32 ncols)
+{
+       double  n_pct,
+                       rec_len_pct,
+                       fpi_len_pct,
+                       tot_len_pct;
+       int     i = 0;
+
+       n_pct = 0;
+       if (total_count != 0)
+               n_pct = 100 * (double) n / total_count;
+
+       rec_len_pct = 0;
+       if (total_rec_len != 0)
+               rec_len_pct = 100 * (double) rec_len / total_rec_len;
+
+       fpi_len_pct = 0;
+       if (total_fpi_len != 0)
+               fpi_len_pct = 100 * (double) fpi_len / total_fpi_len;
+
+       tot_len_pct = 0;
+       if (total_len != 0)
+               tot_len_pct = 100 * (double) tot_len / total_len;
+
+       values[i++] = CStringGetTextDatum(name);
+       values[i++] = Int64GetDatum(n);
+       values[i++] = Float4GetDatum(n_pct);
+       values[i++] = Int64GetDatum(rec_len);
+       values[i++] = Float4GetDatum(rec_len_pct);
+       values[i++] = Int64GetDatum(fpi_len);
+       values[i++] = Float4GetDatum(fpi_len_pct);
+       values[i++] = Int64GetDatum(tot_len);
+       values[i++] = Float4GetDatum(tot_len_pct);
+
+       Assert(i == ncols);
+}
+
+/*
+ * Get summary statistics about the records seen so far.
+ */
+static void
+GetXLogSummaryStats(XLogStats *stats, ReturnSetInfo *rsinfo,
+                                       Datum *values, bool *nulls, uint32 ncols,
+                                       bool stats_per_record)
+{
+       uint64  total_count = 0;
+       uint64  total_rec_len = 0;
+       uint64  total_fpi_len = 0;
+       uint64  total_len = 0;
+       int     ri;
+
+       /*
+        * Each row shows its percentages of the total, so make a first pass to
+        * calculate column totals.
+        */
+       for (ri = 0; ri <= RM_MAX_ID; ri++)
+       {
+               if (!RmgrIdIsValid(ri))
+                       continue;
+
+               total_count += stats->rmgr_stats[ri].count;
+               total_rec_len += stats->rmgr_stats[ri].rec_len;
+               total_fpi_len += stats->rmgr_stats[ri].fpi_len;
+       }
+       total_len = total_rec_len + total_fpi_len;
+
+       for (ri = 0; ri <= RM_MAX_ID; ri++)
+       {
+               uint64          count;
+               uint64          rec_len;
+               uint64          fpi_len;
+               uint64          tot_len;
+               RmgrData        desc;
+
+               if (!RmgrIdIsValid(ri))
+                       continue;
+
+               if (!RmgrIdExists(ri))
+                       continue;
+
+               desc = GetRmgr(ri);
+
+               if (stats_per_record)
+               {
+                       int rj;
+
+                       for (rj = 0; rj < MAX_XLINFO_TYPES; rj++)
+                       {
+                               const char *id;
+
+                               count = stats->record_stats[ri][rj].count;
+                               rec_len = stats->record_stats[ri][rj].rec_len;
+                               fpi_len = stats->record_stats[ri][rj].fpi_len;
+                               tot_len = rec_len + fpi_len;
+
+                               /* Skip undefined combinations and ones that didn't occur */
+                               if (count == 0)
+                                       continue;
+
+                               /* the upper four bits in xl_info are the rmgr's */
+                               id = desc.rm_identify(rj << 4);
+                               if (id == NULL)
+                                       id = psprintf("UNKNOWN (%x)", rj << 4);
+
+                               FillXLogStatsRow(psprintf("%s/%s", desc.rm_name, id), count,
+                                                                total_count, rec_len, total_rec_len, fpi_len,
+                                                                total_fpi_len, tot_len, total_len,
+                                                                values, nulls, ncols);
+
+                               tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
+                                                                        values, nulls);
+                       }
+               }
+               else
+               {
+                       count = stats->rmgr_stats[ri].count;
+                       rec_len = stats->rmgr_stats[ri].rec_len;
+                       fpi_len = stats->rmgr_stats[ri].fpi_len;
+                       tot_len = rec_len + fpi_len;
+
+                       FillXLogStatsRow(desc.rm_name, count, total_count, rec_len,
+                                                        total_rec_len, fpi_len, total_fpi_len, tot_len,
+                                                        total_len, values, nulls, ncols);
+
+                       tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
+                                                                values, nulls);
+               }
+       }
+}
+
+/*
+ * Get WAL stats between start LSN and end LSN.
+ */
+static void
+GetWalStats(FunctionCallInfo fcinfo, XLogRecPtr start_lsn,
+                       XLogRecPtr end_lsn, bool stats_per_record)
+{
+#define PG_GET_WAL_STATS_COLS 9
+       XLogRecPtr      first_record;
+       XLogReaderState *xlogreader;
+       XLogStats stats;
+       ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+       Datum   values[PG_GET_WAL_STATS_COLS];
+       bool    nulls[PG_GET_WAL_STATS_COLS];
+
+       SetSingleFuncCall(fcinfo, 0);
+
+       xlogreader = InitXLogReaderState(start_lsn, &first_record);
+
+       MemSet(&stats, 0, sizeof(stats));
+
+       for (;;)
+       {
+               (void) ReadNextXLogRecord(xlogreader, first_record);
+
+               if (xlogreader->EndRecPtr <= end_lsn)
+                       XLogRecStoreStats(&stats, xlogreader);
+
+               /* if we read up to end_lsn, we're done */
+               if (xlogreader->EndRecPtr >= end_lsn)
+                       break;
+
+               CHECK_FOR_INTERRUPTS();
+       }
+
+       XLogReaderFree(xlogreader);
+
+       MemSet(values, 0, sizeof(values));
+       MemSet(nulls, 0, sizeof(nulls));
+
+       GetXLogSummaryStats(&stats, rsinfo, values, nulls,
+                                               PG_GET_WAL_STATS_COLS,
+                                               stats_per_record);
+
+#undef PG_GET_WAL_STATS_COLS
+}
+
+/*
+ * Get stats of all WAL records between start LSN and end LSN.
+ *
+ * This function emits an error if a future start or end WAL LSN i.e. WAL LSN
+ * the database system doesn't know about is specified.
+ */
+Datum
+pg_get_wal_stats(PG_FUNCTION_ARGS)
+{
+       XLogRecPtr      start_lsn;
+       XLogRecPtr      end_lsn;
+       bool    stats_per_record;
+
+       start_lsn = PG_GETARG_LSN(0);
+       end_lsn = PG_GETARG_LSN(1);
+       stats_per_record = PG_GETARG_BOOL(2);
+
+       end_lsn = ValidateInputLSNs(false, start_lsn, end_lsn);
+
+       GetWalStats(fcinfo, start_lsn, end_lsn, stats_per_record);
+
+       PG_RETURN_VOID();
+}
+
+/*
+ * Get stats of all WAL records from start LSN till end of WAL.
+ *
+ * This function emits an error if a future start i.e. WAL LSN the database
+ * system doesn't know about is specified.
+ */
+Datum
+pg_get_wal_stats_till_end_of_wal(PG_FUNCTION_ARGS)
+{
+       XLogRecPtr      start_lsn;
+       XLogRecPtr      end_lsn = InvalidXLogRecPtr;
+       bool    stats_per_record;
+
+       start_lsn = PG_GETARG_LSN(0);
+       stats_per_record = PG_GETARG_BOOL(1);
+
+       end_lsn = ValidateInputLSNs(true, start_lsn, end_lsn);
+
+       GetWalStats(fcinfo, start_lsn, end_lsn, stats_per_record);
+
+       PG_RETURN_VOID();
+}
diff --git a/contrib/pg_walinspect/pg_walinspect.control b/contrib/pg_walinspect/pg_walinspect.control
new file mode 100644 (file)
index 0000000..017e56a
--- /dev/null
@@ -0,0 +1,5 @@
+# pg_walinspect extension
+comment = 'functions to inspect contents of PostgreSQL Write-Ahead Log'
+default_version = '1.0'
+module_pathname = '$libdir/pg_walinspect'
+relocatable = true
diff --git a/contrib/pg_walinspect/sql/pg_walinspect.sql b/contrib/pg_walinspect/sql/pg_walinspect.sql
new file mode 100644 (file)
index 0000000..1a9fd09
--- /dev/null
@@ -0,0 +1,120 @@
+CREATE EXTENSION pg_walinspect;
+
+CREATE TABLE sample_tbl(col1 int, col2 int);
+
+-- Make sure checkpoints don't interfere with the test.
+SELECT lsn as wal_lsn1 FROM
+  pg_create_physical_replication_slot('regress_pg_walinspect_slot', true, false)
+  \gset
+
+INSERT INTO sample_tbl SELECT * FROM generate_series(1, 2);
+
+SELECT pg_current_wal_lsn() AS wal_lsn2 \gset
+
+INSERT INTO sample_tbl SELECT * FROM generate_series(1, 2);
+
+-- ===================================================================
+-- Tests for input validation
+-- ===================================================================
+
+SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_records_info(:'wal_lsn2', :'wal_lsn1'); -- ERROR
+
+SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_stats(:'wal_lsn2', :'wal_lsn1'); -- ERROR
+
+-- ===================================================================
+-- Tests for all function executions
+-- ===================================================================
+
+SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_record_info(:'wal_lsn1');
+
+SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_records_info(:'wal_lsn1', :'wal_lsn2');
+
+SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_records_info_till_end_of_wal(:'wal_lsn1');
+
+SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_stats(:'wal_lsn1', :'wal_lsn2');
+
+SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_stats_till_end_of_wal(:'wal_lsn1');
+
+-- ===================================================================
+-- Test for filtering out WAL records of a particular table
+-- ===================================================================
+
+SELECT oid AS sample_tbl_oid FROM pg_class WHERE relname = 'sample_tbl' \gset
+
+SELECT COUNT(*) >= 1 AS ok FROM pg_get_wal_records_info(:'wal_lsn1', :'wal_lsn2')
+                       WHERE block_ref LIKE concat('%', :'sample_tbl_oid', '%') AND resource_manager = 'Heap';
+
+-- ===================================================================
+-- Test for filtering out WAL records based on resource_manager and
+-- record_type
+-- ===================================================================
+
+SELECT COUNT(*) >= 1 AS ok FROM pg_get_wal_records_info(:'wal_lsn1', :'wal_lsn2')
+                       WHERE resource_manager = 'Heap' AND record_type = 'INSERT';
+
+-- ===================================================================
+-- Tests for permissions
+-- ===================================================================
+CREATE ROLE regress_pg_walinspect;
+
+SELECT has_function_privilege('regress_pg_walinspect',
+  'pg_get_wal_record_info(pg_lsn)', 'EXECUTE'); -- no
+
+SELECT has_function_privilege('regress_pg_walinspect',
+  'pg_get_wal_records_info(pg_lsn, pg_lsn) ', 'EXECUTE'); -- no
+
+SELECT has_function_privilege('regress_pg_walinspect',
+  'pg_get_wal_stats(pg_lsn, pg_lsn, boolean) ', 'EXECUTE'); -- no
+
+-- Functions accessible by users with role pg_read_server_files
+
+GRANT pg_read_server_files TO regress_pg_walinspect;
+
+SELECT has_function_privilege('regress_pg_walinspect',
+  'pg_get_wal_record_info(pg_lsn)', 'EXECUTE'); -- yes
+
+SELECT has_function_privilege('regress_pg_walinspect',
+  'pg_get_wal_records_info(pg_lsn, pg_lsn) ', 'EXECUTE'); -- yes
+
+SELECT has_function_privilege('regress_pg_walinspect',
+  'pg_get_wal_stats(pg_lsn, pg_lsn, boolean) ', 'EXECUTE'); -- yes
+
+REVOKE pg_read_server_files FROM regress_pg_walinspect;
+
+-- Superuser can grant execute to other users
+GRANT EXECUTE ON FUNCTION pg_get_wal_record_info(pg_lsn)
+  TO regress_pg_walinspect;
+
+GRANT EXECUTE ON FUNCTION pg_get_wal_records_info(pg_lsn, pg_lsn)
+  TO regress_pg_walinspect;
+
+GRANT EXECUTE ON FUNCTION pg_get_wal_stats(pg_lsn, pg_lsn, boolean)
+  TO regress_pg_walinspect;
+
+SELECT has_function_privilege('regress_pg_walinspect',
+  'pg_get_wal_record_info(pg_lsn)', 'EXECUTE'); -- yes
+
+SELECT has_function_privilege('regress_pg_walinspect',
+  'pg_get_wal_records_info(pg_lsn, pg_lsn) ', 'EXECUTE'); -- yes
+
+SELECT has_function_privilege('regress_pg_walinspect',
+  'pg_get_wal_stats(pg_lsn, pg_lsn, boolean) ', 'EXECUTE'); -- yes
+
+REVOKE EXECUTE ON FUNCTION pg_get_wal_record_info(pg_lsn)
+  FROM regress_pg_walinspect;
+
+REVOKE EXECUTE ON FUNCTION pg_get_wal_records_info(pg_lsn, pg_lsn)
+  FROM regress_pg_walinspect;
+
+REVOKE EXECUTE ON FUNCTION pg_get_wal_stats(pg_lsn, pg_lsn, boolean)
+  FROM regress_pg_walinspect;
+
+-- ===================================================================
+-- Clean up
+-- ===================================================================
+
+DROP ROLE regress_pg_walinspect;
+
+SELECT pg_drop_replication_slot('regress_pg_walinspect_slot');
+
+DROP TABLE sample_tbl;
index 1e42ce1a7f7ef1bafd3d5a121565a76e2af139d4..4e7b87a42fc7dc484589ad9dac202a544c48f63e 100644 (file)
@@ -131,6 +131,7 @@ CREATE EXTENSION <replaceable>module_name</replaceable>;
  &pgsurgery;
  &pgtrgm;
  &pgvisibility;
+ &pgwalinspect;
  &postgres-fdw;
  &seg;
  &sepgsql;
index 7dea670969c1445ec011283e5b5ca6efe755bdce..1e82cb2d3d923dbe01f11f3c6a8e4a09cb5c0ad2 100644 (file)
 <!ENTITY pgsurgery       SYSTEM "pgsurgery.sgml">
 <!ENTITY pgtrgm          SYSTEM "pgtrgm.sgml">
 <!ENTITY pgvisibility    SYSTEM "pgvisibility.sgml">
+<!ENTITY pgwalinspect   SYSTEM "pgwalinspect.sgml">
 <!ENTITY postgres-fdw    SYSTEM "postgres-fdw.sgml">
 <!ENTITY seg             SYSTEM "seg.sgml">
 <!ENTITY contrib-spi     SYSTEM "contrib-spi.sgml">
index 29c43768860e1937c372f9caefe4b9465988f9fc..5047e090db02628e68a732a6b2f058608458a159 100644 (file)
@@ -29205,7 +29205,7 @@ postgres=# SELECT * FROM pg_walfile_name_offset((pg_backup_stop()).lsn);
       </row>
 
       <row>
-       <entry role="func_table_entry"><para role="func_signature">
+       <entry id="pg-logical-emit-message" role="func_table_entry"><para role="func_signature">
         <indexterm>
          <primary>pg_logical_emit_message</primary>
         </indexterm>
diff --git a/doc/src/sgml/pgwalinspect.sgml b/doc/src/sgml/pgwalinspect.sgml
new file mode 100644 (file)
index 0000000..44010a5
--- /dev/null
@@ -0,0 +1,275 @@
+<!-- doc/src/sgml/pgwalinspect.sgml -->
+
+<sect1 id="pgwalinspect" xreflabel="pg_walinspect">
+ <title>pg_walinspect</title>
+
+ <indexterm zone="pgwalinspect">
+  <primary>pg_walinspect</primary>
+ </indexterm>
+
+ <para>
+  The <filename>pg_walinspect</filename> module provides SQL functions that
+  allow you to inspect the contents of write-ahead log of
+  a running <productname>PostgreSQL</productname> database cluster at a low
+  level, which is useful for debugging or analytical or reporting or
+  educational purposes. It is similar to <xref linkend="pgwaldump"/>, but
+  accessible through SQL rather than a separate utility.
+ </para>
+
+ <para>
+  All the functions of this module will provide the WAL information using the
+  current server's timeline ID.
+ </para>
+
+ <para>
+  All the functions of this module will try to find the first valid WAL record
+  that is at or after the given <replaceable>in_lsn</replaceable> or
+  <replaceable>start_lsn</replaceable> and will emit error if no such record
+  is available. Similarly, the <replaceable>end_lsn</replaceable> must be
+  available, and if it falls in the middle of a record, the entire record must
+  be available.
+ </para>
+
+ <note>
+  <para>
+   Some functions, such as <function><link
+   linkend="pg-logical-emit-message">pg_logical_emit_message</link></function>,
+   return the LSN <emphasis>after</emphasis> the record just
+   inserted. Therefore, if you pass that LSN as
+   <replaceable>in_lsn</replaceable> or <replaceable>start_lsn</replaceable>
+   to one of these functions, it will return the <emphasis>next</emphasis>
+   record.
+  </para>
+ </note>
+ <para>
+  By default, use of these functions is restricted to superusers and members of
+  the <literal>pg_read_server_files</literal> role. Access may be granted by
+  superusers to others using <command>GRANT</command>.
+ </para>
+
+ <sect2>
+  <title>General Functions</title>
+
+  <variablelist>
+   <varlistentry>
+    <term>
+     <function>
+      pg_get_wal_record_info(in_lsn pg_lsn,
+                             start_lsn OUT pg_lsn,
+                             end_lsn OUT pg_lsn,
+                             prev_lsn OUT pg_lsn,
+                             xid OUT xid,
+                             resource_manager OUT text,
+                             record_type OUT text,
+                             record_length OUT int4,
+                             main_data_length OUT int4,
+                             fpi_length OUT int4,
+                             description OUT text,
+                             block_ref OUT text)
+     </function>
+    </term>
+
+    <listitem>
+     <para>
+      Gets WAL record information of a given LSN. If the given LSN isn't
+      at the start of a WAL record, it gives the information of the next
+      available valid WAL record; or an error if no such record is found.
+     </para>
+    </listitem>
+   </varlistentry>
+
+    <varlistentry>
+    <term>
+     <function>
+      pg_get_wal_records_info(start_lsn pg_lsn,
+                              end_lsn pg_lsn,
+                              start_lsn OUT pg_lsn,
+                              end_lsn OUT pg_lsn,
+                              prev_lsn OUT pg_lsn,
+                              xid OUT xid,
+                              resource_manager OUT text,
+                              record_type OUT text,
+                              record_length OUT int4,
+                              main_data_length OUT int4,
+                              fpi_length OUT int4,
+                              description OUT text,
+                              block_ref OUT text)
+      returns setof record
+     </function>
+    </term>
+
+    <listitem>
+     <para>
+      Gets information of all the valid WAL records between
+      <replaceable>start_lsn</replaceable> and <replaceable>end_lsn</replaceable>.
+      Returns one row per WAL record. If <replaceable>start_lsn</replaceable>
+      or <replaceable>end_lsn</replaceable> are not yet available, the
+      function will raise an error. For example, usage of the function is as
+      follows:
+<screen>
+postgres=# select start_lsn, end_lsn, prev_lsn, xid, resource_manager, record_type, record_length, main_data_length, fpi_length, description from pg_get_wal_records_info('0/14F9A30', '0/15011D7');
+ start_lsn |  end_lsn  | prev_lsn  | xid | resource_manager | record_type  | record_length | main_data_length | fpi_length |     description     
+-----------+-----------+-----------+-----+------------------+--------------+---------------+------------------+------------+---------------------
+ 0/14FA118 | 0/14FB4B0 | 0/14F9958 | 725 | Btree            | INSERT_LEAF  |          5013 |                2 |       4960 | off 246
+ 0/14FB4B0 | 0/14FD050 | 0/14FA118 | 725 | Btree            | INSERT_LEAF  |          7045 |                2 |       6992 | off 130
+ 0/14FD050 | 0/14FD0A8 | 0/14FB4B0 | 725 | Heap2            | MULTI_INSERT |            85 |                6 |          0 | 1 tuples flags 0x02
+ 0/14FD0A8 | 0/14FD0F0 | 0/14FD050 | 725 | Btree            | INSERT_LEAF  |            72 |                2 |          0 | off 155
+ 0/14FD0F0 | 0/14FD138 | 0/14FD0A8 | 725 | Btree            | INSERT_LEAF  |            72 |                2 |          0 | off 134
+ 0/14FD138 | 0/14FD210 | 0/14FD0F0 | 725 | Heap             | INSERT       |           211 |                3 |          0 | off 11 flags 0x00
+ 0/14FD210 | 0/14FD250 | 0/14FD138 | 725 | Btree            | INSERT_LEAF  |            64 |                2 |          0 | off 246
+ 0/14FD250 | 0/14FF260 | 0/14FD210 | 725 | Btree            | INSERT_LEAF  |          8181 |                2 |       8128 | off 47
+ 0/14FF260 | 0/14FF2B8 | 0/14FD250 | 725 | Heap2            | MULTI_INSERT |            85 |                6 |          0 | 1 tuples flags 0x02
+ 0/14FF2B8 | 0/14FF300 | 0/14FF260 | 725 | Btree            | INSERT_LEAF  |            72 |                2 |          0 | off 155
+ 0/14FF300 | 0/15008D8 | 0/14FF2B8 | 725 | Btree            | INSERT_LEAF  |          5565 |                2 |       5512 | off 106
+ 0/15008D8 | 0/1500C48 | 0/14FF300 | 725 | Heap             | INSERT       |           874 |                3 |        820 | off 2 flags 0x01
+(12 rows)
+</screen>
+     </para>
+    </listitem>
+   </varlistentry>
+
+    <varlistentry>
+    <term>
+     <function>
+      pg_get_wal_records_info_till_end_of_wal(start_lsn pg_lsn,
+                                              start_lsn OUT pg_lsn,
+                                              end_lsn OUT pg_lsn,
+                                              prev_lsn OUT pg_lsn,
+                                              xid OUT xid,
+                                              resource_manager OUT text,
+                                              record_type OUT text,
+                                              record_length OUT int4,
+                                              main_data_length OUT int4,
+                                              fpi_length OUT int4,
+                                              description OUT text,
+                                              block_ref OUT text)
+      returns setof record
+     </function>
+    </term>
+
+    <listitem>
+     <para>
+      This function is same as <function>pg_get_wal_records_info()</function>
+      except that it gets information of all the valid WAL records from 
+      <replaceable>start_lsn</replaceable> till the end of WAL.
+     </para>
+    </listitem>
+   </varlistentry>
+
+    <varlistentry>
+    <term>
+     <function>
+      pg_get_wal_stats(start_lsn pg_lsn,
+                       end_lsn pg_lsn,
+                       per_record boolean DEFAULT false,
+                       "resource_manager/record_type" OUT text,
+                       count OUT int8,
+                       count_percentage OUT float4,
+                       record_length OUT int8,
+                       record_length_percentage OUT float4,
+                       fpi_length OUT int8,
+                       fpi_length_percentage OUT float4,
+                       combined_size OUT int8,
+                       combined_size_percentage OUT float4)
+      returns setof record
+     </function>
+    </term>
+
+    <listitem>
+     <para>
+      Gets statistics of all the valid WAL records between
+      <replaceable>start_lsn</replaceable> and
+      <replaceable>end_lsn</replaceable>. By default, it returns one row per
+      <replaceable>resource_manager</replaceable> type. When
+      <replaceable>per_record</replaceable> is set to <literal>true</literal>,
+      it returns one row per <replaceable>record_type</replaceable>.
+      If <replaceable>start_lsn</replaceable>
+      or <replaceable>end_lsn</replaceable> are not yet available, the
+      function will raise an error. For example, usage of the function is as
+      follows:
+<screen>
+postgres=# select * from pg_get_wal_stats('0/12FBA30', '0/15011D7') where count > 0;
+ resource_manager/record_type | count | count_percentage | record_size | record_size_percentage | fpi_size | fpi_size_percentage | combined_size | combined_size_percentage 
+------------------------------+-------+------------------+-------------+------------------------+----------+---------------------+---------------+--------------------------
+ XLOG                         |    10 |       0.10871929 |         796 |            0.052369177 |      352 |         0.061031006 |          1148 |              0.054751817
+ Transaction                  |   187 |        2.0330508 |       62773 |              4.1298623 |        0 |                   0 |         62773 |                2.9938467
+ Storage                      |    13 |       0.14133507 |         546 |            0.035921574 |        0 |                   0 |           546 |                0.0260405
+ Database                     |     2 |      0.021743858 |          84 |            0.005526396 |        0 |                   0 |            84 |             0.0040062307
+ Standby                      |   218 |        2.3700805 |       15908 |              1.0465941 |        0 |                   0 |         15908 |               0.75870377
+ Heap2                        |  1897 |        20.624048 |      383916 |              25.257998 |   364472 |           63.193447 |        748388 |                35.693035
+ Heap                         |  1318 |        14.329202 |      621390 |               40.88151 |   139660 |           24.214746 |        761050 |                 36.29693
+ Btree                        |  5553 |         60.37182 |      434565 |              28.590216 |    72272 |           12.530776 |        506837 |                 24.17269
+(8 rows)
+</screen>
+
+With <replaceable>per_record</replaceable> passed as <literal>true</literal>:
+
+<screen>
+postgres=# select * from pg_get_wal_stats('0/14AFC30', '0/15011D7', true) where count > 0;
+ resource_manager/record_type | count | count_percentage | record_size | record_size_percentage | fpi_size | fpi_size_percentage | combined_size | combined_size_percentage 
+------------------------------+-------+------------------+-------------+------------------------+----------+---------------------+---------------+--------------------------
+ XLOG/CHECKPOINT_SHUTDOWN     |     1 |       0.32894737 |         114 |             0.22891566 |        0 |                   0 |           114 |               0.03534489
+ XLOG/CHECKPOINT_ONLINE       |     4 |        1.3157895 |         456 |             0.91566265 |        0 |                   0 |           456 |               0.14137957
+ XLOG/NEXTOID                 |     1 |       0.32894737 |          30 |            0.060240965 |        0 |                   0 |            30 |              0.009301287
+ Transaction/COMMIT           |     9 |        2.9605262 |        1173 |              2.3554218 |        0 |                   0 |          1173 |               0.36368033
+ Storage/CREATE               |     1 |       0.32894737 |          42 |            0.084337346 |        0 |                   0 |            42 |             0.0130218025
+ Database/CREATE_FILE_COPY    |     2 |       0.65789473 |          84 |             0.16867469 |        0 |                   0 |            84 |              0.026043605
+ Standby/RUNNING_XACTS        |     6 |        1.9736842 |         316 |              0.6345382 |        0 |                   0 |           316 |               0.09797356
+ Standby/INVALIDATIONS        |    45 |        14.802631 |        4018 |               8.068274 |        0 |                   0 |          4018 |                1.2457525
+ Heap2/PRUNE                  |     4 |        1.3157895 |         270 |              0.5421687 |        0 |                   0 |           270 |               0.08371159
+ Heap2/FREEZE_PAGE            |    27 |         8.881579 |       20904 |              41.975903 |        0 |                   0 |         20904 |                 6.481137
+ Heap2/VISIBLE                |    29 |         9.539474 |        1756 |              3.5261045 |    73728 |           27.032736 |         75484 |                23.403278
+ Heap2/MULTI_INSERT           |    13 |        4.2763157 |        1049 |              2.1064258 |    12216 |            4.479057 |         13265 |                 4.112719
+ Heap/INSERT                  |    19 |             6.25 |        2324 |              4.6666665 |    43884 |           16.090284 |         46208 |                14.326463
+ Heap/UPDATE                  |     7 |        2.3026316 |         511 |              1.0261045 |    54340 |           19.924028 |         54851 |                17.006165
+ Heap/HOT_UPDATE              |    11 |         3.618421 |        1134 |              2.2771084 |      468 |          0.17159452 |          1602 |               0.49668875
+ Heap/LOCK                    |     8 |         2.631579 |         432 |              0.8674699 |        0 |                   0 |           432 |               0.13393854
+ Heap/INPLACE                 |    45 |        14.802631 |        9123 |              18.319277 |    16076 |            5.894345 |         25199 |                7.8127713
+ Heap/UPDATE+INIT             |     1 |       0.32894737 |         817 |              1.6405623 |        0 |                   0 |           817 |               0.25330505
+ Btree/INSERT_LEAF            |    70 |        23.026316 |        5183 |              10.407631 |    72024 |           26.407955 |         77207 |                23.937483
+ Btree/DEDUP                  |     1 |       0.32894737 |          64 |             0.12851405 |        0 |                   0 |            64 |              0.019842746
+(20 rows)
+</screen>
+     </para>
+    </listitem>
+   </varlistentry>
+
+    <varlistentry>
+    <term>
+     <function>
+      pg_get_wal_stats_till_end_of_wal(start_lsn pg_lsn,
+                                       per_record boolean DEFAULT false,
+                                       "resource_manager/record_type" OUT text,
+                                       count OUT int8,
+                                       count_percentage OUT float4,
+                                       record_length OUT int8,
+                                       record_length_percentage OUT float4,
+                                       fpi_length OUT int8,
+                                       fpi_length_percentage OUT float4,
+                                       combined_size OUT int8,
+                                       combined_size_percentage OUT float4)
+      returns setof record
+     </function>
+    </term>
+
+    <listitem>
+     <para>
+      This function is same as <function>pg_get_wal_stats()</function> except
+      that it gets statistics of all the valid WAL records from
+      <replaceable>start_lsn</replaceable> till end of WAL.
+     </para>
+    </listitem>
+   </varlistentry>
+
+  </variablelist>
+ </sect2>
+
+ <sect2>
+  <title>Author</title>
+
+  <para>
+   Bharath Rupireddy <email>bharath.rupireddyforpostgres@gmail.com</email>
+  </para>
+ </sect2>
+
+</sect1>
index e7452af67901a8c54bf2396be0950c7338490361..dff1e7685e9b7ea07df022e7e7c2f8859e69807c 100644 (file)
@@ -200,3 +200,133 @@ xlog_identify(uint8 info)
 
        return id;
 }
+
+/*
+ * Returns a string giving information about all the blocks in an
+ * XLogRecord.
+ */
+void
+XLogRecGetBlockRefInfo(XLogReaderState *record, bool pretty,
+                                          bool detailed_format, StringInfo buf,
+                                          uint32 *fpi_len)
+{
+       int     block_id;
+
+       Assert(record != NULL);
+
+       if (detailed_format && pretty)
+               appendStringInfoChar(buf, '\n');
+
+       for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
+       {
+               RelFileNode rnode = {InvalidOid, InvalidOid, InvalidOid};
+               ForkNumber      forknum = InvalidForkNumber;
+               BlockNumber blk = InvalidBlockNumber;
+
+               if (!XLogRecHasBlockRef(record, block_id))
+                       continue;
+
+               XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blk);
+
+               if (detailed_format)
+               {
+                       /* Get block references in detailed format. */
+
+                       if (pretty)
+                               appendStringInfoChar(buf, '\t');
+                       else if (block_id > 0)
+                               appendStringInfoChar(buf, ' ');
+
+                       appendStringInfo(buf,
+                                                        "blkref #%d: rel %u/%u/%u fork %s blk %u",
+                                                        block_id,
+                                                        rnode.spcNode, rnode.dbNode, rnode.relNode,
+                                                        forkNames[forknum],
+                                                        blk);
+
+                       if (XLogRecHasBlockImage(record, block_id))
+                       {
+                               uint8           bimg_info = XLogRecGetBlock(record, block_id)->bimg_info;
+
+                               /* Calculate the amount of FPI data in the record. */
+                               if (fpi_len)
+                                       *fpi_len += XLogRecGetBlock(record, block_id)->bimg_len;
+
+                               if (BKPIMAGE_COMPRESSED(bimg_info))
+                               {
+                                       const char *method;
+
+                                       if ((bimg_info & BKPIMAGE_COMPRESS_PGLZ) != 0)
+                                               method = "pglz";
+                                       else if ((bimg_info & BKPIMAGE_COMPRESS_LZ4) != 0)
+                                               method = "lz4";
+                                       else if ((bimg_info & BKPIMAGE_COMPRESS_ZSTD) != 0)
+                                               method = "zstd";
+                                       else
+                                               method = "unknown";
+
+                                       appendStringInfo(buf,
+                                                                        " (FPW%s); hole: offset: %u, length: %u, "
+                                                                        "compression saved: %u, method: %s",
+                                                                        XLogRecBlockImageApply(record, block_id) ?
+                                                                        "" : " for WAL verification",
+                                                                        XLogRecGetBlock(record, block_id)->hole_offset,
+                                                                        XLogRecGetBlock(record, block_id)->hole_length,
+                                                                        BLCKSZ -
+                                                                        XLogRecGetBlock(record, block_id)->hole_length -
+                                                                        XLogRecGetBlock(record, block_id)->bimg_len,
+                                                                        method);
+                               }
+                               else
+                               {
+                                       appendStringInfo(buf,
+                                                                        " (FPW%s); hole: offset: %u, length: %u",
+                                                                        XLogRecBlockImageApply(record, block_id) ?
+                                                                        "" : " for WAL verification",
+                                                                        XLogRecGetBlock(record, block_id)->hole_offset,
+                                                                        XLogRecGetBlock(record, block_id)->hole_length);
+                               }
+                       }
+
+                       if (pretty)
+                               appendStringInfoChar(buf, '\n');
+               }
+               else
+               {
+                       /* Get block references in short format. */
+
+                       if (forknum != MAIN_FORKNUM)
+                       {
+                               appendStringInfo(buf,
+                                                                ", blkref #%d: rel %u/%u/%u fork %s blk %u",
+                                                                block_id,
+                                                                rnode.spcNode, rnode.dbNode, rnode.relNode,
+                                                                forkNames[forknum],
+                                                                blk);
+                       }
+                       else
+                       {
+                               appendStringInfo(buf,
+                                                                ", blkref #%d: rel %u/%u/%u blk %u",
+                                                                block_id,
+                                                                rnode.spcNode, rnode.dbNode, rnode.relNode,
+                                                                blk);
+                       }
+
+                       if (XLogRecHasBlockImage(record, block_id))
+                       {
+                               /* Calculate the amount of FPI data in the record. */
+                               if (fpi_len)
+                                       *fpi_len += XLogRecGetBlock(record, block_id)->bimg_len;
+
+                               if (XLogRecBlockImageApply(record, block_id))
+                                       appendStringInfo(buf, " FPW");
+                               else
+                                       appendStringInfo(buf, " FPW for WAL verification");
+                       }
+               }
+       }
+
+       if (!detailed_format && pretty)
+               appendStringInfoChar(buf, '\n');
+}
index 8c17c88dfc4806f38bcda7cdf2514342527e08d6..3e5444a6f76980395b55db137cda5aab07455ede 100644 (file)
@@ -34,6 +34,7 @@ OBJS = \
        xlogprefetcher.o \
        xlogreader.o \
        xlogrecovery.o \
+       xlogstats.o \
        xlogutils.o
 
 include $(top_srcdir)/src/backend/common.mk
index 5862d9dc75f894779e9852b64c5fc1297047ef72..a5f1a648d3db238f60f5ebdd08d5a736c45efbcd 100644 (file)
@@ -1320,13 +1320,6 @@ XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr,
        return true;
 }
 
-#ifdef FRONTEND
-/*
- * Functions that are currently not needed in the backend, but are better
- * implemented inside xlogreader.c because of the internal facilities available
- * here.
- */
-
 /*
  * Find the first record with an lsn >= RecPtr.
  *
@@ -1447,8 +1440,6 @@ err:
        return InvalidXLogRecPtr;
 }
 
-#endif                                                 /* FRONTEND */
-
 /*
  * Helper function to ease writing of XLogRoutine->page_read callbacks.
  * If this function is used, caller must supply a segment_open callback in
diff --git a/src/backend/access/transam/xlogstats.c b/src/backend/access/transam/xlogstats.c
new file mode 100644 (file)
index 0000000..aff3069
--- /dev/null
@@ -0,0 +1,93 @@
+/*-------------------------------------------------------------------------
+ *
+ * xlogstats.c
+ *             Functions for WAL Statitstics
+ *
+ * Copyright (c) 2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *             src/backend/access/transam/xlogstats.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/xlogreader.h"
+#include "access/xlogstats.h"
+
+/*
+ * Calculate the size of a record, split into !FPI and FPI parts.
+ */
+void
+XLogRecGetLen(XLogReaderState *record, uint32 *rec_len,
+                         uint32 *fpi_len)
+{
+       int     block_id;
+
+       /*
+        * Calculate the amount of FPI data in the record.
+        *
+        * XXX: We peek into xlogreader's private decoded backup blocks for the
+        * bimg_len indicating the length of FPI data.
+        */
+       *fpi_len = 0;
+       for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
+       {
+               if (XLogRecHasBlockImage(record, block_id))
+                       *fpi_len += XLogRecGetBlock(record, block_id)->bimg_len;
+       }
+
+       /*
+        * Calculate the length of the record as the total length - the length of
+        * all the block images.
+        */
+       *rec_len = XLogRecGetTotalLen(record) - *fpi_len;
+}
+
+/*
+ * Store per-rmgr and per-record statistics for a given record.
+ */
+void
+XLogRecStoreStats(XLogStats *stats, XLogReaderState *record)
+{
+       RmgrId  rmid;
+       uint8   recid;
+       uint32  rec_len;
+       uint32  fpi_len;
+
+       Assert(stats != NULL && record != NULL);
+
+       stats->count++;
+
+       rmid = XLogRecGetRmid(record);
+
+       XLogRecGetLen(record, &rec_len, &fpi_len);
+
+       /* Update per-rmgr statistics */
+
+       stats->rmgr_stats[rmid].count++;
+       stats->rmgr_stats[rmid].rec_len += rec_len;
+       stats->rmgr_stats[rmid].fpi_len += fpi_len;
+
+       /*
+        * Update per-record statistics, where the record is identified by a
+        * combination of the RmgrId and the four bits of the xl_info field that
+        * are the rmgr's domain (resulting in sixteen possible entries per
+        * RmgrId).
+        */
+
+       recid = XLogRecGetInfo(record) >> 4;
+
+       /*
+        * XACT records need to be handled differently. Those records use the
+        * first bit of those four bits for an optional flag variable and the
+        * following three bits for the opcode. We filter opcode out of xl_info
+        * and use it as the identifier of the record.
+        */
+       if (rmid == RM_XACT_ID)
+               recid &= 0x07;
+
+       stats->record_stats[rmid][recid].count++;
+       stats->record_stats[rmid][recid].rec_len += rec_len;
+       stats->record_stats[rmid][recid].fpi_len += fpi_len;
+}
index bb2d3ec991c52041df56dcd162553d20608ff6f0..b5d34c61e6669d42f2b177f8a049122dce9b056a 100644 (file)
@@ -80,6 +80,10 @@ typedef struct xl_invalid_page
 
 static HTAB *invalid_page_tab = NULL;
 
+static int
+read_local_xlog_page_guts(XLogReaderState *state, XLogRecPtr targetPagePtr,
+                                                 int reqLen, XLogRecPtr targetRecPtr,
+                                                 char *cur_page, bool wait_for_wal);
 
 /* Report a reference to an invalid page */
 static void
@@ -870,6 +874,31 @@ wal_segment_close(XLogReaderState *state)
 int
 read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr,
                                         int reqLen, XLogRecPtr targetRecPtr, char *cur_page)
+{
+       return read_local_xlog_page_guts(state, targetPagePtr, reqLen,
+                                                                        targetRecPtr, cur_page, true);
+}
+
+/*
+ * Same as read_local_xlog_page except that it doesn't wait for future WAL
+ * to be available.
+ */
+int
+read_local_xlog_page_no_wait(XLogReaderState *state, XLogRecPtr targetPagePtr,
+                                                        int reqLen, XLogRecPtr targetRecPtr,
+                                                        char *cur_page)
+{
+       return read_local_xlog_page_guts(state, targetPagePtr, reqLen,
+                                                                        targetRecPtr, cur_page, false);
+}
+
+/*
+ * Implementation of read_local_xlog_page and its no wait version.
+ */
+static int
+read_local_xlog_page_guts(XLogReaderState *state, XLogRecPtr targetPagePtr,
+                                                 int reqLen, XLogRecPtr targetRecPtr,
+                                                 char *cur_page, bool wait_for_wal)
 {
        XLogRecPtr      read_upto,
                                loc;
@@ -925,6 +954,10 @@ read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr,
                        if (loc <= read_upto)
                                break;
 
+                       /* If asked, let's not wait for future WAL. */
+                       if (!wait_for_wal)
+                               break;
+
                        CHECK_FOR_INTERRUPTS();
                        pg_usleep(1000L);
                }
index 3be00a8b61f9c97640efcf9a6ffb9088c65a11ac..dabb6e34b6f4b8d67ec286245f94b0412d6ae81d 100644 (file)
@@ -23,6 +23,7 @@
 /xactdesc.c
 /xlogdesc.c
 /xlogreader.c
+/xlogstat.c
 
 # Generated by test suite
 /tmp_check/
index 9f333d0c8ab10569d4264e99086267898d8e6f0a..d6459e17c73593286cb0a5d807ea4fde4a4f1a29 100644 (file)
@@ -13,7 +13,8 @@ OBJS = \
        compat.o \
        pg_waldump.o \
        rmgrdesc.o \
-       xlogreader.o
+       xlogreader.o \
+       xlogstats.o
 
 override CPPFLAGS := -DFRONTEND $(CPPFLAGS)
 
@@ -29,6 +30,9 @@ pg_waldump: $(OBJS) | submake-libpgport
 xlogreader.c: % : $(top_srcdir)/src/backend/access/transam/%
        rm -f $@ && $(LN_S) $< .
 
+xlogstats.c: % : $(top_srcdir)/src/backend/access/transam/%
+       rm -f $@ && $(LN_S) $< .
+
 $(RMGRDESCSOURCES): % : $(top_srcdir)/src/backend/access/rmgrdesc/%
        rm -f $@ && $(LN_S) $< .
 
@@ -42,7 +46,7 @@ uninstall:
        rm -f '$(DESTDIR)$(bindir)/pg_waldump$(X)'
 
 clean distclean maintainer-clean:
-       rm -f pg_waldump$(X) $(OBJS) $(RMGRDESCSOURCES) xlogreader.c
+       rm -f pg_waldump$(X) $(OBJS) $(RMGRDESCSOURCES) xlogreader.c xlogstats.c
        rm -rf tmp_check
 
 check:
index 30ca7684bd080baae567a504b02043e6ff7b4b02..8bf6899d6782e5a10ebd98fc418496d10b2fede1 100644 (file)
 #include "access/xlog_internal.h"
 #include "access/xlogreader.h"
 #include "access/xlogrecord.h"
+#include "access/xlogstats.h"
 #include "common/fe_memutils.h"
 #include "common/logging.h"
 #include "getopt_long.h"
 #include "rmgrdesc.h"
 
+/*
+ * NOTE: For any code change or issue fix here, it is highly recommended to
+ * give a thought about doing the same in pg_walinspect contrib module as well.
+ */
+
 static const char *progname;
 
 static int     WalSegSz;
@@ -66,24 +72,6 @@ typedef struct XLogDumpConfig
        bool            filter_by_fpw;
 } XLogDumpConfig;
 
-typedef struct Stats
-{
-       uint64          count;
-       uint64          rec_len;
-       uint64          fpi_len;
-} Stats;
-
-#define MAX_XLINFO_TYPES 16
-
-typedef struct XLogDumpStats
-{
-       uint64          count;
-       XLogRecPtr      startptr;
-       XLogRecPtr      endptr;
-       Stats           rmgr_stats[RM_MAX_ID + 1];
-       Stats           record_stats[RM_MAX_ID + 1][MAX_XLINFO_TYPES];
-} XLogDumpStats;
-
 #define fatal_error(...) do { pg_log_fatal(__VA_ARGS__); exit(EXIT_FAILURE); } while(0)
 
 /*
@@ -453,81 +441,6 @@ XLogRecordHasFPW(XLogReaderState *record)
        return false;
 }
 
-/*
- * Calculate the size of a record, split into !FPI and FPI parts.
- */
-static void
-XLogDumpRecordLen(XLogReaderState *record, uint32 *rec_len, uint32 *fpi_len)
-{
-       int                     block_id;
-
-       /*
-        * Calculate the amount of FPI data in the record.
-        *
-        * XXX: We peek into xlogreader's private decoded backup blocks for the
-        * bimg_len indicating the length of FPI data.
-        */
-       *fpi_len = 0;
-       for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
-       {
-               if (XLogRecHasBlockImage(record, block_id))
-                       *fpi_len += XLogRecGetBlock(record, block_id)->bimg_len;
-       }
-
-       /*
-        * Calculate the length of the record as the total length - the length of
-        * all the block images.
-        */
-       *rec_len = XLogRecGetTotalLen(record) - *fpi_len;
-}
-
-/*
- * Store per-rmgr and per-record statistics for a given record.
- */
-static void
-XLogDumpCountRecord(XLogDumpConfig *config, XLogDumpStats *stats,
-                                       XLogReaderState *record)
-{
-       RmgrId          rmid;
-       uint8           recid;
-       uint32          rec_len;
-       uint32          fpi_len;
-
-       stats->count++;
-
-       rmid = XLogRecGetRmid(record);
-
-       XLogDumpRecordLen(record, &rec_len, &fpi_len);
-
-       /* Update per-rmgr statistics */
-
-       stats->rmgr_stats[rmid].count++;
-       stats->rmgr_stats[rmid].rec_len += rec_len;
-       stats->rmgr_stats[rmid].fpi_len += fpi_len;
-
-       /*
-        * Update per-record statistics, where the record is identified by a
-        * combination of the RmgrId and the four bits of the xl_info field that
-        * are the rmgr's domain (resulting in sixteen possible entries per
-        * RmgrId).
-        */
-
-       recid = XLogRecGetInfo(record) >> 4;
-
-       /*
-        * XACT records need to be handled differently. Those records use the
-        * first bit of those four bits for an optional flag variable and the
-        * following three bits for the opcode. We filter opcode out of xl_info
-        * and use it as the identifier of the record.
-        */
-       if (rmid == RM_XACT_ID)
-               recid &= 0x07;
-
-       stats->record_stats[rmid][recid].count++;
-       stats->record_stats[rmid][recid].rec_len += rec_len;
-       stats->record_stats[rmid][recid].fpi_len += fpi_len;
-}
-
 /*
  * Print a record to stdout
  */
@@ -538,15 +451,11 @@ XLogDumpDisplayRecord(XLogDumpConfig *config, XLogReaderState *record)
        const RmgrDescData *desc = GetRmgrDesc(XLogRecGetRmid(record));
        uint32          rec_len;
        uint32          fpi_len;
-       RelFileNode rnode;
-       ForkNumber      forknum;
-       BlockNumber blk;
-       int                     block_id;
        uint8           info = XLogRecGetInfo(record);
        XLogRecPtr      xl_prev = XLogRecGetPrev(record);
        StringInfoData s;
 
-       XLogDumpRecordLen(record, &rec_len, &fpi_len);
+       XLogRecGetLen(record, &rec_len, &fpi_len);
 
        printf("rmgr: %-11s len (rec/tot): %6u/%6u, tx: %10u, lsn: %X/%08X, prev %X/%08X, ",
                   desc->rm_name,
@@ -564,93 +473,11 @@ XLogDumpDisplayRecord(XLogDumpConfig *config, XLogReaderState *record)
        initStringInfo(&s);
        desc->rm_desc(&s, record);
        printf("%s", s.data);
-       pfree(s.data);
-
-       if (!config->bkp_details)
-       {
-               /* print block references (short format) */
-               for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
-               {
-                       if (!XLogRecHasBlockRef(record, block_id))
-                               continue;
-
-                       XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blk);
-                       if (forknum != MAIN_FORKNUM)
-                               printf(", blkref #%d: rel %u/%u/%u fork %s blk %u",
-                                          block_id,
-                                          rnode.spcNode, rnode.dbNode, rnode.relNode,
-                                          forkNames[forknum],
-                                          blk);
-                       else
-                               printf(", blkref #%d: rel %u/%u/%u blk %u",
-                                          block_id,
-                                          rnode.spcNode, rnode.dbNode, rnode.relNode,
-                                          blk);
-                       if (XLogRecHasBlockImage(record, block_id))
-                       {
-                               if (XLogRecBlockImageApply(record, block_id))
-                                       printf(" FPW");
-                               else
-                                       printf(" FPW for WAL verification");
-                       }
-               }
-               putchar('\n');
-       }
-       else
-       {
-               /* print block references (detailed format) */
-               putchar('\n');
-               for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
-               {
-                       if (!XLogRecHasBlockRef(record, block_id))
-                               continue;
-
-                       XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blk);
-                       printf("\tblkref #%d: rel %u/%u/%u fork %s blk %u",
-                                  block_id,
-                                  rnode.spcNode, rnode.dbNode, rnode.relNode,
-                                  forkNames[forknum],
-                                  blk);
-                       if (XLogRecHasBlockImage(record, block_id))
-                       {
-                               uint8           bimg_info = XLogRecGetBlock(record, block_id)->bimg_info;
 
-                               if (BKPIMAGE_COMPRESSED(bimg_info))
-                               {
-                                       const char *method;
-
-                                       if ((bimg_info & BKPIMAGE_COMPRESS_PGLZ) != 0)
-                                               method = "pglz";
-                                       else if ((bimg_info & BKPIMAGE_COMPRESS_LZ4) != 0)
-                                               method = "lz4";
-                                       else if ((bimg_info & BKPIMAGE_COMPRESS_ZSTD) != 0)
-                                               method = "zstd";
-                                       else
-                                               method = "unknown";
-
-                                       printf(" (FPW%s); hole: offset: %u, length: %u, "
-                                                  "compression saved: %u, method: %s",
-                                                  XLogRecBlockImageApply(record, block_id) ?
-                                                  "" : " for WAL verification",
-                                                  XLogRecGetBlock(record, block_id)->hole_offset,
-                                                  XLogRecGetBlock(record, block_id)->hole_length,
-                                                  BLCKSZ -
-                                                  XLogRecGetBlock(record, block_id)->hole_length -
-                                                  XLogRecGetBlock(record, block_id)->bimg_len,
-                                                  method);
-                               }
-                               else
-                               {
-                                       printf(" (FPW%s); hole: offset: %u, length: %u",
-                                                  XLogRecBlockImageApply(record, block_id) ?
-                                                  "" : " for WAL verification",
-                                                  XLogRecGetBlock(record, block_id)->hole_offset,
-                                                  XLogRecGetBlock(record, block_id)->hole_length);
-                               }
-                       }
-                       putchar('\n');
-               }
-       }
+       resetStringInfo(&s);
+       XLogRecGetBlockRefInfo(record, true, config->bkp_details, &s, NULL);
+       printf("%s", s.data);
+       pfree(s.data);
 }
 
 /*
@@ -698,7 +525,7 @@ XLogDumpStatsRow(const char *name,
  * Display summary statistics about the records seen so far.
  */
 static void
-XLogDumpDisplayStats(XLogDumpConfig *config, XLogDumpStats *stats)
+XLogDumpDisplayStats(XLogDumpConfig *config, XLogStats *stats)
 {
        int                     ri,
                                rj;
@@ -722,6 +549,9 @@ XLogDumpDisplayStats(XLogDumpConfig *config, XLogDumpStats *stats)
 
        for (ri = 0; ri <= RM_MAX_ID; ri++)
        {
+               if (!RmgrIdIsValid(ri))
+                       continue;
+
                total_count += stats->rmgr_stats[ri].count;
                total_rec_len += stats->rmgr_stats[ri].rec_len;
                total_fpi_len += stats->rmgr_stats[ri].fpi_len;
@@ -867,7 +697,7 @@ main(int argc, char **argv)
        XLogReaderState *xlogreader_state;
        XLogDumpPrivate private;
        XLogDumpConfig config;
-       XLogDumpStats stats;
+       XLogStats stats;
        XLogRecord *record;
        XLogRecPtr      first_record;
        char       *waldir = NULL;
@@ -921,7 +751,7 @@ main(int argc, char **argv)
 
        memset(&private, 0, sizeof(XLogDumpPrivate));
        memset(&config, 0, sizeof(XLogDumpConfig));
-       memset(&stats, 0, sizeof(XLogDumpStats));
+       memset(&stats, 0, sizeof(XLogStats));
 
        private.timeline = 1;
        private.startptr = InvalidXLogRecPtr;
@@ -1319,7 +1149,7 @@ main(int argc, char **argv)
                {
                        if (config.stats == true)
                        {
-                               XLogDumpCountRecord(&config, &stats, xlogreader_state);
+                               XLogRecStoreStats(&stats, xlogreader_state);
                                stats.endptr = xlogreader_state->EndRecPtr;
                        }
                        else
index e302bd102cd8162c846592939fd3c58910f8bef8..5e1e3446aedc7dd52cb6c0dd1bae089017643d6e 100644 (file)
@@ -31,7 +31,7 @@ extern XLogRecPtr XactLastRecEnd;
 extern PGDLLIMPORT XLogRecPtr XactLastCommitEnd;
 
 /* these variables are GUC parameters related to XLOG */
-extern int     wal_segment_size;
+extern PGDLLIMPORT int wal_segment_size;
 extern int     min_wal_size_mb;
 extern int     max_wal_size_mb;
 extern int     wal_keep_size_mb;
index d9df7f295d54ab12fe8ebf7a819c5593c3504dee..750f6341205215d5065343bbe851b20d135df3af 100644 (file)
@@ -320,7 +320,7 @@ typedef struct RmgrData
                                                          struct XLogRecordBuffer *buf);
 } RmgrData;
 
-extern RmgrData RmgrTable[];
+extern PGDLLIMPORT RmgrData RmgrTable[];
 extern void RmgrStartup(void);
 extern void RmgrCleanup(void);
 extern void RmgrNotFound(RmgrId rmid);
@@ -350,6 +350,10 @@ extern XLogRecPtr RequestXLogSwitch(bool mark_unimportant);
 
 extern void GetOldestRestartPoint(XLogRecPtr *oldrecptr, TimeLineID *oldtli);
 
+extern void XLogRecGetBlockRefInfo(XLogReaderState *record, bool pretty,
+                                                                  bool detailed_format, StringInfo buf,
+                                                                  uint32 *fpi_len);
+
 /*
  * Exported for the functions in timeline.c and xlogarchive.c.  Only valid
  * in the startup process.
index d8eb857611040de2f0f471ab14d9770dad2593ea..727e9fe971838ff95b9ed781f87c51bf382ecec5 100644 (file)
@@ -344,9 +344,7 @@ extern void XLogReaderSetDecodeBuffer(XLogReaderState *state,
 
 /* Position the XLogReader to given record */
 extern void XLogBeginRead(XLogReaderState *state, XLogRecPtr RecPtr);
-#ifdef FRONTEND
 extern XLogRecPtr XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr);
-#endif                                                 /* FRONTEND */
 
 /* Return values from XLogPageReadCB. */
 typedef enum XLogPageReadResult
diff --git a/src/include/access/xlogstats.h b/src/include/access/xlogstats.h
new file mode 100644 (file)
index 0000000..453bb1f
--- /dev/null
@@ -0,0 +1,40 @@
+/*-------------------------------------------------------------------------
+ *
+ * xlogstats.h
+ *             Definitions for WAL Statitstics
+ *
+ * Copyright (c) 2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *             src/include/access/xlogstats.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef XLOGSTATS_H
+#define XLOGSTATS_H
+
+#define MAX_XLINFO_TYPES 16
+
+typedef struct XLogRecStats
+{
+       uint64  count;
+       uint64  rec_len;
+       uint64  fpi_len;
+} XLogRecStats;
+
+typedef struct XLogStats
+{
+       uint64  count;
+#ifdef FRONTEND
+       XLogRecPtr      startptr;
+       XLogRecPtr      endptr;
+#endif
+       XLogRecStats    rmgr_stats[RM_MAX_ID + 1];
+       XLogRecStats    record_stats[RM_MAX_ID + 1][MAX_XLINFO_TYPES];
+} XLogStats;
+
+extern void XLogRecGetLen(XLogReaderState *record, uint32 *rec_len,
+                                                 uint32 *fpi_len);
+extern void XLogRecStoreStats(XLogStats *stats, XLogReaderState *record);
+
+#endif                                                 /* XLOGSTATS_H */
index ff40f96e423ed2d582cf462a2229b7656ac3ceb9..3746e31e40f75609cf58f8c5e2ecaeb080823263 100644 (file)
@@ -93,6 +93,10 @@ extern void FreeFakeRelcacheEntry(Relation fakerel);
 extern int     read_local_xlog_page(XLogReaderState *state,
                                                                 XLogRecPtr targetPagePtr, int reqLen,
                                                                 XLogRecPtr targetRecPtr, char *cur_page);
+extern int     read_local_xlog_page_no_wait(XLogReaderState *state,
+                                                                                XLogRecPtr targetPagePtr, int reqLen,
+                                                                                XLogRecPtr targetRecPtr,
+                                                                                char *cur_page);
 extern void wal_segment_open(XLogReaderState *state,
                                                         XLogSegNo nextSegNo,
                                                         TimeLineID *tli_p);