summaryrefslogtreecommitdiff
path: root/src/bin
diff options
context:
space:
mode:
author: Robert Haas, 2022-09-27 17:25:21 +0000
committer: Robert Haas, 2022-09-27 17:25:21 +0000
commit: 05d4cbf9b6ba708858984b01ca0fc56d59d4ec7c (patch)
tree: 645e3ac17f002ae33e086dbf871c330986452c35 /src/bin
parent: 2f47715cc8649f854b1df28dfc338af9801db217 (diff)
Increase width of RelFileNumbers from 32 bits to 56 bits.
RelFileNumbers are now assigned using a separate counter, instead of being assigned from the OID counter. This counter never wraps around: if all 2^56 possible RelFileNumbers are used, an internal error occurs. As the cluster is limited to 2^64 total bytes of WAL, this limitation should not cause a problem in practice.

If the counter were 64 bits wide rather than 56 bits wide, we would need to increase the width of the BufferTag, which might adversely impact buffer lookup performance. Also, this lets us use bigint for pg_class.relfilenode and other places where these values are exposed at the SQL level without worrying about overflow.

This should remove the need to keep "tombstone" files around until the next checkpoint when relations are removed. We do that to keep RelFileNumbers from being recycled, but now that won't happen anyway. However, this patch doesn't actually change anything in this area; it just makes it possible for a future patch to do so.

Dilip Kumar, based on an idea from Andres Freund, who also reviewed some earlier versions of the patch. Further review and some wordsmithing by me. Also reviewed at various points by Ashutosh Sharma, Vignesh C, Amul Sul, Álvaro Herrera, and Tom Lane.

Discussion: http://postgr.es/m/CA+Tgmobp7+7kmi4gkq7Y+4AM9fTvL+O1oQ4-5gFTT+6Ng-dQ=g@mail.gmail.com
Diffstat (limited to 'src/bin')
-rw-r--r--  src/bin/pg_checksums/pg_checksums.c      4
-rw-r--r--  src/bin/pg_controldata/pg_controldata.c  2
-rw-r--r--  src/bin/pg_dump/pg_dump.c                26
-rw-r--r--  src/bin/pg_rewind/filemap.c              6
-rw-r--r--  src/bin/pg_upgrade/info.c                3
-rw-r--r--  src/bin/pg_upgrade/pg_upgrade.c          6
-rw-r--r--  src/bin/pg_upgrade/relfilenumber.c       4
-rw-r--r--  src/bin/pg_waldump/pg_waldump.c          2
-rw-r--r--  src/bin/scripts/t/090_reindexdb.pl       2
9 files changed, 27 insertions, 28 deletions
diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c
index 324ccf77834..ddb5ec117f2 100644
--- a/src/bin/pg_checksums/pg_checksums.c
+++ b/src/bin/pg_checksums/pg_checksums.c
@@ -485,9 +485,7 @@ main(int argc, char *argv[])
mode = PG_MODE_ENABLE;
break;
case 'f':
- if (!option_parse_int(optarg, "-f/--filenode", 0,
- INT_MAX,
- NULL))
+ if (!option_parse_relfilenumber(optarg, "-f/--filenode"))
exit(1);
only_filenode = pstrdup(optarg);
break;
diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c
index c390ec51ce9..2f0e91fc2f9 100644
--- a/src/bin/pg_controldata/pg_controldata.c
+++ b/src/bin/pg_controldata/pg_controldata.c
@@ -250,6 +250,8 @@ main(int argc, char *argv[])
printf(_("Latest checkpoint's NextXID: %u:%u\n"),
EpochFromFullTransactionId(ControlFile->checkPointCopy.nextXid),
XidFromFullTransactionId(ControlFile->checkPointCopy.nextXid));
+ printf(_("Latest checkpoint's NextRelFileNumber:%llu\n"),
+ (unsigned long long) ControlFile->checkPointCopy.nextRelFileNumber);
printf(_("Latest checkpoint's NextOID: %u\n"),
ControlFile->checkPointCopy.nextOid);
printf(_("Latest checkpoint's NextMultiXactId: %u\n"),
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index bd9b066e4eb..9f78971cab5 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -3184,15 +3184,15 @@ dumpDatabase(Archive *fout)
atooid(PQgetvalue(lo_res, i, ii_oid)));
oid = atooid(PQgetvalue(lo_res, i, ii_oid));
- relfilenumber = atooid(PQgetvalue(lo_res, i, ii_relfilenode));
+ relfilenumber = atorelnumber(PQgetvalue(lo_res, i, ii_relfilenode));
if (oid == LargeObjectRelationId)
appendPQExpBuffer(loOutQry,
- "SELECT pg_catalog.binary_upgrade_set_next_heap_relfilenode('%u'::pg_catalog.oid);\n",
+ "SELECT pg_catalog.binary_upgrade_set_next_heap_relfilenode('" UINT64_FORMAT "'::pg_catalog.int8);\n",
relfilenumber);
else if (oid == LargeObjectLOidPNIndexId)
appendPQExpBuffer(loOutQry,
- "SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('%u'::pg_catalog.oid);\n",
+ "SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('" UINT64_FORMAT "'::pg_catalog.int8);\n",
relfilenumber);
}
@@ -4877,16 +4877,16 @@ binary_upgrade_set_pg_class_oids(Archive *fout,
relkind = *PQgetvalue(upgrade_res, 0, PQfnumber(upgrade_res, "relkind"));
- relfilenumber = atooid(PQgetvalue(upgrade_res, 0,
- PQfnumber(upgrade_res, "relfilenode")));
+ relfilenumber = atorelnumber(PQgetvalue(upgrade_res, 0,
+ PQfnumber(upgrade_res, "relfilenode")));
toast_oid = atooid(PQgetvalue(upgrade_res, 0,
PQfnumber(upgrade_res, "reltoastrelid")));
- toast_relfilenumber = atooid(PQgetvalue(upgrade_res, 0,
- PQfnumber(upgrade_res, "toast_relfilenode")));
+ toast_relfilenumber = atorelnumber(PQgetvalue(upgrade_res, 0,
+ PQfnumber(upgrade_res, "toast_relfilenode")));
toast_index_oid = atooid(PQgetvalue(upgrade_res, 0,
PQfnumber(upgrade_res, "indexrelid")));
- toast_index_relfilenumber = atooid(PQgetvalue(upgrade_res, 0,
- PQfnumber(upgrade_res, "toast_index_relfilenode")));
+ toast_index_relfilenumber = atorelnumber(PQgetvalue(upgrade_res, 0,
+ PQfnumber(upgrade_res, "toast_index_relfilenode")));
appendPQExpBufferStr(upgrade_buffer,
"\n-- For binary upgrade, must preserve pg_class oids and relfilenodes\n");
@@ -4904,7 +4904,7 @@ binary_upgrade_set_pg_class_oids(Archive *fout,
*/
if (RelFileNumberIsValid(relfilenumber) && relkind != RELKIND_PARTITIONED_TABLE)
appendPQExpBuffer(upgrade_buffer,
- "SELECT pg_catalog.binary_upgrade_set_next_heap_relfilenode('%u'::pg_catalog.oid);\n",
+ "SELECT pg_catalog.binary_upgrade_set_next_heap_relfilenode('" UINT64_FORMAT "'::pg_catalog.int8);\n",
relfilenumber);
/*
@@ -4918,7 +4918,7 @@ binary_upgrade_set_pg_class_oids(Archive *fout,
"SELECT pg_catalog.binary_upgrade_set_next_toast_pg_class_oid('%u'::pg_catalog.oid);\n",
toast_oid);
appendPQExpBuffer(upgrade_buffer,
- "SELECT pg_catalog.binary_upgrade_set_next_toast_relfilenode('%u'::pg_catalog.oid);\n",
+ "SELECT pg_catalog.binary_upgrade_set_next_toast_relfilenode('" UINT64_FORMAT "'::pg_catalog.int8);\n",
toast_relfilenumber);
/* every toast table has an index */
@@ -4926,7 +4926,7 @@ binary_upgrade_set_pg_class_oids(Archive *fout,
"SELECT pg_catalog.binary_upgrade_set_next_index_pg_class_oid('%u'::pg_catalog.oid);\n",
toast_index_oid);
appendPQExpBuffer(upgrade_buffer,
- "SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('%u'::pg_catalog.oid);\n",
+ "SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('" UINT64_FORMAT "'::pg_catalog.int8);\n",
toast_index_relfilenumber);
}
@@ -4939,7 +4939,7 @@ binary_upgrade_set_pg_class_oids(Archive *fout,
"SELECT pg_catalog.binary_upgrade_set_next_index_pg_class_oid('%u'::pg_catalog.oid);\n",
pg_class_oid);
appendPQExpBuffer(upgrade_buffer,
- "SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('%u'::pg_catalog.oid);\n",
+ "SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('" UINT64_FORMAT "'::pg_catalog.int8);\n",
relfilenumber);
}
diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c
index 269ed6446e6..197ec0eac91 100644
--- a/src/bin/pg_rewind/filemap.c
+++ b/src/bin/pg_rewind/filemap.c
@@ -538,7 +538,7 @@ isRelDataFile(const char *path)
segNo = 0;
matched = false;
- nmatch = sscanf(path, "global/%u.%u", &rlocator.relNumber, &segNo);
+ nmatch = sscanf(path, "global/" UINT64_FORMAT ".%u", &rlocator.relNumber, &segNo);
if (nmatch == 1 || nmatch == 2)
{
rlocator.spcOid = GLOBALTABLESPACE_OID;
@@ -547,7 +547,7 @@ isRelDataFile(const char *path)
}
else
{
- nmatch = sscanf(path, "base/%u/%u.%u",
+ nmatch = sscanf(path, "base/%u/" UINT64_FORMAT ".%u",
&rlocator.dbOid, &rlocator.relNumber, &segNo);
if (nmatch == 2 || nmatch == 3)
{
@@ -556,7 +556,7 @@ isRelDataFile(const char *path)
}
else
{
- nmatch = sscanf(path, "pg_tblspc/%u/" TABLESPACE_VERSION_DIRECTORY "/%u/%u.%u",
+ nmatch = sscanf(path, "pg_tblspc/%u/" TABLESPACE_VERSION_DIRECTORY "/%u/" UINT64_FORMAT ".%u",
&rlocator.spcOid, &rlocator.dbOid, &rlocator.relNumber,
&segNo);
if (nmatch == 3 || nmatch == 4)
diff --git a/src/bin/pg_upgrade/info.c b/src/bin/pg_upgrade/info.c
index f18cf971202..0c712a62669 100644
--- a/src/bin/pg_upgrade/info.c
+++ b/src/bin/pg_upgrade/info.c
@@ -527,7 +527,8 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
relname = PQgetvalue(res, relnum, i_relname);
curr->relname = pg_strdup(relname);
- curr->relfilenumber = atooid(PQgetvalue(res, relnum, i_relfilenumber));
+ curr->relfilenumber =
+ atorelnumber(PQgetvalue(res, relnum, i_relfilenumber));
curr->tblsp_alloc = false;
/* Is the tablespace oid non-default? */
diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c
index 115faa222e3..7ab1bcc9c8d 100644
--- a/src/bin/pg_upgrade/pg_upgrade.c
+++ b/src/bin/pg_upgrade/pg_upgrade.c
@@ -15,10 +15,8 @@
* oids are the same between old and new clusters. This is important
* because toast oids are stored as toast pointers in user tables.
*
- * While pg_class.oid and pg_class.relfilenode are initially the same in a
- * cluster, they can diverge due to CLUSTER, REINDEX, or VACUUM FULL. We
- * control assignments of pg_class.relfilenode because we want the filenames
- * to match between the old and new cluster.
+ * We control assignments of pg_class.relfilenode because we want the
+ * filenames to match between the old and new cluster.
*
* We control assignment of pg_tablespace.oid because we want the oid to match
* between the old and new cluster.
diff --git a/src/bin/pg_upgrade/relfilenumber.c b/src/bin/pg_upgrade/relfilenumber.c
index c3f3d6bc0af..529267d670a 100644
--- a/src/bin/pg_upgrade/relfilenumber.c
+++ b/src/bin/pg_upgrade/relfilenumber.c
@@ -190,14 +190,14 @@ transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_fro
else
snprintf(extent_suffix, sizeof(extent_suffix), ".%d", segno);
- snprintf(old_file, sizeof(old_file), "%s%s/%u/%u%s%s",
+ snprintf(old_file, sizeof(old_file), "%s%s/%u/" UINT64_FORMAT "%s%s",
map->old_tablespace,
map->old_tablespace_suffix,
map->db_oid,
map->relfilenumber,
type_suffix,
extent_suffix);
- snprintf(new_file, sizeof(new_file), "%s%s/%u/%u%s%s",
+ snprintf(new_file, sizeof(new_file), "%s%s/%u/" UINT64_FORMAT "%s%s",
map->new_tablespace,
map->new_tablespace_suffix,
map->db_oid,
diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c
index 9993378ca58..6fdc7dcf529 100644
--- a/src/bin/pg_waldump/pg_waldump.c
+++ b/src/bin/pg_waldump/pg_waldump.c
@@ -884,7 +884,7 @@ main(int argc, char **argv)
}
break;
case 'R':
- if (sscanf(optarg, "%u/%u/%u",
+ if (sscanf(optarg, "%u/%u/" UINT64_FORMAT,
&config.filter_by_relation.spcOid,
&config.filter_by_relation.dbOid,
&config.filter_by_relation.relNumber) != 3 ||
diff --git a/src/bin/scripts/t/090_reindexdb.pl b/src/bin/scripts/t/090_reindexdb.pl
index e706d686e39..de5cee6fa08 100644
--- a/src/bin/scripts/t/090_reindexdb.pl
+++ b/src/bin/scripts/t/090_reindexdb.pl
@@ -40,7 +40,7 @@ my $toast_index = $node->safe_psql('postgres',
# REINDEX operations. A set of relfilenodes is saved from the catalogs
# and then compared with pg_class.
$node->safe_psql('postgres',
- 'CREATE TABLE index_relfilenodes (parent regclass, indname text, indoid oid, relfilenode oid);'
+ 'CREATE TABLE index_relfilenodes (parent regclass, indname text, indoid oid, relfilenode int8);'
);
# Save the relfilenode of a set of toast indexes, one from the catalog
# pg_constraint and one from the test table.