diff options
| author | Robert Haas | 2022-09-27 17:25:21 +0000 |
|---|---|---|
| committer | Robert Haas | 2022-09-27 17:25:21 +0000 |
| commit | 05d4cbf9b6ba708858984b01ca0fc56d59d4ec7c (patch) | |
| tree | 645e3ac17f002ae33e086dbf871c330986452c35 /src/bin | |
| parent | 2f47715cc8649f854b1df28dfc338af9801db217 (diff) | |
Increase width of RelFileNumbers from 32 bits to 56 bits.
RelFileNumbers are now assigned using a separate counter, instead of
being assigned from the OID counter. This counter never wraps around:
if all 2^56 possible RelFileNumbers are used, an internal error
occurs. As the cluster is limited to 2^64 total bytes of WAL, this
limitation should not cause a problem in practice.
If the counter were 64 bits wide rather than 56 bits wide, we would
need to increase the width of the BufferTag, which might adversely
impact buffer lookup performance. Also, this lets us use bigint for
pg_class.relfilenode and other places where these values are exposed
at the SQL level without worrying about overflow.
This should remove the need to keep "tombstone" files around until
the next checkpoint when relations are removed. We do that to keep
RelFileNumbers from being recycled, but now that won't happen
anyway. However, this patch doesn't actually change anything in
this area; it just makes it possible for a future patch to do so.
Dilip Kumar, based on an idea from Andres Freund, who also reviewed
some earlier versions of the patch. Further review and some
wordsmithing by me. Also reviewed at various points by Ashutosh
Sharma, Vignesh C, Amul Sul, Álvaro Herrera, and Tom Lane.
Discussion: http://postgr.es/m/CA+Tgmobp7+7kmi4gkq7Y+4AM9fTvL+O1oQ4-5gFTT+6Ng-dQ=g@mail.gmail.com
Diffstat (limited to 'src/bin')
| -rw-r--r-- | src/bin/pg_checksums/pg_checksums.c | 4 | ||||
| -rw-r--r-- | src/bin/pg_controldata/pg_controldata.c | 2 | ||||
| -rw-r--r-- | src/bin/pg_dump/pg_dump.c | 26 | ||||
| -rw-r--r-- | src/bin/pg_rewind/filemap.c | 6 | ||||
| -rw-r--r-- | src/bin/pg_upgrade/info.c | 3 | ||||
| -rw-r--r-- | src/bin/pg_upgrade/pg_upgrade.c | 6 | ||||
| -rw-r--r-- | src/bin/pg_upgrade/relfilenumber.c | 4 | ||||
| -rw-r--r-- | src/bin/pg_waldump/pg_waldump.c | 2 | ||||
| -rw-r--r-- | src/bin/scripts/t/090_reindexdb.pl | 2 |
9 files changed, 27 insertions, 28 deletions
diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c index 324ccf77834..ddb5ec117f2 100644 --- a/src/bin/pg_checksums/pg_checksums.c +++ b/src/bin/pg_checksums/pg_checksums.c @@ -485,9 +485,7 @@ main(int argc, char *argv[]) mode = PG_MODE_ENABLE; break; case 'f': - if (!option_parse_int(optarg, "-f/--filenode", 0, - INT_MAX, - NULL)) + if (!option_parse_relfilenumber(optarg, "-f/--filenode")) exit(1); only_filenode = pstrdup(optarg); break; diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c index c390ec51ce9..2f0e91fc2f9 100644 --- a/src/bin/pg_controldata/pg_controldata.c +++ b/src/bin/pg_controldata/pg_controldata.c @@ -250,6 +250,8 @@ main(int argc, char *argv[]) printf(_("Latest checkpoint's NextXID: %u:%u\n"), EpochFromFullTransactionId(ControlFile->checkPointCopy.nextXid), XidFromFullTransactionId(ControlFile->checkPointCopy.nextXid)); + printf(_("Latest checkpoint's NextRelFileNumber:%llu\n"), + (unsigned long long) ControlFile->checkPointCopy.nextRelFileNumber); printf(_("Latest checkpoint's NextOID: %u\n"), ControlFile->checkPointCopy.nextOid); printf(_("Latest checkpoint's NextMultiXactId: %u\n"), diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index bd9b066e4eb..9f78971cab5 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -3184,15 +3184,15 @@ dumpDatabase(Archive *fout) atooid(PQgetvalue(lo_res, i, ii_oid))); oid = atooid(PQgetvalue(lo_res, i, ii_oid)); - relfilenumber = atooid(PQgetvalue(lo_res, i, ii_relfilenode)); + relfilenumber = atorelnumber(PQgetvalue(lo_res, i, ii_relfilenode)); if (oid == LargeObjectRelationId) appendPQExpBuffer(loOutQry, - "SELECT pg_catalog.binary_upgrade_set_next_heap_relfilenode('%u'::pg_catalog.oid);\n", + "SELECT pg_catalog.binary_upgrade_set_next_heap_relfilenode('" UINT64_FORMAT "'::pg_catalog.int8);\n", relfilenumber); else if (oid == LargeObjectLOidPNIndexId) appendPQExpBuffer(loOutQry, - "SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('%u'::pg_catalog.oid);\n", + "SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('" UINT64_FORMAT "'::pg_catalog.int8);\n", relfilenumber); } @@ -4877,16 +4877,16 @@ binary_upgrade_set_pg_class_oids(Archive *fout, relkind = *PQgetvalue(upgrade_res, 0, PQfnumber(upgrade_res, "relkind")); - relfilenumber = atooid(PQgetvalue(upgrade_res, 0, - PQfnumber(upgrade_res, "relfilenode"))); + relfilenumber = atorelnumber(PQgetvalue(upgrade_res, 0, + PQfnumber(upgrade_res, "relfilenode"))); toast_oid = atooid(PQgetvalue(upgrade_res, 0, PQfnumber(upgrade_res, "reltoastrelid"))); - toast_relfilenumber = atooid(PQgetvalue(upgrade_res, 0, - PQfnumber(upgrade_res, "toast_relfilenode"))); + toast_relfilenumber = atorelnumber(PQgetvalue(upgrade_res, 0, + PQfnumber(upgrade_res, "toast_relfilenode"))); toast_index_oid = atooid(PQgetvalue(upgrade_res, 0, PQfnumber(upgrade_res, "indexrelid"))); - toast_index_relfilenumber = atooid(PQgetvalue(upgrade_res, 0, - PQfnumber(upgrade_res, "toast_index_relfilenode"))); + toast_index_relfilenumber = atorelnumber(PQgetvalue(upgrade_res, 0, + PQfnumber(upgrade_res, "toast_index_relfilenode"))); appendPQExpBufferStr(upgrade_buffer, "\n-- For binary upgrade, must preserve pg_class oids and relfilenodes\n"); @@ -4904,7 +4904,7 @@ binary_upgrade_set_pg_class_oids(Archive *fout, */ if (RelFileNumberIsValid(relfilenumber) && relkind != RELKIND_PARTITIONED_TABLE) appendPQExpBuffer(upgrade_buffer, - "SELECT pg_catalog.binary_upgrade_set_next_heap_relfilenode('%u'::pg_catalog.oid);\n", + "SELECT pg_catalog.binary_upgrade_set_next_heap_relfilenode('" UINT64_FORMAT "'::pg_catalog.int8);\n", relfilenumber); /* @@ -4918,7 +4918,7 @@ binary_upgrade_set_pg_class_oids(Archive *fout, "SELECT pg_catalog.binary_upgrade_set_next_toast_pg_class_oid('%u'::pg_catalog.oid);\n", toast_oid); appendPQExpBuffer(upgrade_buffer, - "SELECT pg_catalog.binary_upgrade_set_next_toast_relfilenode('%u'::pg_catalog.oid);\n", + "SELECT pg_catalog.binary_upgrade_set_next_toast_relfilenode('" UINT64_FORMAT "'::pg_catalog.int8);\n", toast_relfilenumber); /* every toast table has an index */ @@ -4926,7 +4926,7 @@ binary_upgrade_set_pg_class_oids(Archive *fout, "SELECT pg_catalog.binary_upgrade_set_next_index_pg_class_oid('%u'::pg_catalog.oid);\n", toast_index_oid); appendPQExpBuffer(upgrade_buffer, - "SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('%u'::pg_catalog.oid);\n", + "SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('" UINT64_FORMAT "'::pg_catalog.int8);\n", toast_index_relfilenumber); } @@ -4939,7 +4939,7 @@ binary_upgrade_set_pg_class_oids(Archive *fout, "SELECT pg_catalog.binary_upgrade_set_next_index_pg_class_oid('%u'::pg_catalog.oid);\n", pg_class_oid); appendPQExpBuffer(upgrade_buffer, - "SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('%u'::pg_catalog.oid);\n", + "SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('" UINT64_FORMAT "'::pg_catalog.int8);\n", relfilenumber); } diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c index 269ed6446e6..197ec0eac91 100644 --- a/src/bin/pg_rewind/filemap.c +++ b/src/bin/pg_rewind/filemap.c @@ -538,7 +538,7 @@ isRelDataFile(const char *path) segNo = 0; matched = false; - nmatch = sscanf(path, "global/%u.%u", &rlocator.relNumber, &segNo); + nmatch = sscanf(path, "global/" UINT64_FORMAT ".%u", &rlocator.relNumber, &segNo); if (nmatch == 1 || nmatch == 2) { rlocator.spcOid = GLOBALTABLESPACE_OID; @@ -547,7 +547,7 @@ isRelDataFile(const char *path) } else { - nmatch = sscanf(path, "base/%u/%u.%u", + nmatch = sscanf(path, "base/%u/" UINT64_FORMAT ".%u", &rlocator.dbOid, &rlocator.relNumber, &segNo); if (nmatch == 2 || nmatch == 3) { @@ -556,7 +556,7 @@ isRelDataFile(const char *path) } else { - nmatch = sscanf(path, "pg_tblspc/%u/" TABLESPACE_VERSION_DIRECTORY "/%u/%u.%u", + nmatch = sscanf(path, "pg_tblspc/%u/" TABLESPACE_VERSION_DIRECTORY "/%u/" UINT64_FORMAT ".%u", &rlocator.spcOid, &rlocator.dbOid, &rlocator.relNumber, &segNo); if (nmatch == 3 || nmatch == 4) diff --git a/src/bin/pg_upgrade/info.c b/src/bin/pg_upgrade/info.c index f18cf971202..0c712a62669 100644 --- a/src/bin/pg_upgrade/info.c +++ b/src/bin/pg_upgrade/info.c @@ -527,7 +527,8 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo) relname = PQgetvalue(res, relnum, i_relname); curr->relname = pg_strdup(relname); - curr->relfilenumber = atooid(PQgetvalue(res, relnum, i_relfilenumber)); + curr->relfilenumber = + atorelnumber(PQgetvalue(res, relnum, i_relfilenumber)); curr->tblsp_alloc = false; /* Is the tablespace oid non-default? */ diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c index 115faa222e3..7ab1bcc9c8d 100644 --- a/src/bin/pg_upgrade/pg_upgrade.c +++ b/src/bin/pg_upgrade/pg_upgrade.c @@ -15,10 +15,8 @@ * oids are the same between old and new clusters. This is important * because toast oids are stored as toast pointers in user tables. * - * While pg_class.oid and pg_class.relfilenode are initially the same in a - * cluster, they can diverge due to CLUSTER, REINDEX, or VACUUM FULL. We - * control assignments of pg_class.relfilenode because we want the filenames - * to match between the old and new cluster. + * We control assignments of pg_class.relfilenode because we want the + * filenames to match between the old and new cluster. * * We control assignment of pg_tablespace.oid because we want the oid to match * between the old and new cluster. diff --git a/src/bin/pg_upgrade/relfilenumber.c b/src/bin/pg_upgrade/relfilenumber.c index c3f3d6bc0af..529267d670a 100644 --- a/src/bin/pg_upgrade/relfilenumber.c +++ b/src/bin/pg_upgrade/relfilenumber.c @@ -190,14 +190,14 @@ transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_fro else snprintf(extent_suffix, sizeof(extent_suffix), ".%d", segno); - snprintf(old_file, sizeof(old_file), "%s%s/%u/%u%s%s", + snprintf(old_file, sizeof(old_file), "%s%s/%u/" UINT64_FORMAT "%s%s", map->old_tablespace, map->old_tablespace_suffix, map->db_oid, map->relfilenumber, type_suffix, extent_suffix); - snprintf(new_file, sizeof(new_file), "%s%s/%u/%u%s%s", + snprintf(new_file, sizeof(new_file), "%s%s/%u/" UINT64_FORMAT "%s%s", map->new_tablespace, map->new_tablespace_suffix, map->db_oid, diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c index 9993378ca58..6fdc7dcf529 100644 --- a/src/bin/pg_waldump/pg_waldump.c +++ b/src/bin/pg_waldump/pg_waldump.c @@ -884,7 +884,7 @@ main(int argc, char **argv) } break; case 'R': - if (sscanf(optarg, "%u/%u/%u", + if (sscanf(optarg, "%u/%u/" UINT64_FORMAT, &config.filter_by_relation.spcOid, &config.filter_by_relation.dbOid, &config.filter_by_relation.relNumber) != 3 || diff --git a/src/bin/scripts/t/090_reindexdb.pl b/src/bin/scripts/t/090_reindexdb.pl index e706d686e39..de5cee6fa08 100644 --- a/src/bin/scripts/t/090_reindexdb.pl +++ b/src/bin/scripts/t/090_reindexdb.pl @@ -40,7 +40,7 @@ my $toast_index = $node->safe_psql('postgres', # REINDEX operations. A set of relfilenodes is saved from the catalogs # and then compared with pg_class. $node->safe_psql('postgres', - 'CREATE TABLE index_relfilenodes (parent regclass, indname text, indoid oid, relfilenode oid);' + 'CREATE TABLE index_relfilenodes (parent regclass, indname text, indoid oid, relfilenode int8);' ); # Save the relfilenode of a set of toast indexes, one from the catalog # pg_constraint and one from the test table. |
