PostgreSQL Source Code git master
pg_rewind.c
Go to the documentation of this file.
/*-------------------------------------------------------------------------
 *
 * pg_rewind.c
 *	  Synchronizes a PostgreSQL data directory to a new timeline
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 *
 *-------------------------------------------------------------------------
 */
#include "postgres_fe.h"

#include <sys/stat.h>
#include <fcntl.h>
#include <time.h>
#include <unistd.h>

#include "access/timeline.h"
#include "access/xlog_internal.h"
#include "catalog/catversion.h"
#include "catalog/pg_control.h"
#include "common/controldata_utils.h"
#include "common/file_perm.h"
#include "common/restricted_token.h"
#include "common/string.h"
#include "fe_utils/option_utils.h"
#include "fe_utils/recovery_gen.h"
#include "fe_utils/string_utils.h"
#include "file_ops.h"
#include "filemap.h"
#include "getopt_long.h"
#include "pg_rewind.h"
#include "rewind_source.h"
#include "storage/bufpage.h"
34
35static void usage(const char *progname);
36
37static void perform_rewind(filemap_t *filemap, rewind_source *source,
38 XLogRecPtr chkptrec,
39 TimeLineID chkpttli,
40 XLogRecPtr chkptredo);
41
42static void createBackupLabel(XLogRecPtr startpoint, TimeLineID starttli,
43 XLogRecPtr checkpointloc);
44
46 const char *content, size_t size);
47static void getRestoreCommand(const char *argv0);
48static void sanityChecks(void);
49static TimeLineHistoryEntry *getTimelineHistory(TimeLineID tli, bool is_source,
50 int *nentries);
52 int a_nentries,
53 TimeLineHistoryEntry *b_history,
54 int b_nentries,
55 XLogRecPtr *recptr, int *tliIndex);
56static void ensureCleanShutdown(const char *argv0);
57static void disconnect_atexit(void);
58
62
63static const char *progname;
65
66/* Configuration options */
67char *datadir_target = NULL;
68static char *datadir_source = NULL;
69static char *connstr_source = NULL;
70static char *restore_command = NULL;
71static char *config_file = NULL;
72
73static bool debug = false;
74bool showprogress = false;
75bool dry_run = false;
76bool do_sync = true;
77static bool restore_wal = false;
79
80/* Target history */
83
84/* Progress counters */
87
88static PGconn *conn;
90
91static void
92usage(const char *progname)
93{
94 printf(_("%s resynchronizes a PostgreSQL cluster with another copy of the cluster.\n\n"), progname);
95 printf(_("Usage:\n %s [OPTION]...\n\n"), progname);
96 printf(_("Options:\n"));
97 printf(_(" -c, --restore-target-wal use \"restore_command\" in target configuration to\n"
98 " retrieve WAL files from archives\n"));
99 printf(_(" -D, --target-pgdata=DIRECTORY existing data directory to modify\n"));
100 printf(_(" --source-pgdata=DIRECTORY source data directory to synchronize with\n"));
101 printf(_(" --source-server=CONNSTR source server to synchronize with\n"));
102 printf(_(" -n, --dry-run stop before modifying anything\n"));
103 printf(_(" -N, --no-sync do not wait for changes to be written\n"
104 " safely to disk\n"));
105 printf(_(" -P, --progress write progress messages\n"));
106 printf(_(" -R, --write-recovery-conf write configuration for replication\n"
107 " (requires --source-server)\n"));
108 printf(_(" --config-file=FILENAME use specified main server configuration\n"
109 " file when running target cluster\n"));
110 printf(_(" --debug write a lot of debug messages\n"));
111 printf(_(" --no-ensure-shutdown do not automatically fix unclean shutdown\n"));
112 printf(_(" --sync-method=METHOD set method for syncing files to disk\n"));
113 printf(_(" -V, --version output version information, then exit\n"));
114 printf(_(" -?, --help show this help, then exit\n"));
115 printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
116 printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL);
117}
118
119
120int
121main(int argc, char **argv)
122{
123 static struct option long_options[] = {
124 {"help", no_argument, NULL, '?'},
125 {"target-pgdata", required_argument, NULL, 'D'},
126 {"write-recovery-conf", no_argument, NULL, 'R'},
127 {"source-pgdata", required_argument, NULL, 1},
128 {"source-server", required_argument, NULL, 2},
129 {"no-ensure-shutdown", no_argument, NULL, 4},
130 {"config-file", required_argument, NULL, 5},
131 {"version", no_argument, NULL, 'V'},
132 {"restore-target-wal", no_argument, NULL, 'c'},
133 {"dry-run", no_argument, NULL, 'n'},
134 {"no-sync", no_argument, NULL, 'N'},
135 {"progress", no_argument, NULL, 'P'},
136 {"debug", no_argument, NULL, 3},
137 {"sync-method", required_argument, NULL, 6},
138 {NULL, 0, NULL, 0}
139 };
140 int option_index;
141 int c;
142 XLogRecPtr divergerec;
143 int lastcommontliIndex;
144 XLogRecPtr chkptrec;
145 TimeLineID chkpttli;
146 XLogRecPtr chkptredo;
147 TimeLineID source_tli;
148 TimeLineID target_tli;
149 XLogRecPtr target_wal_endrec;
150 size_t size;
151 char *buffer;
152 bool no_ensure_shutdown = false;
153 bool rewind_needed;
154 bool writerecoveryconf = false;
155 filemap_t *filemap;
156
157 pg_logging_init(argv[0]);
158 set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_rewind"));
159 progname = get_progname(argv[0]);
160
161 /* Process command-line arguments */
162 if (argc > 1)
163 {
164 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
165 {
167 exit(0);
168 }
169 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
170 {
171 puts("pg_rewind (PostgreSQL) " PG_VERSION);
172 exit(0);
173 }
174 }
175
176 while ((c = getopt_long(argc, argv, "cD:nNPR", long_options, &option_index)) != -1)
177 {
178 switch (c)
179 {
180 case 'c':
181 restore_wal = true;
182 break;
183
184 case 'P':
185 showprogress = true;
186 break;
187
188 case 'n':
189 dry_run = true;
190 break;
191
192 case 'N':
193 do_sync = false;
194 break;
195
196 case 'R':
197 writerecoveryconf = true;
198 break;
199
200 case 3:
201 debug = true;
203 break;
204
205 case 'D': /* -D or --target-pgdata */
207 break;
208
209 case 1: /* --source-pgdata */
211 break;
212
213 case 2: /* --source-server */
215 break;
216
217 case 4:
218 no_ensure_shutdown = true;
219 break;
220
221 case 5:
223 break;
224
225 case 6:
227 exit(1);
228 break;
229
230 default:
231 /* getopt_long already emitted a complaint */
232 pg_log_error_hint("Try \"%s --help\" for more information.", progname);
233 exit(1);
234 }
235 }
236
237 if (datadir_source == NULL && connstr_source == NULL)
238 {
239 pg_log_error("no source specified (--source-pgdata or --source-server)");
240 pg_log_error_hint("Try \"%s --help\" for more information.", progname);
241 exit(1);
242 }
243
244 if (datadir_source != NULL && connstr_source != NULL)
245 {
246 pg_log_error("only one of --source-pgdata or --source-server can be specified");
247 pg_log_error_hint("Try \"%s --help\" for more information.", progname);
248 exit(1);
249 }
250
251 if (datadir_target == NULL)
252 {
253 pg_log_error("no target data directory specified (--target-pgdata)");
254 pg_log_error_hint("Try \"%s --help\" for more information.", progname);
255 exit(1);
256 }
257
258 if (writerecoveryconf && connstr_source == NULL)
259 {
260 pg_log_error("no source server information (--source-server) specified for --write-recovery-conf");
261 pg_log_error_hint("Try \"%s --help\" for more information.", progname);
262 exit(1);
263 }
264
265 if (optind < argc)
266 {
267 pg_log_error("too many command-line arguments (first is \"%s\")",
268 argv[optind]);
269 pg_log_error_hint("Try \"%s --help\" for more information.", progname);
270 exit(1);
271 }
272
273 /*
274 * Don't allow pg_rewind to be run as root, to avoid overwriting the
275 * ownership of files in the data directory. We need only check for root
276 * -- any other user won't have sufficient permissions to modify files in
277 * the data directory.
278 */
279#ifndef WIN32
280 if (geteuid() == 0)
281 {
282 pg_log_error("cannot be executed by \"root\"");
283 pg_log_error_hint("You must run %s as the PostgreSQL superuser.",
284 progname);
285 exit(1);
286 }
287#endif
288
290
291 /* Set mask based on PGDATA permissions */
293 pg_fatal("could not read permissions of directory \"%s\": %m",
295
296 umask(pg_mode_mask);
297
298 getRestoreCommand(argv[0]);
299
300 atexit(disconnect_atexit);
301
302 /*
303 * Ok, we have all the options and we're ready to start. First, connect to
304 * remote server.
305 */
306 if (connstr_source)
307 {
309
312
313 if (showprogress)
314 pg_log_info("connected to server");
315
317 }
318 else
320
321 /*
322 * Check the status of the target instance.
323 *
324 * If the target instance was not cleanly shut down, start and stop the
325 * target cluster once in single-user mode to enforce recovery to finish,
326 * ensuring that the cluster can be used by pg_rewind. Note that if
327 * no_ensure_shutdown is specified, pg_rewind ignores this step, and users
328 * need to make sure by themselves that the target cluster is in a clean
329 * state.
330 */
333 pg_free(buffer);
334
335 if (!no_ensure_shutdown &&
338 {
339 ensureCleanShutdown(argv[0]);
340
343 pg_free(buffer);
344 }
345
346 buffer = source->fetch_file(source, XLOG_CONTROL_FILE, &size);
348 pg_free(buffer);
349
350 sanityChecks();
351
352 /*
353 * Usually, the TLI can be found in the latest checkpoint record. But if
354 * the source server is just being promoted (or it's a standby that's
355 * following a primary that's just being promoted), and the checkpoint
356 * requested by the promotion hasn't completed yet, the latest timeline is
357 * in minRecoveryPoint. So we check which is later, the TLI of the
358 * minRecoveryPoint or the latest checkpoint.
359 */
362
363 /* Similarly for the target. */
366
367 /*
368 * Find the common ancestor timeline between the clusters.
369 *
370 * If both clusters are already on the same timeline, there's nothing to
371 * do.
372 */
373 if (target_tli == source_tli)
374 {
375 pg_log_info("source and target cluster are on the same timeline");
376 rewind_needed = false;
377 target_wal_endrec = 0;
378 }
379 else
380 {
381 XLogRecPtr chkptendrec;
382 TimeLineHistoryEntry *sourceHistory;
383 int sourceNentries;
384
385 /*
386 * Retrieve timelines for both source and target, and find the point
387 * where they diverged.
388 */
389 sourceHistory = getTimelineHistory(source_tli, true, &sourceNentries);
390 targetHistory = getTimelineHistory(target_tli, false, &targetNentries);
391
392 findCommonAncestorTimeline(sourceHistory, sourceNentries,
394 &divergerec, &lastcommontliIndex);
395
396 pg_log_info("servers diverged at WAL location %X/%X on timeline %u",
397 LSN_FORMAT_ARGS(divergerec),
398 targetHistory[lastcommontliIndex].tli);
399
400 /*
401 * Don't need the source history anymore. The target history is still
402 * needed by the routines in parsexlog.c, when we read the target WAL.
403 */
404 pfree(sourceHistory);
405
406
407 /*
408 * Determine the end-of-WAL on the target.
409 *
410 * The WAL ends at the last shutdown checkpoint, or at
411 * minRecoveryPoint if it was a standby. (If we supported rewinding a
412 * server that was not shut down cleanly, we would need to replay
413 * until we reach the first invalid record, like crash recovery does.)
414 */
415
416 /* read the checkpoint record on the target to see where it ends. */
417 chkptendrec = readOneRecord(datadir_target,
419 targetNentries - 1,
421
422 if (ControlFile_target.minRecoveryPoint > chkptendrec)
423 {
424 target_wal_endrec = ControlFile_target.minRecoveryPoint;
425 }
426 else
427 {
428 target_wal_endrec = chkptendrec;
429 }
430
431 /*
432 * Check for the possibility that the target is in fact a direct
433 * ancestor of the source. In that case, there is no divergent history
434 * in the target that needs rewinding.
435 */
436 if (target_wal_endrec > divergerec)
437 {
438 rewind_needed = true;
439 }
440 else
441 {
442 /* the last common checkpoint record must be part of target WAL */
443 Assert(target_wal_endrec == divergerec);
444
445 rewind_needed = false;
446 }
447 }
448
449 if (!rewind_needed)
450 {
451 pg_log_info("no rewind required");
456 exit(0);
457 }
458
459 /* Initialize hashtable that tracks WAL files protected from removal */
460 keepwal_init();
461
462 findLastCheckpoint(datadir_target, divergerec, lastcommontliIndex,
463 &chkptrec, &chkpttli, &chkptredo, restore_command);
464 pg_log_info("rewinding from last common checkpoint at %X/%X on timeline %u",
465 LSN_FORMAT_ARGS(chkptrec), chkpttli);
466
467 /* Initialize the hash table to track the status of each file */
469
470 /*
471 * Collect information about all files in the both data directories.
472 */
473 if (showprogress)
474 pg_log_info("reading source file list");
476
477 if (showprogress)
478 pg_log_info("reading target file list");
480
481 /*
482 * Read the target WAL from last checkpoint before the point of fork, to
483 * extract all the pages that were modified on the target cluster after
484 * the fork.
485 */
486 if (showprogress)
487 pg_log_info("reading WAL in target");
488 extractPageMap(datadir_target, chkptrec, lastcommontliIndex,
489 target_wal_endrec, restore_command);
490
491 /*
492 * We have collected all information we need from both systems. Decide
493 * what to do with each file.
494 */
495 filemap = decide_file_actions();
496 if (showprogress)
497 calculate_totals(filemap);
498
499 /* this is too verbose even for verbose mode */
500 if (debug)
501 print_filemap(filemap);
502
503 /*
504 * Ok, we're ready to start copying things over.
505 */
506 if (showprogress)
507 {
508 pg_log_info("need to copy %lu MB (total source directory size is %lu MB)",
509 (unsigned long) (filemap->fetch_size / (1024 * 1024)),
510 (unsigned long) (filemap->total_size / (1024 * 1024)));
511
512 fetch_size = filemap->fetch_size;
513 fetch_done = 0;
514 }
515
516 /*
517 * We have now collected all the information we need from both systems,
518 * and we are ready to start modifying the target directory.
519 *
520 * This is the point of no return. Once we start copying things, there is
521 * no turning back!
522 */
523 perform_rewind(filemap, source, chkptrec, chkpttli, chkptredo);
524
525 if (showprogress)
526 pg_log_info("syncing target data directory");
528
529 /* Also update the standby configuration, if requested. */
534
535 /* don't need the source connection anymore */
537 if (conn)
538 {
539 PQfinish(conn);
540 conn = NULL;
541 }
542
543 pg_log_info("Done!");
544
545 return 0;
546}
547
548/*
549 * Perform the rewind.
550 *
551 * We have already collected all the information we need from the
552 * target and the source.
553 */
554static void
556 XLogRecPtr chkptrec,
557 TimeLineID chkpttli,
558 XLogRecPtr chkptredo)
559{
560 XLogRecPtr endrec;
561 TimeLineID endtli;
562 ControlFileData ControlFile_new;
563 size_t size;
564 char *buffer;
565
566 /*
567 * Execute the actions in the file map, fetching data from the source
568 * system as needed.
569 */
570 for (int i = 0; i < filemap->nentries; i++)
571 {
572 file_entry_t *entry = filemap->entries[i];
573
574 /*
575 * If this is a relation file, copy the modified blocks.
576 *
577 * This is in addition to any other changes.
578 */
580 {
582 BlockNumber blkno;
583 off_t offset;
584
586 while (datapagemap_next(iter, &blkno))
587 {
588 offset = blkno * BLCKSZ;
589 source->queue_fetch_range(source, entry->path, offset, BLCKSZ);
590 }
591 pg_free(iter);
592 }
593
594 switch (entry->action)
595 {
596 case FILE_ACTION_NONE:
597 /* nothing else to do */
598 break;
599
600 case FILE_ACTION_COPY:
602 break;
603
605 truncate_target_file(entry->path, entry->source_size);
606 break;
607
610 entry->target_size,
611 entry->source_size - entry->target_size);
612 break;
613
615 remove_target(entry);
616 break;
617
619 create_target(entry);
620 break;
621
623 pg_fatal("no action decided for file \"%s\"", entry->path);
624 break;
625 }
626 }
627
628 /* Complete any remaining range-fetches that we queued up above. */
630
632
633 progress_report(true);
634
635 /*
636 * Fetch the control file from the source last. This ensures that the
637 * minRecoveryPoint is up-to-date.
638 */
639 buffer = source->fetch_file(source, XLOG_CONTROL_FILE, &size);
641 pg_free(buffer);
642
643 /*
644 * Sanity check: If the source is a local system, the control file should
645 * not have changed since we started.
646 *
647 * XXX: We assume it hasn't been modified, but actually, what could go
648 * wrong? The logic handles a libpq source that's modified concurrently,
649 * why not a local datadir?
650 */
651 if (datadir_source &&
653 sizeof(ControlFileData)) != 0)
654 {
655 pg_fatal("source system was modified while pg_rewind was running");
656 }
657
658 if (showprogress)
659 pg_log_info("creating backup label and updating control file");
660
661 /*
662 * Create a backup label file, to tell the target where to begin the WAL
663 * replay. Normally, from the last common checkpoint between the source
664 * and the target. But if the source is a standby server, it's possible
665 * that the last common checkpoint is *after* the standby's restartpoint.
666 * That implies that the source server has applied the checkpoint record,
667 * but hasn't performed a corresponding restartpoint yet. Make sure we
668 * start at the restartpoint's redo point in that case.
669 *
670 * Use the old version of the source's control file for this. The server
671 * might have finished the restartpoint after we started copying files,
672 * but we must begin from the redo point at the time that started copying.
673 */
674 if (ControlFile_source.checkPointCopy.redo < chkptredo)
675 {
679 }
680 createBackupLabel(chkptredo, chkpttli, chkptrec);
681
682 /*
683 * Update control file of target, to tell the target how far it must
684 * replay the WAL (minRecoveryPoint).
685 */
686 if (connstr_source)
687 {
688 /*
689 * The source is a live server. Like in an online backup, it's
690 * important that we recover all the WAL that was generated while we
691 * were copying files.
692 */
694 {
695 /*
696 * Source is a standby server. We must replay to its
697 * minRecoveryPoint.
698 */
701 }
702 else
703 {
704 /*
705 * Source is a production, non-standby, server. We must replay to
706 * the last WAL insert location.
707 */
709 pg_fatal("source system was in unexpected state at end of rewind");
710
714 }
715 }
716 else
717 {
718 /*
719 * Source is a local data directory. It should've shut down cleanly,
720 * and we must replay to the latest shutdown checkpoint.
721 */
724 }
725
726 memcpy(&ControlFile_new, &ControlFile_source_after, sizeof(ControlFileData));
727 ControlFile_new.minRecoveryPoint = endrec;
728 ControlFile_new.minRecoveryPointTLI = endtli;
729 ControlFile_new.state = DB_IN_ARCHIVE_RECOVERY;
730 if (!dry_run)
731 update_controlfile(datadir_target, &ControlFile_new, do_sync);
732}
733
734static void
736{
737 /* TODO Check that there's no backup_label in either cluster */
738
739 /* Check system_identifier match */
741 pg_fatal("source and target clusters are from different systems");
742
743 /* check version */
748 {
749 pg_fatal("clusters are not compatible with this version of pg_rewind");
750 }
751
752 /*
753 * Target cluster need to use checksums or hint bit wal-logging, this to
754 * prevent from data corruption that could occur because of hint bits.
755 */
758 {
759 pg_fatal("target server needs to use either data checksums or \"wal_log_hints = on\"");
760 }
761
762 /*
763 * Target cluster better not be running. This doesn't guard against
764 * someone starting the cluster concurrently. Also, this is probably more
765 * strict than necessary; it's OK if the target node was not shut down
766 * cleanly, as long as it isn't running at the moment.
767 */
770 pg_fatal("target server must be shut down cleanly");
771
772 /*
773 * When the source is a data directory, also require that the source
774 * server is shut down. There isn't any very strong reason for this
775 * limitation, but better safe than sorry.
776 */
777 if (datadir_source &&
780 pg_fatal("source data directory must be shut down cleanly");
781}
782
783/*
784 * Print a progress report based on the fetch_size and fetch_done variables.
785 *
786 * Progress report is written at maximum once per second, except that the
787 * last progress report is always printed.
788 *
789 * If finished is set to true, this is the last progress report. The cursor
790 * is moved to the next line.
791 */
792void
793progress_report(bool finished)
794{
796 int percent;
797 char fetch_done_str[32];
798 char fetch_size_str[32];
800
801 if (!showprogress)
802 return;
803
804 now = time(NULL);
805 if (now == last_progress_report && !finished)
806 return; /* Max once per second */
807
809 percent = fetch_size ? (int) ((fetch_done) * 100 / fetch_size) : 0;
810
811 /*
812 * Avoid overflowing past 100% or the full size. This may make the total
813 * size number change as we approach the end of the backup (the estimate
814 * will always be wrong if WAL is included), but that's better than having
815 * the done column be bigger than the total.
816 */
817 if (percent > 100)
818 percent = 100;
821
822 snprintf(fetch_done_str, sizeof(fetch_done_str), UINT64_FORMAT,
823 fetch_done / 1024);
824 snprintf(fetch_size_str, sizeof(fetch_size_str), UINT64_FORMAT,
825 fetch_size / 1024);
826
827 fprintf(stderr, _("%*s/%s kB (%d%%) copied"),
828 (int) strlen(fetch_size_str), fetch_done_str, fetch_size_str,
829 percent);
830
831 /*
832 * Stay on the same line if reporting to a terminal and we're not done
833 * yet.
834 */
835 fputc((!finished && isatty(fileno(stderr))) ? '\r' : '\n', stderr);
836}
837
838/*
839 * Find minimum from two WAL locations assuming InvalidXLogRecPtr means
840 * infinity as src/include/access/timeline.h states. This routine should
841 * be used only when comparing WAL locations related to history files.
842 */
843static XLogRecPtr
845{
847 return b;
848 else if (XLogRecPtrIsInvalid(b))
849 return a;
850 else
851 return Min(a, b);
852}
853
854/*
855 * Retrieve timeline history for the source or target system.
856 */
858getTimelineHistory(TimeLineID tli, bool is_source, int *nentries)
859{
860 TimeLineHistoryEntry *history;
861
862 /*
863 * Timeline 1 does not have a history file, so there is no need to check
864 * and fake an entry with infinite start and end positions.
865 */
866 if (tli == 1)
867 {
869 history->tli = tli;
870 history->begin = history->end = InvalidXLogRecPtr;
871 *nentries = 1;
872 }
873 else
874 {
875 char path[MAXPGPATH];
876 char *histfile;
877
878 TLHistoryFilePath(path, tli);
879
880 /* Get history file from appropriate source */
881 if (is_source)
882 histfile = source->fetch_file(source, path, NULL);
883 else
884 histfile = slurpFile(datadir_target, path, NULL);
885
886 history = rewind_parseTimeLineHistory(histfile, tli, nentries);
887 pg_free(histfile);
888 }
889
890 /* In debugging mode, print what we read */
891 if (debug)
892 {
893 int i;
894
895 if (is_source)
896 pg_log_debug("Source timeline history:");
897 else
898 pg_log_debug("Target timeline history:");
899
900 for (i = 0; i < *nentries; i++)
901 {
903
904 entry = &history[i];
905 pg_log_debug("%u: %X/%X - %X/%X", entry->tli,
906 LSN_FORMAT_ARGS(entry->begin),
907 LSN_FORMAT_ARGS(entry->end));
908 }
909 }
910
911 return history;
912}
913
914/*
915 * Determine the TLI of the last common timeline in the timeline history of
916 * two clusters. *tliIndex is set to the index of last common timeline in
917 * the arrays, and *recptr is set to the position where the timeline history
918 * diverged (ie. the first WAL record that's not the same in both clusters).
919 */
920static void
922 TimeLineHistoryEntry *b_history, int b_nentries,
923 XLogRecPtr *recptr, int *tliIndex)
924{
925 int i,
926 n;
927
928 /*
929 * Trace the history forward, until we hit the timeline diverge. It may
930 * still be possible that the source and target nodes used the same
931 * timeline number in their history but with different start position
932 * depending on the history files that each node has fetched in previous
933 * recovery processes. Hence check the start position of the new timeline
934 * as well and move down by one extra timeline entry if they do not match.
935 */
936 n = Min(a_nentries, b_nentries);
937 for (i = 0; i < n; i++)
938 {
939 if (a_history[i].tli != b_history[i].tli ||
940 a_history[i].begin != b_history[i].begin)
941 break;
942 }
943
944 if (i > 0)
945 {
946 i--;
947 *recptr = MinXLogRecPtr(a_history[i].end, b_history[i].end);
948 *tliIndex = i;
949 return;
950 }
951 else
952 {
953 pg_fatal("could not find common ancestor of the source and target cluster's timelines");
954 }
955}
956
957
958/*
959 * Create a backup_label file that forces recovery to begin at the last common
960 * checkpoint.
961 */
962static void
963createBackupLabel(XLogRecPtr startpoint, TimeLineID starttli, XLogRecPtr checkpointloc)
964{
965 XLogSegNo startsegno;
966 time_t stamp_time;
967 char strfbuf[128];
968 char xlogfilename[MAXFNAMELEN];
969 struct tm *tmp;
970 char buf[1000];
971 int len;
972
973 XLByteToSeg(startpoint, startsegno, WalSegSz);
974 XLogFileName(xlogfilename, starttli, startsegno, WalSegSz);
975
976 /*
977 * Construct backup label file
978 */
979 stamp_time = time(NULL);
980 tmp = localtime(&stamp_time);
981 strftime(strfbuf, sizeof(strfbuf), "%Y-%m-%d %H:%M:%S %Z", tmp);
982
983 len = snprintf(buf, sizeof(buf),
984 "START WAL LOCATION: %X/%X (file %s)\n"
985 "CHECKPOINT LOCATION: %X/%X\n"
986 "BACKUP METHOD: pg_rewind\n"
987 "BACKUP FROM: standby\n"
988 "START TIME: %s\n",
989 /* omit LABEL: line */
990 LSN_FORMAT_ARGS(startpoint), xlogfilename,
991 LSN_FORMAT_ARGS(checkpointloc),
992 strfbuf);
993 if (len >= sizeof(buf))
994 pg_fatal("backup label buffer too small"); /* shouldn't happen */
995
996 /* TODO: move old file out of the way, if any. */
997 open_target_file("backup_label", true); /* BACKUP_LABEL_FILE */
1000}
1001
1002/*
1003 * Check CRC of control file
1004 */
1005static void
1007{
1008 pg_crc32c crc;
1009
1010 /* Calculate CRC */
1013 FIN_CRC32C(crc);
1014
1015 /* And simply compare it */
1016 if (!EQ_CRC32C(crc, ControlFile->crc))
1017 pg_fatal("unexpected control file CRC");
1018}
1019
1020/*
1021 * Verify control file contents in the buffer 'content', and copy it to
1022 * *ControlFile.
1023 */
1024static void
1026 size_t size)
1027{
1028 if (size != PG_CONTROL_FILE_SIZE)
1029 pg_fatal("unexpected control file size %d, expected %d",
1030 (int) size, PG_CONTROL_FILE_SIZE);
1031
1032 memcpy(ControlFile, content, sizeof(ControlFileData));
1033
1034 /* set and validate WalSegSz */
1036
1038 {
1039 pg_log_error(ngettext("invalid WAL segment size in control file (%d byte)",
1040 "invalid WAL segment size in control file (%d bytes)",
1041 WalSegSz),
1042 WalSegSz);
1043 pg_log_error_detail("The WAL segment size must be a power of two between 1 MB and 1 GB.");
1044 exit(1);
1045 }
1046
1047 /* Additional checks on control file */
1049}
1050
1051/*
1052 * Get value of GUC parameter restore_command from the target cluster.
1053 *
1054 * This uses a logic based on "postgres -C" to get the value from the
1055 * cluster.
1056 */
1057static void
1059{
1060 int rc;
1061 char postgres_exec_path[MAXPGPATH];
1062 PQExpBuffer postgres_cmd;
1063
1064 if (!restore_wal)
1065 return;
1066
1067 /* find postgres executable */
1068 rc = find_other_exec(argv0, "postgres",
1070 postgres_exec_path);
1071
1072 if (rc < 0)
1073 {
1074 char full_path[MAXPGPATH];
1075
1076 if (find_my_exec(argv0, full_path) < 0)
1077 strlcpy(full_path, progname, sizeof(full_path));
1078
1079 if (rc == -1)
1080 pg_fatal("program \"%s\" is needed by %s but was not found in the same directory as \"%s\"",
1081 "postgres", progname, full_path);
1082 else
1083 pg_fatal("program \"%s\" was found by \"%s\" but was not the same version as %s",
1084 "postgres", full_path, progname);
1085 }
1086
1087 /*
1088 * Build a command able to retrieve the value of GUC parameter
1089 * restore_command, if set.
1090 */
1091 postgres_cmd = createPQExpBuffer();
1092
1093 /* path to postgres, properly quoted */
1094 appendShellString(postgres_cmd, postgres_exec_path);
1095
1096 /* add -D switch, with properly quoted data directory */
1097 appendPQExpBufferStr(postgres_cmd, " -D ");
1098 appendShellString(postgres_cmd, datadir_target);
1099
1100 /* add custom configuration file only if requested */
1101 if (config_file != NULL)
1102 {
1103 appendPQExpBufferStr(postgres_cmd, " -c config_file=");
1104 appendShellString(postgres_cmd, config_file);
1105 }
1106
1107 /* add -C switch, for restore_command */
1108 appendPQExpBufferStr(postgres_cmd, " -C restore_command");
1109
1110 restore_command = pipe_read_line(postgres_cmd->data);
1111 if (restore_command == NULL)
1112 pg_fatal("could not read \"restore_command\" from target cluster");
1113
1115
1116 if (strcmp(restore_command, "") == 0)
1117 pg_fatal("\"restore_command\" is not set in the target cluster");
1118
1119 pg_log_debug("using for rewind \"restore_command = \'%s\'\"",
1121
1122 destroyPQExpBuffer(postgres_cmd);
1123}
1124
1125
1126/*
1127 * Ensure clean shutdown of target instance by launching single-user mode
1128 * postgres to do crash recovery.
1129 */
1130static void
1132{
1133 int ret;
1134 char exec_path[MAXPGPATH];
1135 PQExpBuffer postgres_cmd;
1136
1137 /* locate postgres binary */
1138 if ((ret = find_other_exec(argv0, "postgres",
1140 exec_path)) < 0)
1141 {
1142 char full_path[MAXPGPATH];
1143
1144 if (find_my_exec(argv0, full_path) < 0)
1145 strlcpy(full_path, progname, sizeof(full_path));
1146
1147 if (ret == -1)
1148 pg_fatal("program \"%s\" is needed by %s but was not found in the same directory as \"%s\"",
1149 "postgres", progname, full_path);
1150 else
1151 pg_fatal("program \"%s\" was found by \"%s\" but was not the same version as %s",
1152 "postgres", full_path, progname);
1153 }
1154
1155 pg_log_info("executing \"%s\" for target server to complete crash recovery",
1156 exec_path);
1157
1158 /*
1159 * Skip processing if requested, but only after ensuring presence of
1160 * postgres.
1161 */
1162 if (dry_run)
1163 return;
1164
1165 /*
1166 * Finally run postgres in single-user mode. There is no need to use
1167 * fsync here. This makes the recovery faster, and the target data folder
1168 * is synced at the end anyway.
1169 */
1170 postgres_cmd = createPQExpBuffer();
1171
1172 /* path to postgres, properly quoted */
1173 appendShellString(postgres_cmd, exec_path);
1174
1175 /* add set of options with properly quoted data directory */
1176 appendPQExpBufferStr(postgres_cmd, " --single -F -D ");
1177 appendShellString(postgres_cmd, datadir_target);
1178
1179 /* add custom configuration file only if requested */
1180 if (config_file != NULL)
1181 {
1182 appendPQExpBufferStr(postgres_cmd, " -c config_file=");
1183 appendShellString(postgres_cmd, config_file);
1184 }
1185
1186 /* finish with the database name, and a properly quoted redirection */
1187 appendPQExpBufferStr(postgres_cmd, " template1 < ");
1188 appendShellString(postgres_cmd, DEVNULL);
1189
1190 fflush(NULL);
1191 if (system(postgres_cmd->data) != 0)
1192 {
1193 pg_log_error("postgres single-user mode in target cluster failed");
1194 pg_log_error_detail("Command was: %s", postgres_cmd->data);
1195 exit(1);
1196 }
1197
1198 destroyPQExpBuffer(postgres_cmd);
1199}
1200
1201static void
1203{
1204 if (conn != NULL)
1205 PQfinish(conn);
1206}
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1609
uint32 BlockNumber
Definition: block.h:31
#define PG_DATA_CHECKSUM_VERSION
Definition: bufpage.h:207
#define Min(x, y)
Definition: c.h:975
#define ngettext(s, p, n)
Definition: c.h:1152
#define Max(x, y)
Definition: c.h:969
#define PG_TEXTDOMAIN(domain)
Definition: c.h:1185
#define UINT64_FORMAT
Definition: c.h:521
uint64_t uint64
Definition: c.h:503
#define CATALOG_VERSION_NO
Definition: catversion.h:60
int find_my_exec(const char *argv0, char *retpath)
Definition: exec.c:160
char * pipe_read_line(char *cmd)
Definition: exec.c:352
void set_pglocale_pgservice(const char *argv0, const char *app)
Definition: exec.c:429
int find_other_exec(const char *argv0, const char *target, const char *versionstr, char *retpath)
Definition: exec.c:310
void update_controlfile(const char *DataDir, ControlFileData *ControlFile, bool do_sync)
#define fprintf(file, fmt, msg)
Definition: cubescan.l:21
bool datapagemap_next(datapagemap_iterator_t *iter, BlockNumber *blkno)
Definition: datapagemap.c:87
datapagemap_iterator_t * datapagemap_iterate(datapagemap_t *map)
Definition: datapagemap.c:75
#define _(x)
Definition: elog.c:91
PGconn * PQconnectdb(const char *conninfo)
Definition: fe-connect.c:813
ConnStatusType PQstatus(const PGconn *conn)
Definition: fe-connect.c:7556
void PQfinish(PGconn *conn)
Definition: fe-connect.c:5290
char * PQerrorMessage(const PGconn *conn)
Definition: fe-connect.c:7619
void * pg_malloc(size_t size)
Definition: fe_memutils.c:47
char * pg_strdup(const char *in)
Definition: fe_memutils.c:85
void pg_free(void *ptr)
Definition: fe_memutils.c:105
void traverse_datadir(const char *datadir, process_file_callback_t callback)
Definition: file_ops.c:362
char * slurpFile(const char *datadir, const char *path, size_t *filesize)
Definition: file_ops.c:314
void close_target_file(void)
Definition: file_ops.c:75
void truncate_target_file(const char *path, off_t newsize)
Definition: file_ops.c:206
void remove_target(file_entry_t *entry)
Definition: file_ops.c:130
void sync_target_dir(void)
Definition: file_ops.c:294
void create_target(file_entry_t *entry)
Definition: file_ops.c:156
void open_target_file(const char *path, bool trunc)
Definition: file_ops.c:47
void write_target_range(char *buf, off_t begin, size_t size)
Definition: file_ops.c:88
int pg_mode_mask
Definition: file_perm.c:25
bool GetDataDirectoryCreatePerm(const char *dataDir)
DataDirSyncMethod
Definition: file_utils.h:28
@ DATA_DIR_SYNC_METHOD_FSYNC
Definition: file_utils.h:29
void filehash_init(void)
Definition: filemap.c:197
void process_source_file(const char *path, file_type_t type, size_t size, const char *link_target)
Definition: filemap.c:280
void print_filemap(filemap_t *filemap)
Definition: filemap.c:541
void keepwal_init(void)
Definition: filemap.c:243
void process_target_file(const char *path, file_type_t type, size_t size, const char *link_target)
Definition: filemap.c:316
filemap_t * decide_file_actions(void)
Definition: filemap.c:861
void calculate_totals(filemap_t *filemap)
Definition: filemap.c:500
@ FILE_ACTION_REMOVE
Definition: filemap.h:27
@ FILE_ACTION_COPY
Definition: filemap.h:21
@ FILE_ACTION_NONE
Definition: filemap.h:24
@ FILE_ACTION_COPY_TAIL
Definition: filemap.h:22
@ FILE_ACTION_UNDECIDED
Definition: filemap.h:18
@ FILE_ACTION_TRUNCATE
Definition: filemap.h:26
@ FILE_ACTION_CREATE
Definition: filemap.h:20
int getopt_long(int argc, char *const argv[], const char *optstring, const struct option *longopts, int *longindex)
Definition: getopt_long.c:60
#define no_argument
Definition: getopt_long.h:25
#define required_argument
Definition: getopt_long.h:26
Assert(PointerIsAligned(start, uint64))
int b
Definition: isn.c:74
int a
Definition: isn.c:73
int i
Definition: isn.c:77
@ CONNECTION_BAD
Definition: libpq-fe.h:85
rewind_source * init_libpq_source(PGconn *conn)
Definition: libpq_source.c:81
rewind_source * init_local_source(const char *datadir)
Definition: local_source.c:38
static struct pg_tm tm
Definition: localtime.c:104
void pg_logging_increase_verbosity(void)
Definition: logging.c:185
void pg_logging_init(const char *argv0)
Definition: logging.c:83
#define pg_log_error(...)
Definition: logging.h:106
#define pg_log_error_hint(...)
Definition: logging.h:112
#define pg_log_info(...)
Definition: logging.h:124
#define pg_log_error_detail(...)
Definition: logging.h:109
#define pg_log_debug(...)
Definition: logging.h:133
void pfree(void *pointer)
Definition: mcxt.c:2150
bool parse_sync_method(const char *optarg, DataDirSyncMethod *sync_method)
Definition: option_utils.c:90
void extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex, XLogRecPtr endpoint, const char *restoreCommand)
Definition: parsexlog.c:66
void findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex, XLogRecPtr *lastchkptrec, TimeLineID *lastchkpttli, XLogRecPtr *lastchkptredo, const char *restoreCommand)
Definition: parsexlog.c:168
XLogRecPtr readOneRecord(const char *datadir, XLogRecPtr ptr, int tliIndex, const char *restoreCommand)
Definition: parsexlog.c:124
static pg_time_t last_progress_report
Definition: pg_amcheck.c:147
#define pg_fatal(...)
static bool writerecoveryconf
#define MAXPGPATH
#define PG_CONTROL_VERSION
Definition: pg_control.h:25
@ DB_IN_PRODUCTION
Definition: pg_control.h:97
@ DB_IN_ARCHIVE_RECOVERY
Definition: pg_control.h:96
@ DB_SHUTDOWNED_IN_RECOVERY
Definition: pg_control.h:93
@ DB_SHUTDOWNED
Definition: pg_control.h:92
#define PG_CONTROL_FILE_SIZE
Definition: pg_control.h:256
uint32 pg_crc32c
Definition: pg_crc32c.h:38
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:153
#define EQ_CRC32C(c1, c2)
Definition: pg_crc32c.h:42
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:158
const void size_t len
return crc
static char * argv0
Definition: pg_ctl.c:93
static char * exec_path
Definition: pg_ctl.c:88
PGDLLIMPORT int optind
Definition: getopt.c:51
PGDLLIMPORT char * optarg
Definition: getopt.c:53
static ControlFileData ControlFile_source
Definition: pg_rewind.c:60
static void createBackupLabel(XLogRecPtr startpoint, TimeLineID starttli, XLogRecPtr checkpointloc)
Definition: pg_rewind.c:963
static void usage(const char *progname)
Definition: pg_rewind.c:92
static void sanityChecks(void)
Definition: pg_rewind.c:735
static char * datadir_source
Definition: pg_rewind.c:68
static void findCommonAncestorTimeline(TimeLineHistoryEntry *a_history, int a_nentries, TimeLineHistoryEntry *b_history, int b_nentries, XLogRecPtr *recptr, int *tliIndex)
Definition: pg_rewind.c:921
static ControlFileData ControlFile_source_after
Definition: pg_rewind.c:61
int WalSegSz
Definition: pg_rewind.c:64
static char * restore_command
Definition: pg_rewind.c:70
static bool debug
Definition: pg_rewind.c:73
int main(int argc, char **argv)
Definition: pg_rewind.c:121
static XLogRecPtr MinXLogRecPtr(XLogRecPtr a, XLogRecPtr b)
Definition: pg_rewind.c:844
static void ensureCleanShutdown(const char *argv0)
Definition: pg_rewind.c:1131
TimeLineHistoryEntry * targetHistory
Definition: pg_rewind.c:81
static rewind_source * source
Definition: pg_rewind.c:89
static ControlFileData ControlFile_target
Definition: pg_rewind.c:59
void progress_report(bool finished)
Definition: pg_rewind.c:793
static TimeLineHistoryEntry * getTimelineHistory(TimeLineID tli, bool is_source, int *nentries)
Definition: pg_rewind.c:858
static void digestControlFile(ControlFileData *ControlFile, const char *content, size_t size)
Definition: pg_rewind.c:1025
static char * connstr_source
Definition: pg_rewind.c:69
static void checkControlFile(ControlFileData *ControlFile)
Definition: pg_rewind.c:1006
static void getRestoreCommand(const char *argv0)
Definition: pg_rewind.c:1058
char * datadir_target
Definition: pg_rewind.c:67
bool do_sync
Definition: pg_rewind.c:76
static bool restore_wal
Definition: pg_rewind.c:77
uint64 fetch_done
Definition: pg_rewind.c:86
int targetNentries
Definition: pg_rewind.c:82
uint64 fetch_size
Definition: pg_rewind.c:85
static char * config_file
Definition: pg_rewind.c:71
bool dry_run
Definition: pg_rewind.c:75
DataDirSyncMethod sync_method
Definition: pg_rewind.c:78
bool showprogress
Definition: pg_rewind.c:74
static const char * progname
Definition: pg_rewind.c:63
static void perform_rewind(filemap_t *filemap, rewind_source *source, XLogRecPtr chkptrec, TimeLineID chkpttli, XLogRecPtr chkptredo)
Definition: pg_rewind.c:555
static void disconnect_atexit(void)
Definition: pg_rewind.c:1202
static PGconn * conn
Definition: pg_rewind.c:88
TimeLineHistoryEntry * rewind_parseTimeLineHistory(char *buffer, TimeLineID targetTLI, int *nentries)
Definition: timeline.c:28
static char * buf
Definition: pg_test_fsync.c:72
int64 pg_time_t
Definition: pgtime.h:23
#define snprintf
Definition: port.h:239
#define DEVNULL
Definition: port.h:161
#define PG_BACKEND_VERSIONSTR
Definition: port.h:144
const char * get_progname(const char *argv0)
Definition: path.c:652
#define printf(...)
Definition: port.h:245
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
PQExpBuffer createPQExpBuffer(void)
Definition: pqexpbuffer.c:72
void destroyPQExpBuffer(PQExpBuffer str)
Definition: pqexpbuffer.c:114
void appendPQExpBufferStr(PQExpBuffer str, const char *data)
Definition: pqexpbuffer.c:367
char * c
void WriteRecoveryConfig(PGconn *pgconn, const char *target_dir, PQExpBuffer contents)
Definition: recovery_gen.c:125
PQExpBuffer GenerateRecoveryConfig(PGconn *pgconn, const char *replication_slot, char *dbname)
Definition: recovery_gen.c:28
char * GetDbnameFromConnectionOptions(const char *connstr)
Definition: recovery_gen.c:204
void get_restricted_token(void)
int pg_strip_crlf(char *str)
Definition: string.c:154
void appendShellString(PQExpBuffer buf, const char *str)
Definition: string_utils.c:582
TimeLineID ThisTimeLineID
Definition: pg_control.h:39
XLogRecPtr redo
Definition: pg_control.h:37
uint32 pg_control_version
Definition: pg_control.h:125
uint32 xlog_seg_size
Definition: pg_control.h:211
CheckPoint checkPointCopy
Definition: pg_control.h:135
XLogRecPtr minRecoveryPoint
Definition: pg_control.h:168
uint32 data_checksum_version
Definition: pg_control.h:222
XLogRecPtr checkPoint
Definition: pg_control.h:133
uint64 system_identifier
Definition: pg_control.h:110
uint32 catalog_version_no
Definition: pg_control.h:126
TimeLineID minRecoveryPointTLI
Definition: pg_control.h:169
pg_crc32c crc
Definition: pg_control.h:238
XLogRecPtr begin
Definition: timeline.h:28
TimeLineID tli
Definition: timeline.h:27
XLogRecPtr end
Definition: timeline.h:29
int bitmapsize
Definition: datapagemap.h:17
Definition: filemap.h:50
datapagemap_t target_pages_to_overwrite
Definition: filemap.h:68
const char * path
Definition: filemap.h:53
size_t source_size
Definition: filemap.h:75
size_t target_size
Definition: filemap.h:61
file_action_t action
Definition: filemap.h:81
file_entry_t * entries[FLEXIBLE_ARRAY_MEMBER]
Definition: filemap.h:96
int nentries
Definition: filemap.h:95
uint64 total_size
Definition: filemap.h:92
uint64 fetch_size
Definition: filemap.h:93
void(* queue_fetch_file)(struct rewind_source *, const char *path, size_t len)
Definition: rewind_source.h:60
void(* traverse_files)(struct rewind_source *, process_file_callback_t callback)
Definition: rewind_source.h:29
void(* finish_fetch)(struct rewind_source *)
Definition: rewind_source.h:66
XLogRecPtr(* get_current_wal_insert_lsn)(struct rewind_source *)
Definition: rewind_source.h:71
void(* queue_fetch_range)(struct rewind_source *, const char *path, off_t offset, size_t len)
Definition: rewind_source.h:47
char *(* fetch_file)(struct rewind_source *, const char *path, size_t *filesize)
Definition: rewind_source.h:37
void(* destroy)(struct rewind_source *)
Definition: rewind_source.h:76
static ControlFileData * ControlFile
Definition: xlog.c:585
#define IsValidWalSegSize(size)
Definition: xlog_internal.h:96
#define XLOG_CONTROL_FILE
#define MAXFNAMELEN
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
static void XLogFileName(char *fname, TimeLineID tli, XLogSegNo logSegNo, int wal_segsz_bytes)
static void TLHistoryFilePath(char *path, TimeLineID tli)
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
uint32 TimeLineID
Definition: xlogdefs.h:59
uint64 XLogSegNo
Definition: xlogdefs.h:48