Refactor pg_rewind for more clear decision making.
author Heikki Linnakangas <heikki.linnakangas@iki.fi>
Wed, 4 Nov 2020 09:21:09 +0000 (11:21 +0200)
committer Heikki Linnakangas <heikki.linnakangas@iki.fi>
Wed, 4 Nov 2020 09:21:09 +0000 (11:21 +0200)
Deciding what to do with each file is now a separate step after all the
necessary information has been gathered. It is more clear that way.
Previously, the decision-making was divided between process_source_file()
and process_target_file(), and it was a bit hard to piece together what
the overall rules were.

Reviewed-by: Kyotaro Horiguchi, Soumyadeep Chakraborty
Discussion: https://www.postgresql.org/message-id/0c5b3783-af52-3ee5-f8fa-6e794061f70d%40iki.fi

src/bin/pg_rewind/copy_fetch.c
src/bin/pg_rewind/file_ops.c
src/bin/pg_rewind/filemap.c
src/bin/pg_rewind/filemap.h
src/bin/pg_rewind/libpq_fetch.c
src/bin/pg_rewind/parsexlog.c
src/bin/pg_rewind/pg_rewind.c

index 1edab5f18670a39fc2f69e441d91ffa1adbea1a9..e4b8ce6aaf4156d276c2e104e9d923b50e949efe 100644 (file)
@@ -210,7 +210,7 @@ copy_executeFileMap(filemap_t *map)
        for (i = 0; i < map->narray; i++)
        {
                entry = map->array[i];
-               execute_pagemap(&entry->pagemap, entry->path);
+               execute_pagemap(&entry->target_pages_to_overwrite, entry->path);
 
                switch (entry->action)
                {
@@ -219,16 +219,16 @@ copy_executeFileMap(filemap_t *map)
                                break;
 
                        case FILE_ACTION_COPY:
-                               rewind_copy_file_range(entry->path, 0, entry->newsize, true);
+                               rewind_copy_file_range(entry->path, 0, entry->source_size, true);
                                break;
 
                        case FILE_ACTION_TRUNCATE:
-                               truncate_target_file(entry->path, entry->newsize);
+                               truncate_target_file(entry->path, entry->source_size);
                                break;
 
                        case FILE_ACTION_COPY_TAIL:
-                               rewind_copy_file_range(entry->path, entry->oldsize,
-                                                                          entry->newsize, false);
+                               rewind_copy_file_range(entry->path, entry->target_size,
+                                                                          entry->source_size, false);
                                break;
 
                        case FILE_ACTION_CREATE:
@@ -238,6 +238,10 @@ copy_executeFileMap(filemap_t *map)
                        case FILE_ACTION_REMOVE:
                                remove_target(entry);
                                break;
+
+                       case FILE_ACTION_UNDECIDED:
+                               pg_fatal("no action decided for \"%s\"", entry->path);
+                               break;
                }
        }
 
index 55439db20bac10266bd02be2d8206f2a39c1cb55..ec37d0b2e0df4d413888b0cf31c3081ccce2e88d 100644 (file)
@@ -126,8 +126,9 @@ void
 remove_target(file_entry_t *entry)
 {
        Assert(entry->action == FILE_ACTION_REMOVE);
+       Assert(entry->target_exists);
 
-       switch (entry->type)
+       switch (entry->target_type)
        {
                case FILE_TYPE_DIRECTORY:
                        remove_target_dir(entry->path);
@@ -140,6 +141,10 @@ remove_target(file_entry_t *entry)
                case FILE_TYPE_SYMLINK:
                        remove_target_symlink(entry->path);
                        break;
+
+               case FILE_TYPE_UNDEFINED:
+                       pg_fatal("undefined file type for \"%s\"", entry->path);
+                       break;
        }
 }
 
@@ -147,21 +152,26 @@ void
 create_target(file_entry_t *entry)
 {
        Assert(entry->action == FILE_ACTION_CREATE);
+       Assert(!entry->target_exists);
 
-       switch (entry->type)
+       switch (entry->source_type)
        {
                case FILE_TYPE_DIRECTORY:
                        create_target_dir(entry->path);
                        break;
 
                case FILE_TYPE_SYMLINK:
-                       create_target_symlink(entry->path, entry->link_target);
+                       create_target_symlink(entry->path, entry->source_link_target);
                        break;
 
                case FILE_TYPE_REGULAR:
                        /* can't happen. Regular files are created with open_target_file. */
                        pg_fatal("invalid action (CREATE) for regular file");
                        break;
+
+               case FILE_TYPE_UNDEFINED:
+                       pg_fatal("undefined file type for \"%s\"", entry->path);
+                       break;
        }
 }
 
index 1abc257177efdb42afbfe8ba07a7b70b77de9353..d756c28ca8af8569919a94d89ceeadef9055de57 100644 (file)
@@ -26,6 +26,8 @@ static bool isRelDataFile(const char *path);
 static char *datasegpath(RelFileNode rnode, ForkNumber forknum,
                                                 BlockNumber segno);
 static int     path_cmp(const void *a, const void *b);
+
+static file_entry_t *get_filemap_entry(const char *path, bool create);
 static int     final_filemap_cmp(const void *a, const void *b);
 static void filemap_list_to_array(filemap_t *map);
 static bool check_file_excluded(const char *path, bool is_source);
@@ -146,33 +148,79 @@ filemap_create(void)
        filemap = map;
 }
 
+/* Look up or create entry for 'path' */
+static file_entry_t *
+get_filemap_entry(const char *path, bool create)
+{
+       filemap_t  *map = filemap;
+       file_entry_t *entry;
+       file_entry_t **e;
+       file_entry_t key;
+       file_entry_t *key_ptr;
+
+       if (map->array)
+       {
+               key.path = (char *) path;
+               key_ptr = &key;
+               e = bsearch(&key_ptr, map->array, map->narray, sizeof(file_entry_t *),
+                                       path_cmp);
+       }
+       else
+               e = NULL;
+
+       if (e)
+               entry = *e;
+       else if (!create)
+               entry = NULL;
+       else
+       {
+               /* Create a new entry for this file */
+               entry = pg_malloc(sizeof(file_entry_t));
+               entry->path = pg_strdup(path);
+               entry->isrelfile = isRelDataFile(path);
+               entry->action = FILE_ACTION_UNDECIDED;
+
+               entry->target_exists = false;
+               entry->target_type = FILE_TYPE_UNDEFINED;
+               entry->target_size = 0;
+               entry->target_link_target = NULL;
+               entry->target_pages_to_overwrite.bitmap = NULL;
+               entry->target_pages_to_overwrite.bitmapsize = 0;
+
+               entry->source_exists = false;
+               entry->source_type = FILE_TYPE_UNDEFINED;
+               entry->source_size = 0;
+               entry->source_link_target = NULL;
+
+               entry->next = NULL;
+
+               if (map->last)
+               {
+                       map->last->next = entry;
+                       map->last = entry;
+               }
+               else
+                       map->first = map->last = entry;
+               map->nlist++;
+       }
+
+       return entry;
+}
+
 /*
  * Callback for processing source file list.
  *
- * This is called once for every file in the source server. We decide what
- * action needs to be taken for the file, depending on whether the file
- * exists in the target and whether the size matches.
+ * This is called once for every file in the source server.  We record the
+ * type and size of the file, so that decide_file_action() can later decide what
+ * to do with it.
  */
 void
-process_source_file(const char *path, file_type_t type, size_t newsize,
+process_source_file(const char *path, file_type_t type, size_t size,
                                        const char *link_target)
 {
-       bool            exists;
-       char            localpath[MAXPGPATH];
-       struct stat statbuf;
-       filemap_t  *map = filemap;
-       file_action_t action = FILE_ACTION_NONE;
-       size_t          oldsize = 0;
        file_entry_t *entry;
 
-       Assert(map->array == NULL);
-
-       /*
-        * Skip any files matching the exclusion filters. This has the effect to
-        * remove all those files on the target.
-        */
-       if (check_file_excluded(path, true))
-               return;
+       Assert(filemap->array == NULL);
 
        /*
         * Pretend that pg_wal is a directory, even if it's really a symlink. We
@@ -182,16 +230,6 @@ process_source_file(const char *path, file_type_t type, size_t newsize,
        if (strcmp(path, "pg_wal") == 0 && type == FILE_TYPE_SYMLINK)
                type = FILE_TYPE_DIRECTORY;
 
-       /*
-        * Skip temporary files, .../pgsql_tmp/... and .../pgsql_tmp.* in source.
-        * This has the effect that all temporary files in the destination will be
-        * removed.
-        */
-       if (strstr(path, "/" PG_TEMP_FILE_PREFIX) != NULL)
-               return;
-       if (strstr(path, "/" PG_TEMP_FILES_DIR "/") != NULL)
-               return;
-
        /*
         * sanity check: a filename that looks like a data file better be a
         * regular file
@@ -199,158 +237,25 @@ process_source_file(const char *path, file_type_t type, size_t newsize,
        if (type != FILE_TYPE_REGULAR && isRelDataFile(path))
                pg_fatal("data file \"%s\" in source is not a regular file", path);
 
-       snprintf(localpath, sizeof(localpath), "%s/%s", datadir_target, path);
-
-       /* Does the corresponding file exist in the target data dir? */
-       if (lstat(localpath, &statbuf) < 0)
-       {
-               if (errno != ENOENT)
-                       pg_fatal("could not stat file \"%s\": %m",
-                                        localpath);
-
-               exists = false;
-       }
-       else
-               exists = true;
-
-       switch (type)
-       {
-               case FILE_TYPE_DIRECTORY:
-                       if (exists && !S_ISDIR(statbuf.st_mode) && strcmp(path, "pg_wal") != 0)
-                       {
-                               /* it's a directory in source, but not in target. Strange.. */
-                               pg_fatal("\"%s\" is not a directory", localpath);
-                       }
-
-                       if (!exists)
-                               action = FILE_ACTION_CREATE;
-                       else
-                               action = FILE_ACTION_NONE;
-                       oldsize = 0;
-                       break;
-
-               case FILE_TYPE_SYMLINK:
-                       if (exists &&
-#ifndef WIN32
-                               !S_ISLNK(statbuf.st_mode)
-#else
-                               !pgwin32_is_junction(localpath)
-#endif
-                               )
-                       {
-                               /*
-                                * It's a symbolic link in source, but not in target.
-                                * Strange..
-                                */
-                               pg_fatal("\"%s\" is not a symbolic link", localpath);
-                       }
-
-                       if (!exists)
-                               action = FILE_ACTION_CREATE;
-                       else
-                               action = FILE_ACTION_NONE;
-                       oldsize = 0;
-                       break;
-
-               case FILE_TYPE_REGULAR:
-                       if (exists && !S_ISREG(statbuf.st_mode))
-                               pg_fatal("\"%s\" is not a regular file", localpath);
-
-                       if (!exists || !isRelDataFile(path))
-                       {
-                               /*
-                                * File exists in source, but not in target. Or it's a
-                                * non-data file that we have no special processing for. Copy
-                                * it in toto.
-                                *
-                                * An exception: PG_VERSIONs should be identical, but avoid
-                                * overwriting it for paranoia.
-                                */
-                               if (pg_str_endswith(path, "PG_VERSION"))
-                               {
-                                       action = FILE_ACTION_NONE;
-                                       oldsize = statbuf.st_size;
-                               }
-                               else
-                               {
-                                       action = FILE_ACTION_COPY;
-                                       oldsize = 0;
-                               }
-                       }
-                       else
-                       {
-                               /*
-                                * It's a data file that exists in both.
-                                *
-                                * If it's larger in target, we can truncate it. There will
-                                * also be a WAL record of the truncation in the source
-                                * system, so WAL replay would eventually truncate the target
-                                * too, but we might as well do it now.
-                                *
-                                * If it's smaller in the target, it means that it has been
-                                * truncated in the target, or enlarged in the source, or
-                                * both. If it was truncated in the target, we need to copy
-                                * the missing tail from the source system. If it was enlarged
-                                * in the source system, there will be WAL records in the
-                                * source system for the new blocks, so we wouldn't need to
-                                * copy them here. But we don't know which scenario we're
-                                * dealing with, and there's no harm in copying the missing
-                                * blocks now, so do it now.
-                                *
-                                * If it's the same size, do nothing here. Any blocks modified
-                                * in the target will be copied based on parsing the target
-                                * system's WAL, and any blocks modified in the source will be
-                                * updated after rewinding, when the source system's WAL is
-                                * replayed.
-                                */
-                               oldsize = statbuf.st_size;
-                               if (oldsize < newsize)
-                                       action = FILE_ACTION_COPY_TAIL;
-                               else if (oldsize > newsize)
-                                       action = FILE_ACTION_TRUNCATE;
-                               else
-                                       action = FILE_ACTION_NONE;
-                       }
-                       break;
-       }
-
-       /* Create a new entry for this file */
-       entry = pg_malloc(sizeof(file_entry_t));
-       entry->path = pg_strdup(path);
-       entry->type = type;
-       entry->action = action;
-       entry->oldsize = oldsize;
-       entry->newsize = newsize;
-       entry->link_target = link_target ? pg_strdup(link_target) : NULL;
-       entry->next = NULL;
-       entry->pagemap.bitmap = NULL;
-       entry->pagemap.bitmapsize = 0;
-       entry->isrelfile = isRelDataFile(path);
-
-       if (map->last)
-       {
-               map->last->next = entry;
-               map->last = entry;
-       }
-       else
-               map->first = map->last = entry;
-       map->nlist++;
+       /* Remember this source file */
+       entry = get_filemap_entry(path, true);
+       entry->source_exists = true;
+       entry->source_type = type;
+       entry->source_size = size;
+       entry->source_link_target = link_target ? pg_strdup(link_target) : NULL;
 }
 
 /*
  * Callback for processing target file list.
  *
- * All source files must be already processed before calling this. This only
- * marks target data directory's files that didn't exist in the source for
- * deletion.
+ * All source files must be already processed before calling this.  We record
+ * the type and size of the file, so that decide_file_action() can later decide
+ * what to do with it.
  */
 void
-process_target_file(const char *path, file_type_t type, size_t oldsize,
+process_target_file(const char *path, file_type_t type, size_t size,
                                        const char *link_target)
 {
-       bool            exists;
-       file_entry_t key;
-       file_entry_t *key_ptr;
        filemap_t  *map = filemap;
        file_entry_t *entry;
 
@@ -359,7 +264,6 @@ process_target_file(const char *path, file_type_t type, size_t oldsize,
         * from the target data folder all paths which have been filtered out from
         * the source data folder when processing the source files.
         */
-
        if (map->array == NULL)
        {
                /* on first call, initialize lookup array */
@@ -377,120 +281,77 @@ process_target_file(const char *path, file_type_t type, size_t oldsize,
        }
 
        /*
-        * Like in process_source_file, pretend that xlog is always a  directory.
+        * Like in process_source_file, pretend that pg_wal is always a directory.
         */
        if (strcmp(path, "pg_wal") == 0 && type == FILE_TYPE_SYMLINK)
                type = FILE_TYPE_DIRECTORY;
 
-       key.path = (char *) path;
-       key_ptr = &key;
-       exists = (bsearch(&key_ptr, map->array, map->narray, sizeof(file_entry_t *),
-                                         path_cmp) != NULL);
-
-       /* Remove any file or folder that doesn't exist in the source system. */
-       if (!exists)
-       {
-               entry = pg_malloc(sizeof(file_entry_t));
-               entry->path = pg_strdup(path);
-               entry->type = type;
-               entry->action = FILE_ACTION_REMOVE;
-               entry->oldsize = oldsize;
-               entry->newsize = 0;
-               entry->link_target = link_target ? pg_strdup(link_target) : NULL;
-               entry->next = NULL;
-               entry->pagemap.bitmap = NULL;
-               entry->pagemap.bitmapsize = 0;
-               entry->isrelfile = isRelDataFile(path);
-
-               if (map->last == NULL)
-                       map->first = entry;
-               else
-                       map->last->next = entry;
-               map->last = entry;
-               map->nlist++;
-       }
-       else
-       {
-               /*
-                * We already handled all files that exist in the source system in
-                * process_source_file().
-                */
-       }
+       /* Remember this target file */
+       entry = get_filemap_entry(path, true);
+       entry->target_exists = true;
+       entry->target_type = type;
+       entry->target_size = size;
+       entry->target_link_target = link_target ? pg_strdup(link_target) : NULL;
 }
 
 /*
  * This callback gets called while we read the WAL in the target, for every
- * block that have changed in the target system. It makes note of all the
- * changed blocks in the pagemap of the file.
+ * block that has changed in the target system.  It decides if the given
+ * 'blkno' in the target relfile needs to be overwritten from the source, and
+ * if so, records it in 'target_pages_to_overwrite' bitmap.
+ *
+ * NOTE: All the files on both systems must have already been added to the
+ * file map!
  */
 void
-process_block_change(ForkNumber forknum, RelFileNode rnode, BlockNumber blkno)
+process_target_wal_block_change(ForkNumber forknum, RelFileNode rnode,
+                                                               BlockNumber blkno)
 {
        char       *path;
-       file_entry_t key;
-       file_entry_t *key_ptr;
        file_entry_t *entry;
        BlockNumber blkno_inseg;
        int                     segno;
-       filemap_t  *map = filemap;
-       file_entry_t **e;
 
-       Assert(map->array);
+       Assert(filemap->array);
 
        segno = blkno / RELSEG_SIZE;
        blkno_inseg = blkno % RELSEG_SIZE;
 
        path = datasegpath(rnode, forknum, segno);
-
-       key.path = (char *) path;
-       key_ptr = &key;
-
-       e = bsearch(&key_ptr, map->array, map->narray, sizeof(file_entry_t *),
-                               path_cmp);
-       if (e)
-               entry = *e;
-       else
-               entry = NULL;
+       entry = get_filemap_entry(path, false);
        pfree(path);
 
        if (entry)
        {
-               Assert(entry->isrelfile);
+               int64           end_offset;
 
-               switch (entry->action)
-               {
-                       case FILE_ACTION_NONE:
-                       case FILE_ACTION_TRUNCATE:
-                               /* skip if we're truncating away the modified block anyway */
-                               if ((blkno_inseg + 1) * BLCKSZ <= entry->newsize)
-                                       datapagemap_add(&entry->pagemap, blkno_inseg);
-                               break;
-
-                       case FILE_ACTION_COPY_TAIL:
-
-                               /*
-                                * skip the modified block if it is part of the "tail" that
-                                * we're copying anyway.
-                                */
-                               if ((blkno_inseg + 1) * BLCKSZ <= entry->oldsize)
-                                       datapagemap_add(&entry->pagemap, blkno_inseg);
-                               break;
+               Assert(entry->isrelfile);
 
-                       case FILE_ACTION_COPY:
-                       case FILE_ACTION_REMOVE:
-                               break;
+               if (entry->target_type != FILE_TYPE_REGULAR)
+                       pg_fatal("unexpected page modification for non-regular file \"%s\"",
+                                        entry->path);
 
-                       case FILE_ACTION_CREATE:
-                               pg_fatal("unexpected page modification for directory or symbolic link \"%s\"", entry->path);
-               }
+               /*
+                * If the block is beyond the EOF in the source system, no need to
+                * remember it now, because we're going to truncate it away from the
+                * target anyway. Also no need to remember the block if it's beyond
+                * the current EOF in the target system; we will copy it over with the
+                * "tail" from the source system, anyway.
+                */
+               end_offset = (blkno_inseg + 1) * BLCKSZ;
+               if (end_offset <= entry->source_size &&
+                       end_offset <= entry->target_size)
+                       datapagemap_add(&entry->target_pages_to_overwrite, blkno_inseg);
        }
        else
        {
                /*
                 * If we don't have any record of this file in the file map, it means
-                * that it's a relation that doesn't exist in the source system, and
-                * it was subsequently removed in the target system, too. We can
-                * safely ignore it.
+                * that it's a relation that doesn't exist in the source system.  It
+                * could exist in the target system; we haven't moved the target-only
+                * entries from the linked list to the array yet!  But in any case, if
+                * it doesn't exist in the source it will be removed from the target
+                * too, and we can safely ignore it.
                 */
        }
 }
@@ -505,6 +366,15 @@ check_file_excluded(const char *path, bool is_source)
        int                     excludeIdx;
        const char *filename;
 
+       /*
+        * Skip all temporary files, .../pgsql_tmp/... and .../pgsql_tmp.*
+        */
+       if (strstr(path, "/" PG_TEMP_FILE_PREFIX) != NULL ||
+               strstr(path, "/" PG_TEMP_FILES_DIR "/") != NULL)
+       {
+               return true;
+       }
+
        /* check individual files... */
        for (excludeIdx = 0; excludeFiles[excludeIdx].name != NULL; excludeIdx++)
        {
@@ -581,16 +451,6 @@ filemap_list_to_array(filemap_t *map)
        map->first = map->last = NULL;
 }
 
-void
-filemap_finalize(void)
-{
-       filemap_t  *map = filemap;
-
-       filemap_list_to_array(map);
-       qsort(map->array, map->narray, sizeof(file_entry_t *),
-                 final_filemap_cmp);
-}
-
 static const char *
 action_to_str(file_action_t action)
 {
@@ -631,26 +491,26 @@ calculate_totals(void)
        {
                entry = map->array[i];
 
-               if (entry->type != FILE_TYPE_REGULAR)
+               if (entry->source_type != FILE_TYPE_REGULAR)
                        continue;
 
-               map->total_size += entry->newsize;
+               map->total_size += entry->source_size;
 
                if (entry->action == FILE_ACTION_COPY)
                {
-                       map->fetch_size += entry->newsize;
+                       map->fetch_size += entry->source_size;
                        continue;
                }
 
                if (entry->action == FILE_ACTION_COPY_TAIL)
-                       map->fetch_size += (entry->newsize - entry->oldsize);
+                       map->fetch_size += (entry->source_size - entry->target_size);
 
-               if (entry->pagemap.bitmapsize > 0)
+               if (entry->target_pages_to_overwrite.bitmapsize > 0)
                {
                        datapagemap_iterator_t *iter;
                        BlockNumber blk;
 
-                       iter = datapagemap_iterate(&entry->pagemap);
+                       iter = datapagemap_iterate(&entry->target_pages_to_overwrite);
                        while (datapagemap_next(iter, &blk))
                                map->fetch_size += BLCKSZ;
 
@@ -670,13 +530,13 @@ print_filemap(void)
        {
                entry = map->array[i];
                if (entry->action != FILE_ACTION_NONE ||
-                       entry->pagemap.bitmapsize > 0)
+                       entry->target_pages_to_overwrite.bitmapsize > 0)
                {
                        pg_log_debug("%s (%s)", entry->path,
                                                 action_to_str(entry->action));
 
-                       if (entry->pagemap.bitmapsize > 0)
-                               datapagemap_print(&entry->pagemap);
+                       if (entry->target_pages_to_overwrite.bitmapsize > 0)
+                               datapagemap_print(&entry->target_pages_to_overwrite);
                }
        }
        fflush(stdout);
@@ -825,3 +685,171 @@ final_filemap_cmp(const void *a, const void *b)
        else
                return strcmp(fa->path, fb->path);
 }
+
+/*
+ * Decide what action to perform to a file.
+ */
+static file_action_t
+decide_file_action(file_entry_t *entry)
+{
+       const char *path = entry->path;
+
+       /*
+        * Don't touch the control file. It is handled specially, after copying
+        * all the other files.
+        */
+       if (strcmp(path, "global/pg_control") == 0)
+               return FILE_ACTION_NONE;
+
+       /*
+        * Remove all files matching the exclusion filters in the target.
+        */
+       if (check_file_excluded(path, true))
+       {
+               if (entry->target_exists)
+                       return FILE_ACTION_REMOVE;
+               else
+                       return FILE_ACTION_NONE;
+       }
+
+       /*
+        * Handle cases where the file is missing from one of the systems.
+        */
+       if (!entry->target_exists && entry->source_exists)
+       {
+               /*
+                * File exists in source, but not in target. Copy it in toto. (If it's
+                * a relation data file, WAL replay after rewinding should re-create
+                * it anyway. But there's no harm in copying it now.)
+                */
+               switch (entry->source_type)
+               {
+                       case FILE_TYPE_DIRECTORY:
+                       case FILE_TYPE_SYMLINK:
+                               return FILE_ACTION_CREATE;
+                       case FILE_TYPE_REGULAR:
+                               return FILE_ACTION_COPY;
+                       case FILE_TYPE_UNDEFINED:
+                               pg_fatal("unknown file type for \"%s\"", entry->path);
+                               break;
+               }
+       }
+       else if (entry->target_exists && !entry->source_exists)
+       {
+               /* File exists in target, but not source. Remove it. */
+               return FILE_ACTION_REMOVE;
+       }
+       else if (!entry->target_exists && !entry->source_exists)
+       {
+               /*
+                * Doesn't exist in either server. Why does it have an entry in the
+                * first place??
+                */
+               Assert(false);
+               return FILE_ACTION_NONE;
+       }
+
+       /*
+        * Otherwise, the file exists on both systems
+        */
+       Assert(entry->target_exists && entry->source_exists);
+
+       if (entry->source_type != entry->target_type)
+       {
+               /* But it's a different kind of object. Strange.. */
+               pg_fatal("file \"%s\" is of different type in source and target", entry->path);
+       }
+
+       /*
+        * PG_VERSION files should be identical on both systems, but avoid
+        * overwriting them for paranoia.
+        */
+       if (pg_str_endswith(entry->path, "PG_VERSION"))
+               return FILE_ACTION_NONE;
+
+       switch (entry->source_type)
+       {
+               case FILE_TYPE_DIRECTORY:
+                       return FILE_ACTION_NONE;
+
+               case FILE_TYPE_SYMLINK:
+
+                       /*
+                        * XXX: Should we check if it points to the same target?
+                        */
+                       return FILE_ACTION_NONE;
+
+               case FILE_TYPE_REGULAR:
+                       if (!entry->isrelfile)
+                       {
+                               /*
+                                * It's a non-data file that we have no special processing
+                                * for. Copy it in toto.
+                                */
+                               return FILE_ACTION_COPY;
+                       }
+                       else
+                       {
+                               /*
+                                * It's a data file that exists in both systems.
+                                *
+                                * If it's larger in target, we can truncate it. There will
+                                * also be a WAL record of the truncation in the source
+                                * system, so WAL replay would eventually truncate the target
+                                * too, but we might as well do it now.
+                                *
+                                * If it's smaller in the target, it means that it has been
+                                * truncated in the target, or enlarged in the source, or
+                                * both. If it was truncated in the target, we need to copy
+                                * the missing tail from the source system. If it was enlarged
+                                * in the source system, there will be WAL records in the
+                                * source system for the new blocks, so we wouldn't need to
+                                * copy them here. But we don't know which scenario we're
+                                * dealing with, and there's no harm in copying the missing
+                                * blocks now, so do it now.
+                                *
+                                * If it's the same size, do nothing here. Any blocks modified
+                                * in the target will be copied based on parsing the target
+                                * system's WAL, and any blocks modified in the source will be
+                                * updated after rewinding, when the source system's WAL is
+                                * replayed.
+                                */
+                               if (entry->target_size < entry->source_size)
+                                       return FILE_ACTION_COPY_TAIL;
+                               else if (entry->target_size > entry->source_size)
+                                       return FILE_ACTION_TRUNCATE;
+                               else
+                                       return FILE_ACTION_NONE;
+                       }
+                       break;
+
+               case FILE_TYPE_UNDEFINED:
+                       pg_fatal("unknown file type for \"%s\"", path);
+                       break;
+       }
+
+       /* unreachable */
+       pg_fatal("could not decide what to do with file \"%s\"", path);
+}
+
+/*
+ * Decide what to do with each file.
+ */
+void
+decide_file_actions(void)
+{
+       int                     i;
+
+       filemap_list_to_array(filemap);
+
+       for (i = 0; i < filemap->narray; i++)
+       {
+               file_entry_t *entry = filemap->array[i];
+
+               entry->action = decide_file_action(entry);
+       }
+
+       /* Sort the actions to the order that they should be performed */
+       qsort(filemap->array, filemap->narray, sizeof(file_entry_t *),
+                 final_filemap_cmp);
+}
index 0cb7425170c93be87a018ac104e7aced31509149..3d42355873435fde8562f3238909e8a895a9a75a 100644 (file)
 
 /*
  * For every file found in the local or remote system, we have a file entry
- * which says what we are going to do with the file. For relation files,
- * there is also a page map, marking pages in the file that were changed
- * locally.
- *
- * The enum values are sorted in the order we want actions to be processed.
+ * that contains information about the file on both systems.  For relation
+ * files, there is also a page map that marks pages in the file that were
+ * changed in the target after the last common checkpoint.  Each entry also
+ * contains an 'action' field, which says what we are going to do with the
+ * file.
  */
+
+/* these enum values are sorted in the order we want actions to be processed */
 typedef enum
 {
+       FILE_ACTION_UNDECIDED = 0,      /* not decided yet */
+
        FILE_ACTION_CREATE,                     /* create local directory or symbolic link */
        FILE_ACTION_COPY,                       /* copy whole file, overwriting if exists */
-       FILE_ACTION_COPY_TAIL,          /* copy tail from 'oldsize' to 'newsize' */
+       FILE_ACTION_COPY_TAIL,          /* copy tail from 'target_size' to
+                                                                * 'source_size' */
        FILE_ACTION_NONE,                       /* no action (we might still copy modified
                                                                 * blocks based on the parsed WAL) */
        FILE_ACTION_TRUNCATE,           /* truncate local file to 'newsize' bytes */
@@ -33,6 +38,8 @@ typedef enum
 
 typedef enum
 {
+       FILE_TYPE_UNDEFINED = 0,
+
        FILE_TYPE_REGULAR,
        FILE_TYPE_DIRECTORY,
        FILE_TYPE_SYMLINK
@@ -41,19 +48,34 @@ typedef enum
 typedef struct file_entry_t
 {
        char       *path;
-       file_type_t type;
+       bool            isrelfile;              /* is it a relation data file? */
 
-       file_action_t action;
+       /*
+        * Status of the file in the target.
+        */
+       bool            target_exists;
+       file_type_t target_type;
+       size_t          target_size;    /* for a regular file */
+       char       *target_link_target; /* for a symlink */
 
-       /* for a regular file */
-       size_t          oldsize;
-       size_t          newsize;
-       bool            isrelfile;              /* is it a relation data file? */
+       /*
+        * Pages that were modified in the target and need to be replaced from the
+        * source.
+        */
+       datapagemap_t target_pages_to_overwrite;
 
-       datapagemap_t pagemap;
+       /*
+        * Status of the file in the source.
+        */
+       bool            source_exists;
+       file_type_t source_type;
+       size_t          source_size;    /* for a regular file */
+       char       *source_link_target; /* for a symlink */
 
-       /* for a symlink */
-       char       *link_target;
+       /*
+        * What will we do to the file?  Set from decide_file_action().
+        */
+       file_action_t action;
 
        struct file_entry_t *next;
 } file_entry_t;
@@ -71,19 +93,18 @@ typedef struct filemap_t
        /*
         * After processing all the remote files, the entries in the linked list
         * are moved to this array. After processing local files, too, all the
-        * local entries are added to the array by filemap_finalize, and sorted in
-        * the final order. After filemap_finalize, all the entries are in the
-        * array, and the linked list is empty.
+        * local entries are added to the array by decide_file_actions(), and
+        * sorted in the final order. After decide_file_actions(), all the entries
+        * are in the array, and the linked list is empty.
         */
        file_entry_t **array;
        int                     narray;                 /* current length of array */
 
        /*
-        * Summary information. total_size is the total size of the source
-        * cluster, and fetch_size is the number of bytes that needs to be copied.
+        * Summary information.
         */
-       uint64          total_size;
-       uint64          fetch_size;
+       uint64          total_size;             /* total size of the source cluster */
+       uint64          fetch_size;             /* number of bytes that needs to be copied */
 } filemap_t;
 
 extern filemap_t *filemap;
@@ -94,11 +115,12 @@ extern void print_filemap(void);
 
 /* Functions for populating the filemap */
 extern void process_source_file(const char *path, file_type_t type,
-                                                               size_t newsize, const char *link_target);
+                                                               size_t size, const char *link_target);
 extern void process_target_file(const char *path, file_type_t type,
-                                                               size_t newsize, const char *link_target);
-extern void process_block_change(ForkNumber forknum, RelFileNode rnode,
-                                                                BlockNumber blkno);
-extern void filemap_finalize(void);
+                                                               size_t size, const char *link_target);
+extern void process_target_wal_block_change(ForkNumber forknum,
+                                                                                       RelFileNode rnode,
+                                                                                       BlockNumber blkno);
+extern void decide_file_actions(void);
 
 #endif                                                 /* FILEMAP_H */
index bf4dfc23b963dde43cf4d4d806b3a985fd2aa643..2fc4a784bdb3cf317bcfc571ae36a90751e15da8 100644 (file)
@@ -465,7 +465,7 @@ libpq_executeFileMap(filemap_t *map)
                entry = map->array[i];
 
                /* If this is a relation file, copy the modified blocks */
-               execute_pagemap(&entry->pagemap, entry->path);
+               execute_pagemap(&entry->target_pages_to_overwrite, entry->path);
 
                switch (entry->action)
                {
@@ -476,15 +476,15 @@ libpq_executeFileMap(filemap_t *map)
                        case FILE_ACTION_COPY:
                                /* Truncate the old file out of the way, if any */
                                open_target_file(entry->path, true);
-                               fetch_file_range(entry->path, 0, entry->newsize);
+                               fetch_file_range(entry->path, 0, entry->source_size);
                                break;
 
                        case FILE_ACTION_TRUNCATE:
-                               truncate_target_file(entry->path, entry->newsize);
+                               truncate_target_file(entry->path, entry->source_size);
                                break;
 
                        case FILE_ACTION_COPY_TAIL:
-                               fetch_file_range(entry->path, entry->oldsize, entry->newsize);
+                               fetch_file_range(entry->path, entry->target_size, entry->source_size);
                                break;
 
                        case FILE_ACTION_REMOVE:
@@ -494,6 +494,10 @@ libpq_executeFileMap(filemap_t *map)
                        case FILE_ACTION_CREATE:
                                create_target(entry);
                                break;
+
+                       case FILE_ACTION_UNDECIDED:
+                               pg_fatal("no action decided for \"%s\"", entry->path);
+                               break;
                }
        }
 
index a2f1ab5422bcbd344a77ce226b90ef5b7f49e0c2..eae1797f948f6445cc43b2a3f2d9dd74f30093e6 100644 (file)
@@ -436,6 +436,6 @@ extractPageInfo(XLogReaderState *record)
                if (forknum != MAIN_FORKNUM)
                        continue;
 
-               process_block_change(forknum, rnode, blkno);
+               process_target_wal_block_change(forknum, rnode, blkno);
        }
 }
index 5a7ab764db48bbaea4f333de71964e18fca69e44..4760090d06e4a35e7c5a1ace312252d398b2c460 100644 (file)
@@ -369,7 +369,7 @@ main(int argc, char **argv)
                                chkpttli);
 
        /*
-        * Build the filemap, by comparing the source and target data directories.
+        * Collect information about all files in the target and source systems.
         */
        filemap_create();
        if (showprogress)
@@ -390,8 +390,12 @@ main(int argc, char **argv)
                pg_log_info("reading WAL in target");
        extractPageMap(datadir_target, chkptrec, lastcommontliIndex,
                                   ControlFile_target.checkPoint, restore_command);
-       filemap_finalize();
 
+       /*
+        * We have collected all information we need from both systems. Decide
+        * what to do with each file.
+        */
+       decide_file_actions();
        if (showprogress)
                calculate_totals();