#include <fcntl.h>
#include <signal.h>
#include <time.h>
+#include <sys/stat.h>
#include <sys/time.h>
#include <sys/wait.h>
#include <unistd.h>
#define PGARCH_RESTART_INTERVAL 10 /* How often to attempt to restart a
* failed archiver; in seconds. */
+/*
+ * Maximum number of retries allowed when attempting to archive a WAL
+ * file.
+ */
#define NUM_ARCHIVE_RETRIES 3
+/*
+ * Maximum number of retries allowed when attempting to remove an
+ * orphan archive status file.
+ */
+#define NUM_ORPHAN_CLEANUP_RETRIES 3
+
/* ----------
* Local data
while (pgarch_readyXlog(xlog))
{
int failures = 0;
+ int failures_orphan = 0;
for (;;)
{
+ struct stat stat_buf;
+ char pathname[MAXPGPATH];
+
/*
* Do not initiate any more archive commands after receiving
* SIGTERM, nor after the postmaster has died unexpectedly. The
return;
}
+ /*
+ * Since archive status files are not removed in a durable manner,
+ * a system crash could leave behind .ready files for WAL segments
+ * that have already been recycled or removed. In this case,
+ * simply remove the orphan status file and move on. unlink() is
+ * used here as even on subsequent crashes the same orphan files
+ * would get removed, so there is no need to worry about
+ * durability.
+ */
+ snprintf(pathname, MAXPGPATH, XLOGDIR "/%s", xlog);
+ if (stat(pathname, &stat_buf) != 0 && errno == ENOENT)
+ {
+ char xlogready[MAXPGPATH];
+
+ StatusFilePath(xlogready, xlog, ".ready");
+ if (unlink(xlogready) == 0)
+ {
+ ereport(WARNING,
+ (errmsg("removed orphan archive status file \"%s\"",
+ xlogready)));
+
+ /* leave loop and move to the next status file */
+ break;
+ }
+
+ if (++failures_orphan >= NUM_ORPHAN_CLEANUP_RETRIES)
+ {
+ ereport(WARNING,
+ (errmsg("removal of orphan archive status file \"%s\" failed too many times, will try again later",
+ xlogready)));
+
+ /* give up cleanup of orphan status files */
+ return;
+ }
+
+ /* wait a bit before retrying */
+ pg_usleep(1000000L);
+ continue;
+ }
+
if (pgarch_archiveXlog(xlog))
{
/* successful */