summary | refs | log | tree | commit | diff
path: root/src/backend/replication
diff options
context:
space:
mode:
author: Alvaro Herrera, 2020-06-24 18:23:39 +0000
committer: Alvaro Herrera, 2020-06-24 18:23:39 +0000
commit: b8fd4e02c6d01183bf6def5897ad6cf7766bfff4 (patch)
tree: 11d04fed6fe779e476f821bf072036619ac9d809 /src/backend/replication
parent: 0188bb82531f1b0ae3648fb81a4bd4a4f6242127 (diff)
Adjust max_slot_wal_keep_size behavior per review
In pg_replication_slots, change output from normal/reserved/lost to reserved/extended/unreserved/lost, which better expresses the possible states, particularly near the time where segments are no longer safe but checkpoint has not run yet. Under the new definition, reserved means the slot is consuming WAL that's still under the normal WAL size constraints; extended means it's consuming WAL that's being protected by wal_keep_segments or the slot itself, whose size is below max_slot_wal_keep_size; unreserved means the WAL is no longer safe, but checkpoint has not yet removed those files. Such a slot is in imminent danger, but can still continue for a little while and may catch up to the reserved WAL space. Also, there were some bugs in the calculations used to report the status; fixed those. Backpatch to 13. Reported-by: Fujii Masao <masao.fujii@oss.nttdata.com> Author: Kyotaro Horiguchi <horikyota.ntt@gmail.com> Reviewed-by: Fujii Masao <masao.fujii@oss.nttdata.com> Reviewed-by: Álvaro Herrera <alvherre@alvh.no-ip.org> Discussion: https://postgr.es/m/20200616.120236.1809496990963386593.horikyota.ntt@gmail.com
Diffstat (limited to 'src/backend/replication')
-rw-r--r-- src/backend/replication/slotfuncs.c 39
1 files changed, 31 insertions, 8 deletions
diff --git a/src/backend/replication/slotfuncs.c b/src/backend/replication/slotfuncs.c
index 3fc54cb9bab..df854bc6e3f 100644
--- a/src/backend/replication/slotfuncs.c
+++ b/src/backend/replication/slotfuncs.c
@@ -359,24 +359,47 @@ pg_get_replication_slots(PG_FUNCTION_ARGS)
nulls[i++] = true;
break;
- case WALAVAIL_NORMAL:
- values[i++] = CStringGetTextDatum("normal");
- break;
-
case WALAVAIL_RESERVED:
values[i++] = CStringGetTextDatum("reserved");
break;
+ case WALAVAIL_EXTENDED:
+ values[i++] = CStringGetTextDatum("extended");
+ break;
+
+ case WALAVAIL_UNRESERVED:
+ values[i++] = CStringGetTextDatum("unreserved");
+ break;
+
case WALAVAIL_REMOVED:
+
+ /*
+ * If we read the restart_lsn long enough ago, maybe that file
+ * has been removed by now. However, the walsender could have
+ * moved forward enough that it jumped to another file after
+ * we looked. If checkpointer signalled the process to
+ * termination, then it's definitely lost; but if a process is
+ * still alive, then "unreserved" seems more appropriate.
+ */
+ if (!XLogRecPtrIsInvalid(slot_contents.data.restart_lsn))
+ {
+ int pid;
+
+ SpinLockAcquire(&slot->mutex);
+ pid = slot->active_pid;
+ SpinLockRelease(&slot->mutex);
+ if (pid != 0)
+ {
+ values[i++] = CStringGetTextDatum("unreserved");
+ break;
+ }
+ }
values[i++] = CStringGetTextDatum("lost");
break;
-
- default:
- elog(ERROR, "invalid walstate: %d", (int) walstate);
}
if (max_slot_wal_keep_size_mb >= 0 &&
- (walstate == WALAVAIL_NORMAL || walstate == WALAVAIL_RESERVED) &&
+ (walstate == WALAVAIL_RESERVED || walstate == WALAVAIL_EXTENDED) &&
((last_removed_seg = XLogGetLastRemovedSegno()) != 0))
{
XLogRecPtr min_safe_lsn;