diff options
author | Cédric Villemain | 2011-11-07 13:00:02 +0000 |
---|---|---|
committer | Cédric Villemain | 2011-11-10 02:53:16 +0000 |
commit | 0ff408711dab18b05de26656a945fa37e363f6aa (patch) | |
tree | 11eda764c1d521f383f39c079591fd4484692de2 /check_postgres.pl | |
parent | 1118e0ab0028fee46a5bdaac9de169068e4fd03f (diff) |
Add an --assume-prod option
This is based on --assume-standby-mode. Reduce the option name per suggestion
from Greg (but I kept the original one for standby mode).
The option is only used in check_postgres_checkpoint and allows to confirm or
emit a critical if the server is not in the expected mode.
Note: this can be used in other places, and maybe improved (to reduce the
number og open_controldata calls)
TODO/FIXME:
* I found that --assume-p or --assume-s are viewed by GetOpt like the longer
version of the option, a bug ?
* The original code to call pg_controldata does not work in French (because of
regex/locale). Why not use LANG=C in those checks where there is NO point to
use locale and error prone regex ?
Diffstat (limited to 'check_postgres.pl')
-rwxr-xr-x | check_postgres.pl | 63 |
1 files changed, 61 insertions, 2 deletions
diff --git a/check_postgres.pl b/check_postgres.pl index 3b5537e30..c5e5d3037 100755 --- a/check_postgres.pl +++ b/check_postgres.pl @@ -104,6 +104,7 @@ our %msg = ( 'checkcluster-nomrtg'=> q{Must provide a number via the --mrtg option}, 'checkmode-state' => q{Database cluster state:}, 'checkmode-recovery' => q{in archive recovery}, + 'checkmode-prod' => q{in production}, 'checkpoint-baddir' => q{Invalid data_directory: "$1"}, 'checkpoint-baddir2' => q{pg_controldata could not read the given data directory: "$1"}, 'checkpoint-badver' => q{Failed to run pg_controldata - probably the wrong version ($1)}, @@ -163,6 +164,7 @@ our %msg = ( 'logfile-stderr' => q{Logfile output has been redirected to stderr: please provide a filename}, 'logfile-syslog' => q{Database is using syslog, please specify path with --logfile option (fac=$1)}, 'mode-standby' => q{Server in standby mode}, + 'mode' => q{mode}, 'mrtg-fail' => q{Action $1 failed: $2}, 'new-ver-nocver' => q{Could not download version information for $1}, 'new-ver-badver' => q{Could not parse version information for $1}, @@ -934,6 +936,7 @@ GetOptions( 'debugoutput=s', 'no-check_postgresrc', 'assume-standby-mode', + 'assume-prod', 'action=s', 'warning=s', @@ -1206,6 +1209,7 @@ Limit options: Other options: --assume-standby-mode assume that server in continious WAL recovery mode + --assume-prod assume that server in production mode --PSQL=FILE location of the psql executable; avoid using if possible -v, --verbose verbosity level; can be used more than once to increase the level -h, --help display this help information @@ -1252,7 +1256,9 @@ if ($opt{showtime}) { ## Check the current database mode our $STANDBY = 0; +our $MASTER = 0; make_sure_standby_mode() if $opt{'assume-standby-mode'}; +make_sure_prod() if $opt{'assume-prod'}; ## We don't (usually) want to die, but want a graceful Nagios-like exit instead sub ndie { @@ -1560,6 +1566,21 @@ sub make_sure_standby_mode { } ## end of make_sure_standby_mode +sub make_sure_prod { + + ## Checks if database in production mode + ## Requires $ENV{PGDATA} or --datadir + + my $last = make_sure_mode_is(); + + my $regex = msg('checkmode-prod'); + if ($last =~ /$regex/) { + $MASTER = 1; + } + + return; + +} ## end of make_sure_production_mode sub finishup { @@ -3661,7 +3682,6 @@ FROM ( } ## end of check_bloat - sub check_checkpoint { ## Checks how long in seconds since the last checkpoint on a WAL slave @@ -3674,6 +3694,9 @@ sub check_checkpoint { ## may make more sense on the master, or we may want to look at ## the WAL segments received/processed instead of the checkpoint ## timestamp. + ## This checks can use the optionnal --asume-standby-mode or + ## --assume-prod: if the mode found is not the mode assumed, a + ## CRITICAL is emitted. ## Supports: Nagios, MRTG ## Warning and critical are seconds @@ -3718,11 +3741,35 @@ sub check_checkpoint { $db->{perf} = sprintf '%s=%s;%s;%s', perfname(msg('age')), $diff, $warning, $critical; + my $mode = ''; + if ($STANDBY) { + $mode = 'STANDBY'; + } + if ($MASTER) { + $mode = 'MASTER'; + } + + ## If we have an assume flag, then honor it. + my $goodmode = 1; + if ($opt{'assume-standby-mode'} and not $STANDBY) { + $goodmode = 0; + $mode = 'NOT STANDBY'; + } + elsif ($opt{'assume-prod'} and not $MASTER) { + $goodmode = 0; + $mode = 'NOT MASTER'; + } + + if (length($mode) > 0) { + $db->{perf} .= sprintf ' %s=%s', + perfname(msg('mode')), $mode; + } + if ($MRTG) { do_mrtg({one => $diff, msg => $msg}); } - if (length $critical and $diff >= $critical) { + if ((length $critical and $diff >= $critical) or not $goodmode) { add_critical $msg; return; } @@ -7862,6 +7909,16 @@ Example: postgres@db$./check_postgres.pl --action=version --warning=8.1 --datadir /var/lib/postgresql/8.3/main/ --assume-standby-mode POSTGRES_VERSION OK: Server in standby mode | time=0.00 +=item B<--assume-prod> + +If specified, check if server in production mode is performed (--datadir is required). +The option is only relevant for (C<symlink: check_postgres_checkpoint>). + +Example: + + postgres@db$./check_postgres.pl --action=checkpoint --datadir /var/lib/postgresql/8.3/main/ --assume-prod + POSTGRES_CHECKPOINT OK: Last checkpoint was 72 seconds ago | age=72;;300 mode=MASTER + =item B<-h> or B<--help> Displays a help screen with a summary of all actions and options. @@ -8132,6 +8189,8 @@ was run, as determined by parsing the call to C<pg_controldata>. Because of this pg_controldata executable must be available in the current path. Alternatively, you can set the environment variable C<PGCONTROLDATA> to the exact location of the pg_controldata executable, or you can specify C<PGBINDIR> as the directory that it lives in. +It is also possible to use the special options I<--assume-prod> or +I<--assume-standby-mode>, if the mode found is not the one expected, a CRITICAL is emitted. At least one warning or critical argument must be set. |