summary | refs | log | tree | commit | diff
path: root/python/walmgr.py
diff options
context:
space:
mode:
author Martin Pihlak 2011-03-17 19:03:29 +0000
committer Martin Pihlak 2011-03-17 19:13:57 +0000
commit e90ad465727fcd18e46d5eb8971983db36ae0bd0 (patch)
tree 2cf6be2fdc751c15dcfc712eead908c8d11ec23d /python/walmgr.py
parent d18347f18b5798dec90eb771c9489a6b28d84e55 (diff)
Couple of bugfixes for stale backup lock handling.
Diffstat (limited to 'python/walmgr.py')
-rwxr-xr-x python/walmgr.py 55
1 files changed, 32 insertions, 23 deletions
diff --git a/python/walmgr.py b/python/walmgr.py
index dfac72e2..3ee61de1 100755
--- a/python/walmgr.py
+++ b/python/walmgr.py
@@ -531,15 +531,14 @@ class WalMgr(skytools.DBScript):
self.exec_big_rsync(cmdline + [ source_dir, dst_loc ])
-
- def exec_cmd(self, cmdline,allow_error=False):
+ def exec_cmd(self, cmdline, allow_error=False):
cmd = "' '".join(cmdline)
self.log.debug("Execute cmd: '%s'" % (cmd))
if self.not_really:
return
- #res = os.spawnvp(os.P_WAIT, cmdline[0], cmdline)
+
process = subprocess.Popen(cmdline,stdout=subprocess.PIPE)
- output=process.communicate()
+ output = process.communicate()
res = process.returncode
if res != 0 and not allow_error:
@@ -752,7 +751,7 @@ class WalMgr(skytools.DBScript):
cmdline = ["ssh", "-nT", host, "mkdir", "-p", path]
self.exec_cmd(cmdline)
- def remote_walmgr(self, command, stdin_disabled = True,allow_error=False):
+ def remote_walmgr(self, command, stdin_disabled = True, allow_error=False):
"""Pass a command to slave WalManager"""
sshopt = "-T"
@@ -770,7 +769,33 @@ class WalMgr(skytools.DBScript):
if self.not_really:
cmdline += ["--not-really"]
- self.exec_cmd(cmdline)
+ return self.exec_cmd(cmdline, allow_error)
+
+ def remote_xlock(self):
+ """
+ Obtain the backup lock to ensure that several backups are not
+ run in parallel. If someone already has the lock we check if
+ this is from a previous (failed) backup. If that is the case,
+ the lock is released and re-obtained.
+ """
+ xlock_cmd = "xlock %d" % os.getpid()
+ ret = self.remote_walmgr(xlock_cmd, allow_error=True)
+ if ret[0] != 0:
+ # lock failed.
+ try:
+ lock_pid = int(ret[1])
+ except ValueError:
+ self.log.fatal("Invalid pid in backup lock")
+ sys.exit(1)
+
+ try:
+ os.kill(lock_pid, 0)
+ self.log.fatal("Backup lock already taken")
+ sys.exit(1)
+ except OSError:
+ # no process, carry on
+ self.remote_walmgr("xrelease")
+ self.remote_walmgr(xlock_cmd)
def override_cf_option(self, option, value):
"""Set a configuration option, if it is unset"""
@@ -1932,7 +1957,7 @@ STOP TIME: %(stop_time)s
pidstring = lockfilehandle.read();
try:
pid = int(pidstring)
- print("%d",pid)
+ print("%d" % pid)
except ValueError:
self.log.error("lock file does not contain a pid:" + pidstring)
return 1
@@ -2131,22 +2156,6 @@ STOP TIME: %(stop_time)s
os.remove(full)
cur_last = fname
return cur_last
- def remote_xlock(self):
- ret = self.remote_walmgr("xlock " + str(os.getpid()),allow_error=True)
- if ret[0] != 0:
- # lock failed.
- try:
- lock_pid = int(ret[1])
- if os.kill(lock_pid,0):
- #process exists.
- self.log.error("lock already obtained")
- else:
- self.remote_walmgr("xrelease")
- ret = self.remote_walmgr("xlock " + pid(),allow_error=True)
- if ret[0] != 0:
- self.log.error("unable to obtain lock")
- except ValueError:
- self.log.error("error obtaining lock")
if __name__ == "__main__":
script = WalMgr(sys.argv[1:])