diff options
-rw-r--r-- | python/skytools/config.py | 3 | ||||
-rwxr-xr-x | python/walmgr.py | 6 | ||||
-rwxr-xr-x | scripts/scriptmgr.py | 63 |
3 files changed, 51 insertions, 21 deletions
diff --git a/python/skytools/config.py b/python/skytools/config.py index c276a5d1..9f19306f 100644 --- a/python/skytools/config.py +++ b/python/skytools/config.py @@ -205,3 +205,6 @@ class Config(object): def items(self): """Returns list of (name, value) for each option in main section.""" return self.cf.items(self.main_section) + + # define some aliases (short-cuts / backward compatibility cruft) + getbool = getboolean diff --git a/python/walmgr.py b/python/walmgr.py index 7afc3c73..d4796401 100755 --- a/python/walmgr.py +++ b/python/walmgr.py @@ -1358,6 +1358,12 @@ STOP TIME: %(stop_time)s # copy data self.exec_rsync([ srcpath, dst_loc ], True) + # sync the buffers to disk - this should reduce the chance + # of WAL file corruption in case the slave crashes. + slave = self.cf.get("slave") + cmdline = ["ssh", "-nT", slave, "sync" ] + self.exec_cmd(cmdline) + self.log.debug("%s: done", srcname) end_time = time.time() self.stat_add('count', 1) diff --git a/scripts/scriptmgr.py b/scripts/scriptmgr.py index 0eef0529..26317036 100755 --- a/scripts/scriptmgr.py +++ b/scripts/scriptmgr.py @@ -78,6 +78,7 @@ class ScriptMgr(skytools.DBScript): def init_optparse(self, p = None): p = skytools.DBScript.init_optparse(self, p) p.add_option("-a", "--all", action="store_true", help="apply command to all jobs") + p.add_option("-w", "--wait", action="store_true", help="wait for job(s) after signaling") p.set_usage(command_usage.strip()) return p @@ -170,18 +171,14 @@ class ScriptMgr(skytools.DBScript): print(job) def cmd_start(self, job_name): - if job_name not in self.job_map: - self.log.error('Unknown job: '+job_name) - return 1 - job = self.job_map[job_name] - if job['disabled']: - self.log.info("Skipping %s" % job_name) - return 0 + job = self.get_job_by_name (job_name) + if isinstance (job, int): + return job # ret.code self.log.info('Starting %s' % job_name) os.chdir(job['cwd']) pidfile = job['pidfile'] if not pidfile: - self.log.warning("No pidfile for %s cannot launch") + 
self.log.warning("No pidfile for %s, cannot launch" % job_name) return 0 if os.path.isfile(pidfile): if skytools.signal_pidfile(pidfile, 0): @@ -199,26 +196,42 @@ class ScriptMgr(skytools.DBScript): return 0 def cmd_stop(self, job_name): - if job_name not in self.job_map: - self.log.error('Unknown job: '+job_name) - return - job = self.job_map[job_name] - if job['disabled']: - self.log.info("Skipping %s" % job_name) - return + job = self.get_job_by_name (job_name) + if isinstance (job, int): + return job # ret.code self.log.info('Stopping %s' % job_name) self.signal_job(job, signal.SIGINT) def cmd_reload(self, job_name): + job = self.get_job_by_name (job_name) + if isinstance (job, int): + return job # ret.code + self.log.info('Reloading %s' % job_name) + self.signal_job(job, signal.SIGHUP) + + def get_job_by_name (self, job_name): if job_name not in self.job_map: - self.log.error('Unknown job: '+job_name) - return + self.log.error ("Unknown job: %s" % job_name) + return 1 job = self.job_map[job_name] if job['disabled']: - self.log.info("Skipping %s" % job_name) - return - self.log.info('Reloading %s' % job_name) - self.signal_job(job, signal.SIGHUP) + self.log.info ("Skipping %s" % job_name) + return 0 + return job + + def wait_for_stop (self, job_name): + job = self.get_job_by_name (job_name) + if isinstance (job, int): + return job # ret.code + msg = False + while True: + if skytools.signal_pidfile (job['pidfile'], 0): + if not msg: + self.log.info ("Waiting for %s to stop" % job_name) + msg = True + time.sleep (0.1) + else: + return 0 def signal_job(self, job, sig): os.chdir(job['cwd']) @@ -274,10 +287,18 @@ class ScriptMgr(skytools.DBScript): elif cmd == "stop": for n in jobs: self.cmd_stop(n) + if self.options.wait: + for n in jobs: + self.wait_for_stop(n) elif cmd == "restart": for n in jobs: self.cmd_stop(n) + if self.options.wait: + for n in jobs: + self.wait_for_stop(n) + else: time.sleep(2) + for n in jobs: self.cmd_start(n) elif cmd == "reload": for n 
in jobs: |