diff options
| author | Marko Kreen | 2012-11-01 09:28:50 +0000 |
|---|---|---|
| committer | Marko Kreen | 2012-11-02 08:40:09 +0000 |
| commit | 40a294b729a6815803457bde739ddf7a9bf6b170 (patch) | |
| tree | 2d62d6adccbf2eaab8937d9109bb769495bc0baf /python | |
| parent | 978bd17e1cd71f7e5cc0cdad1772c68c3c81dd81 (diff) | |
Londiste: try to show exact event info on failure.
This should help debugging replication problems.
It does not work on the first error, as we batch together a lot of SQL
statements, but on retry Londiste starts replaying events one-by-one,
so the exact event can be spotted.
DBScript unnecessarily overrode emsg; drop that.
Diffstat (limited to 'python')
| -rw-r--r-- | python/londiste/playback.py | 21 | ||||
| -rw-r--r-- | python/skytools/scripting.py | 1 |
2 files changed, 21 insertions, 1 deletions
diff --git a/python/londiste/playback.py b/python/londiste/playback.py index fddf3b5c..4129cbe1 100644 --- a/python/londiste/playback.py +++ b/python/londiste/playback.py @@ -300,6 +300,8 @@ class Replicator(CascadedWorker): copy_table_name = None # filled by Copytable() sql_list = [] + current_event = None + def __init__(self, args): """Replication init.""" CascadedWorker.__init__(self, 'londiste3', 'db', args) @@ -337,6 +339,8 @@ class Replicator(CascadedWorker): def process_remote_batch(self, src_db, tick_id, ev_list, dst_db): "All work for a batch. Entry point from SetConsumer." + self.current_event = None + # this part can play freely with transactions if not self.code_check_done: @@ -592,7 +596,13 @@ class Replicator(CascadedWorker): def process_remote_event(self, src_curs, dst_curs, ev): """handle one event""" + self.log.debug("New event: id=%s / type=%s / data=%s / extra1=%s" % (ev.id, ev.type, ev.data, ev.extra1)) + + # set current_event only if processing them one-by-one + if self.work_state < 0: + self.current_event = ev + if ev.type in ('I', 'U', 'D'): self.handle_data_event(ev, dst_curs) elif ev.type[:2] in ('I:', 'U:', 'D:'): @@ -618,6 +628,9 @@ class Replicator(CascadedWorker): else: CascadedWorker.process_remote_event(self, src_curs, dst_curs, ev) + # no point keeping it around longer + self.current_event = None + def handle_data_event(self, ev, dst_curs): """handle one truncate event""" t = self.get_table_by_name(ev.extra1) @@ -955,6 +968,14 @@ class Replicator(CascadedWorker): return CascadedWorker.copy_event(self, dst_curs, ev, filtered_copy) + def exception_hook(self, det, emsg): + # add event info to error message + if self.current_event: + ev = self.current_event + info = "[ev_id=%d,ev_txid=%d] " % (ev.ev_id,ev.ev_txid) + emsg = info + emsg + super(Replicator, self).exception_hook(det, emsg) + if __name__ == '__main__': script = Replicator(sys.argv[1:]) script.start() diff --git a/python/skytools/scripting.py b/python/skytools/scripting.py 
index 049da251..ff8eac79 100644 --- a/python/skytools/scripting.py +++ b/python/skytools/scripting.py @@ -774,7 +774,6 @@ class DBScript(BaseScript): sql = getattr(curs, 'query', '?') if len(sql) > 200: # avoid logging londiste huge batched queries sql = sql[:60] + " ..." - emsg = str(d).strip() self.log.exception("Job %s got error on connection '%s': %s. Query: %s" % ( self.job_name, cname, emsg, sql)) else: |
