summaryrefslogtreecommitdiff
path: root/python
diff options
context:
space:
mode:
authorMarko Kreen2012-11-01 09:28:50 +0000
committerMarko Kreen2012-11-02 08:40:09 +0000
commit40a294b729a6815803457bde739ddf7a9bf6b170 (patch)
tree2d62d6adccbf2eaab8937d9109bb769495bc0baf /python
parent978bd17e1cd71f7e5cc0cdad1772c68c3c81dd81 (diff)
Londiste: try to show exact event info on failure.
This should help with debugging replication problems. It does not work on the first error, as we batch together a lot of SQL statements, but on retry Londiste starts replaying events one-by-one, and then the exact event can be spotted. DBScript unnecessarily overrode emsg; drop that.
Diffstat (limited to 'python')
-rw-r--r--python/londiste/playback.py21
-rw-r--r--python/skytools/scripting.py1
2 files changed, 21 insertions, 1 deletions
diff --git a/python/londiste/playback.py b/python/londiste/playback.py
index fddf3b5c..4129cbe1 100644
--- a/python/londiste/playback.py
+++ b/python/londiste/playback.py
@@ -300,6 +300,8 @@ class Replicator(CascadedWorker):
copy_table_name = None # filled by Copytable()
sql_list = []
+ current_event = None
+
def __init__(self, args):
"""Replication init."""
CascadedWorker.__init__(self, 'londiste3', 'db', args)
@@ -337,6 +339,8 @@ class Replicator(CascadedWorker):
def process_remote_batch(self, src_db, tick_id, ev_list, dst_db):
"All work for a batch. Entry point from SetConsumer."
+ self.current_event = None
+
# this part can play freely with transactions
if not self.code_check_done:
@@ -592,7 +596,13 @@ class Replicator(CascadedWorker):
def process_remote_event(self, src_curs, dst_curs, ev):
"""handle one event"""
+
self.log.debug("New event: id=%s / type=%s / data=%s / extra1=%s" % (ev.id, ev.type, ev.data, ev.extra1))
+
+ # set current_event only if processing them one-by-one
+ if self.work_state < 0:
+ self.current_event = ev
+
if ev.type in ('I', 'U', 'D'):
self.handle_data_event(ev, dst_curs)
elif ev.type[:2] in ('I:', 'U:', 'D:'):
@@ -618,6 +628,9 @@ class Replicator(CascadedWorker):
else:
CascadedWorker.process_remote_event(self, src_curs, dst_curs, ev)
+ # no point keeping it around longer
+ self.current_event = None
+
def handle_data_event(self, ev, dst_curs):
"""handle one truncate event"""
t = self.get_table_by_name(ev.extra1)
@@ -955,6 +968,14 @@ class Replicator(CascadedWorker):
return
CascadedWorker.copy_event(self, dst_curs, ev, filtered_copy)
+ def exception_hook(self, det, emsg):
+ # add event info to error message
+ if self.current_event:
+ ev = self.current_event
+ info = "[ev_id=%d,ev_txid=%d] " % (ev.ev_id,ev.ev_txid)
+ emsg = info + emsg
+ super(Replicator, self).exception_hook(det, emsg)
+
if __name__ == '__main__':
script = Replicator(sys.argv[1:])
script.start()
diff --git a/python/skytools/scripting.py b/python/skytools/scripting.py
index 049da251..ff8eac79 100644
--- a/python/skytools/scripting.py
+++ b/python/skytools/scripting.py
@@ -774,7 +774,6 @@ class DBScript(BaseScript):
sql = getattr(curs, 'query', '?')
if len(sql) > 200: # avoid logging londiste huge batched queries
sql = sql[:60] + " ..."
- emsg = str(d).strip()
self.log.exception("Job %s got error on connection '%s': %s. Query: %s" % (
self.job_name, cname, emsg, sql))
else: