summaryrefslogtreecommitdiff
path: root/client/utils/misc.py
diff options
context:
space:
mode:
authorTomas Vondra2016-10-13 11:22:49 +0000
committerTomas Vondra2017-02-27 00:31:05 +0000
commit128ad6eb0c757c34b9332cd41e79efe6b73e887a (patch)
tree2463b8e67dc78ade6c8bb5548adc2fd191558f44 /client/utils/misc.py
parentafafb8549915da7b1dddb3e355efef70586a26aa (diff)
open connections in a way resilient to temporary failures
In case of a connection failure (e.g. a backend getting terminated by the OOM killer and taking down the whole server), the workers failed with an exception without putting a result into the queue, leading to hangs. Fix this by making the workers resilient to connection issues: they now catch the exceptions and terminate cleanly. Also, retry the connection several times, so that benchmarking can continue even after the OOM killer hits us unexpectedly, etc.
Diffstat (limited to 'client/utils/misc.py')
-rw-r--r--client/utils/misc.py46
1 files changed, 46 insertions, 0 deletions
diff --git a/client/utils/misc.py b/client/utils/misc.py
index 1d5436e..6f73998 100644
--- a/client/utils/misc.py
+++ b/client/utils/misc.py
@@ -1,4 +1,6 @@
import os
+import psycopg2
+import psycopg2.extras
import time
from subprocess import call, STDOUT
@@ -21,3 +23,47 @@ def run_cmd(args, env=None, cwd=None):
strout.seek(0)
return (retcode, strout.read(), (time.time() - start))
+
+
+def connect(dbname, conn, cursor, nretries = 60, delay = 1.0):
+    '''Return an open (conn, cursor) pair for database dbname on localhost.
+
+    If both conn and cursor are already set, they are returned unchanged.
+    Otherwise a new connection is opened (with autocommit enabled) together
+    with a RealDictCursor. If that does not succeed (e.g. when the database
+    is performing recovery after a crash), the attempt is retried up to
+    nretries times, sleeping delay seconds between attempts.
+
+    Returns (None, None) if all attempts fail.
+    '''
+
+    # if we already have connection and a cursor, return it
+    if conn and cursor:
+        return (conn, cursor)
+
+    # we'll try repeatedly, with delays between the attempts
+    for attempt in range(nretries):
+
+        new_conn = None
+
+        try:
+            new_conn = psycopg2.connect('host=localhost dbname=%s' % (dbname,))
+            # TODO do we actually need autocommit?
+            new_conn.autocommit = True
+            new_cursor = new_conn.cursor(
+                cursor_factory=psycopg2.extras.RealDictCursor)
+
+            return (new_conn, new_cursor)
+        except Exception:
+            # Catch Exception rather than using a bare except, so that
+            # KeyboardInterrupt / SystemExit still stop the worker.
+            # If the connection was opened but the setup after it failed,
+            # close it so that retries do not leak connections.
+            if new_conn is not None:
+                try:
+                    new_conn.close()
+                except Exception:
+                    # best-effort cleanup of an already broken connection
+                    pass
+
+        # connection failure - sleep for a while, then try again (there is
+        # no point in sleeping after the final attempt)
+        if attempt + 1 < nretries:
+            time.sleep(delay)
+
+    return (None, None)
+
+
+def disconnect(conn, cursor):
+    '''Make sure we're disconnected (but prevent exceptions).
+
+    Closes the cursor first and then the connection. Either argument may be
+    None, in which case it is skipped. Errors raised while closing are
+    ignored, so this is safe to call on a connection that is already
+    closed or broken.
+    '''
+
+    for resource in (cursor, conn):
+        if resource is None:
+            continue
+
+        try:
+            resource.close()
+        except Exception:
+            # best-effort cleanup - ignore close failures, but (unlike a
+            # bare except) let KeyboardInterrupt / SystemExit propagate
+            pass