# Origin: commit 128ad6eb0c757c34b9332cd41e79efe6b73e887a
#         (client/utils/misc.py)
# Author: Tomas Vondra
# Date:   Thu, 13 Oct 2016 13:22:49 +0200
# Subject: open connections in a way resilient to temporary failures
#
# In case of connection failure (e.g. a backend getting terminated by the
# OOM killer and taking down the whole server), the workers failed with an
# exception without putting a result into the queue, leading to hangs.
#
# This fixes that by making the workers resilient to connection issues by
# catching the exceptions and terminating nicely. Also, we retry the
# connection several times, to continue with benchmarking even after OOM
# hits us unexpectedly, etc.

import os
import time
from subprocess import call, STDOUT


def _open_connection(dbname):
    '''Make a single attempt to open a connection and a cursor on it.

    Connects to 'dbname' on localhost, switches the connection to
    autocommit and creates a RealDictCursor. Returns a (conn, cursor)
    tuple; any failure is propagated to the caller.
    '''

    # The driver is imported at the point of use, so a missing psycopg2
    # surfaces as an ImportError on the first connection attempt.
    import psycopg2
    import psycopg2.extras

    conn = psycopg2.connect('host=localhost dbname=%s' % (dbname,))

    try:
        # TODO do we actually need autocommit?
        conn.autocommit = True
        cursor = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
    except Exception:
        # The connection is open but unusable - close it instead of
        # leaking one server backend per failed attempt.
        disconnect(conn, None)
        raise

    return (conn, cursor)


def connect(dbname, conn, cursor, nretries=60, delay=1.0):
    '''Try opening a connection and a cursor. If it does not succeed (e.g.
    when the database is performing recovery after a crash), retry multiple
    times (as specified by nretries and delay in seconds).

    dbname   - name of the database to connect to (always on localhost)
    conn     - existing connection, or None
    cursor   - existing cursor, or None
    nretries - maximum number of connection attempts
    delay    - number of seconds to sleep between two attempts

    Returns a (conn, cursor) tuple. When both 'conn' and 'cursor' are
    passed in, they are returned unchanged. Otherwise the passed-in values
    are ignored (they are not closed here) and a new pair is opened. If all
    attempts fail, (None, None) is returned.

    Raises ImportError when psycopg2 is not available, because retrying
    can't fix that.
    '''

    # if we already have connection and a cursor, return it
    if conn and cursor:
        return (conn, cursor)

    # we'll try repeatedly, with delays between the attempts
    for attempt in range(nretries):

        # sleep only between attempts - there's no point in waiting after
        # the last failure, just before giving up
        if attempt > 0:
            time.sleep(delay)

        try:
            return _open_connection(dbname)
        except ImportError:
            # a missing driver is not a temporary failure
            raise
        except Exception:
            # connection failure - try again (KeyboardInterrupt and
            # SystemExit are deliberately not caught, so the worker can
            # still be stopped while it's retrying)
            continue

    return (None, None)


def disconnect(conn, cursor):
    '''Make sure we're disconnected (but prevent exceptions).

    Closes the cursor first and then the connection. Either argument may
    be None, in which case it is skipped. Errors raised while closing are
    ignored, as the connection may already be broken.
    '''

    for resource in (cursor, conn):

        if resource is None:
            continue

        try:
            resource.close()
        except Exception:
            # best effort - there's nothing useful to do about a failed
            # close, but don't swallow KeyboardInterrupt / SystemExit
            pass