author     Magnus Hagander  2019-01-03 20:52:58 +0000
committer  Magnus Hagander  2019-01-04 11:24:06 +0000
commit     69af766f8fae8ef489d951c71a37966fc51d736b (patch)
tree       1e394a959d7ec9b49797b6cb74ec2eac5dbd47e8 /loader/lib
parent     1e165224612094e7605d9109e70a3034a0f5a864 (diff)
Whitespace fixes
Diffstat (limited to 'loader/lib')
-rw-r--r--  loader/lib/log.py       2
-rw-r--r--  loader/lib/mbox.py      1
-rw-r--r--  loader/lib/parser.py    31
-rw-r--r--  loader/lib/storage.py   19
-rw-r--r--  loader/lib/varnish.py   2
5 files changed, 29 insertions, 26 deletions
```diff
diff --git a/loader/lib/log.py b/loader/lib/log.py
index 48722c9..4995969 100644
--- a/loader/lib/log.py
+++ b/loader/lib/log.py
@@ -18,6 +18,7 @@ class Log(object):
     def print_status(self):
         opstatus.print_status()
 
+
 class OpStatus(object):
     def __init__(self):
         self.stored = 0
@@ -32,4 +33,3 @@ class OpStatus(object):
 
 log = Log()
 opstatus = OpStatus()
-
diff --git a/loader/lib/mbox.py b/loader/lib/mbox.py
index c097e72..278fd6c 100644
--- a/loader/lib/mbox.py
+++ b/loader/lib/mbox.py
@@ -10,6 +10,7 @@ from io import BytesIO
 SEPARATOR = "ABCARCHBREAK123" * 50
 bSEPARATOR = bytes(SEPARATOR, 'ascii')
 
+
 class MailboxBreakupParser(object):
     def __init__(self, fn):
         self.EOF = False
diff --git a/loader/lib/parser.py b/loader/lib/parser.py
index a727f0e..cef9468 100644
--- a/loader/lib/parser.py
+++ b/loader/lib/parser.py
@@ -13,6 +13,7 @@ import io
 from lib.exception import IgnorableException
 from lib.log import log
 
+
 class ArchivesParser(object):
     def __init__(self):
         self.parser = BytesParser(policy=compat32)
@@ -64,7 +65,6 @@ class ArchivesParser(object):
             if m and not m in self.parents:
                 self.parents.append(m)
 
-
     def clean_charset(self, charset):
         lcharset = charset.lower()
         if lcharset == 'unknown-8bit' or lcharset == 'x-unknown' or lcharset == 'unknown':
@@ -139,7 +139,7 @@ class ArchivesParser(object):
         if not params:
             # No content-type, so we assume us-ascii
             return str(b, 'us-ascii', errors='ignore')
-        for k,v in params:
+        for k, v in params:
             if k.lower() == 'charset':
                 charset = v
                 break
@@ -157,6 +157,7 @@ class ArchivesParser(object):
     # Regular expression matching the PostgreSQL custom mail footer that
     # is appended to all emails.
     _re_footer = re.compile('(.*)--\s+\nSent via [^\s]+ mailing list \([^\)]+\)\nTo make changes to your subscription:\nhttp://www\.postgresql\.org/mailpref/[^\s]+\s*$', re.DOTALL)
+
     def get_body(self):
         b = self._get_body()
         if b:
@@ -323,8 +324,8 @@ class ArchivesParser(object):
         # If it has a name, we consider it an attachments
         if not container.get_params():
             return
-        for k,v in container.get_params():
-            if k=='name' and v != '':
+        for k, v in container.get_params():
+            if k == 'name' and v != '':
                 # Yes, it has a name
                 try:
                     self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True)))
@@ -374,6 +375,7 @@ class ArchivesParser(object):
         # No name, and text/plain, so ignore it
 
     re_msgid = re.compile('^\s*<(.*)>\s*')
+
     def clean_messageid(self, messageid, ignorebroken=False):
         m = self.re_msgid.match(messageid)
         if not m:
@@ -381,7 +383,7 @@ class ArchivesParser(object):
                 log.status("Could not parse messageid '%s', ignoring it" % messageid)
                 return None
             raise IgnorableException("Could not parse message id '%s'" % messageid)
-        return m.groups(1)[0].replace(' ','')
+        return m.groups(1)[0].replace(' ', '')
 
     # _date_multi_re = re.compile(' \((\w+\s\w+(\s+\w+)*|)\)$')
    # Now using [^\s] instead of \w, to work with japanese chars
@@ -389,6 +391,7 @@ class ArchivesParser(object):
     _date_multi_re2 = re.compile(' ([\+-]\d{4}) \([^)]+\)$')
     _date_multiminus_re = re.compile(' -(-\d+)$')
     _date_offsetnoplus_re = re.compile(' (\d{4})$')
+
     def forgiving_date_decode(self, d):
         if d.strip() == '':
             raise IgnorableException("Failed to parse empty date")
@@ -416,17 +419,17 @@ class ArchivesParser(object):
         if d.endswith('+0-900'):
             d = d.replace('+0-900', '-0900')
         if d.endswith('Mexico/General'):
-            d = d.replace('Mexico/General','CDT')
+            d = d.replace('Mexico/General', 'CDT')
         if d.endswith('Pacific Daylight Time'):
             d = d.replace('Pacific Daylight Time', 'PDT')
         if d.endswith(' ZE2'):
-            d = d.replace(' ZE2',' +0200')
+            d = d.replace(' ZE2', ' +0200')
         if d.find('-Juin-') > 0:
-            d = d.replace('-Juin-','-Jun-')
+            d = d.replace('-Juin-', '-Jun-')
         if d.find('-Juil-') > 0:
-            d = d.replace('-Juil-','-Jul-')
+            d = d.replace('-Juil-', '-Jul-')
         if d.find(' 0 (GMT)'):
-            d = d.replace(' 0 (GMT)',' +0000')
+            d = d.replace(' 0 (GMT)', ' +0000')
 
         if self._date_multiminus_re.search(d):
             d = self._date_multiminus_re.sub(' \\1', d)
@@ -434,7 +437,6 @@ class ArchivesParser(object):
         if self._date_offsetnoplus_re.search(d):
             d = self._date_offsetnoplus_re.sub('+\\1', d)
 
-
         # We have a number of dates in the format
         # "<full datespace> +0200 (MET DST)"
         # or similar. The problem coming from the space within the
@@ -455,7 +457,7 @@ class ArchivesParser(object):
 
             # Some offsets are >16 hours, which postgresql will not
             # (for good reasons) accept
-            if dp.utcoffset() and abs(dp.utcoffset().days * (24*60*60) + dp.utcoffset().seconds) > 60*60*16-1:
+            if dp.utcoffset() and abs(dp.utcoffset().days * (24 * 60 * 60) + dp.utcoffset().seconds) > 60 * 60 * 16 - 1:
                 # Convert it to a UTC timestamp using Python. It will give
                 # us the right time, but the wrong timezone. Should be
                 # enough...
@@ -471,6 +473,7 @@ class ArchivesParser(object):
 
     # Workaround for broken quoting in some MUAs (see below)
     _re_mailworkaround = re.compile('"(=\?[^\?]+\?[QB]\?[^\?]+\?=)"', re.IGNORECASE)
+
     def _decode_mime_header(self, hdr, email_workaround):
         if hdr == None:
             return None
@@ -480,7 +483,7 @@ class ArchivesParser(object):
         # do this *before* doing any MIME decoding, we should be safe against
         # anybody *actually* putting that sequence in the header (since we
         # won't match the encoded contents)
-        hdr = hdr.replace("\n\t"," ")
+        hdr = hdr.replace("\n\t", " ")
 
         # In at least some cases, at least gmail (and possibly other MUAs)
         # incorrectly put double quotes in the name/email field even when
@@ -516,7 +519,7 @@ class ArchivesParser(object):
     def get_mandatory(self, fieldname):
         try:
             x = self.msg[fieldname]
-            if x==None:
+            if x == None:
                 raise Exception()
             return x
         except:
diff --git a/loader/lib/storage.py b/loader/lib/storage.py
index a358068..cf2c284 100644
--- a/loader/lib/storage.py
+++ b/loader/lib/storage.py
@@ -4,6 +4,7 @@ from .parser import ArchivesParser
 
 from lib.log import log, opstatus
 
+
 class ArchivesParserStorage(ArchivesParser):
     def __init__(self):
         super(ArchivesParserStorage, self).__init__()
@@ -82,7 +83,7 @@ class ArchivesParserStorage(ArchivesParser):
                 'message': pk,
             })
         if len(self.attachments):
-            curs.executemany("INSERT INTO attachments (message, filename, contenttype, attachment) VALUES (%(message)s, %(filename)s, %(contenttype)s, %(attachment)s)",[ {
+            curs.executemany("INSERT INTO attachments (message, filename, contenttype, attachment) VALUES (%(message)s, %(filename)s, %(contenttype)s, %(attachment)s)", [{
                 'message': pk,
                 'filename': a[0] or 'unknown_filename',
                 'contenttype': a[1],
@@ -106,11 +107,11 @@ class ArchivesParserStorage(ArchivesParser):
         all_parents = curs.fetchall()
         if len(all_parents):
             # At least one of the parents exist. Now try to figure out which one
-            best_parent = len(self.parents)+1
+            best_parent = len(self.parents) + 1
             best_threadid = -1
             best_parentid = None
-            for i in range(0,len(all_parents)):
-                for j in range(0,len(self.parents)):
+            for i in range(0, len(all_parents)):
+                for j in range(0, len(self.parents)):
                     if self.parents[j] == all_parents[i][1]:
                         # This messageid found. Better than the last one?
                         if j < best_parent:
@@ -226,7 +227,7 @@ class ArchivesParserStorage(ArchivesParser):
             self.msgid, id, self.threadid, self.parentid))
         if len(self.attachments):
             # Insert attachments
-            curs.executemany("INSERT INTO attachments (message, filename, contenttype, attachment) VALUES (%(message)s, %(filename)s, %(contenttype)s, %(attachment)s)",[ {
+            curs.executemany("INSERT INTO attachments (message, filename, contenttype, attachment) VALUES (%(message)s, %(filename)s, %(contenttype)s, %(attachment)s)", [{
                 'message': id,
                 'filename': a[0] or 'unknown_filename',
                 'contenttype': a[1],
@@ -261,7 +262,6 @@ class ArchivesParserStorage(ArchivesParser):
             f.write("\n-------------------------------\n\n")
             return
 
-
         if (_from.rstrip(), to.rstrip(), cc.rstrip(), subject.rstrip()) != (self._from, self.to, self.cc, self.subject):
             log.status("Message %s has header changes " % self.msgid)
             f.write("==== %s ====\n" % self.msgid)
@@ -281,22 +281,21 @@ class ArchivesParserStorage(ArchivesParser):
                                                  tofile='new',
                                                  n=0,
                                                  lineterm=''))
-            if (len(tempdiff)-2) % 3 == 0:
+            if (len(tempdiff) - 2) % 3 == 0:
                 # 3 rows to a diff, two header rows.
                 # Then verify that each slice of 3 contains one @@ row (header), one -From and one +>From,
                 # which indicates the only change is in the From.
                 ok = True
                 tempdiff = tempdiff[2:]
                 while tempdiff:
-                    a,b,c = (tempdiff.pop(0), tempdiff.pop(0), tempdiff.pop(0))
+                    a, b, c = (tempdiff.pop(0), tempdiff.pop(0), tempdiff.pop(0))
                     if not (a.startswith('@@ ') and b.startswith('-From ') and c.startswith('+>From ')):
-                        ok=False
+                        ok = False
                         break
                 if ok:
                     fromonlyf.write("%s\n" % self.msgid)
                     return
 
-
         # Generate a nicer diff
         d = list(difflib.unified_diff(bodytxt.splitlines(),
                                       self.bodytxt.splitlines(),
diff --git a/loader/lib/varnish.py b/loader/lib/varnish.py
index 99d2d50..2b2bf89 100644
--- a/loader/lib/varnish.py
+++ b/loader/lib/varnish.py
@@ -2,6 +2,7 @@ import requests
 
 from lib.log import log
 
+
 class VarnishPurger(object):
     def __init__(self, cfg):
         self.cfg = cfg
@@ -30,4 +31,3 @@ class VarnishPurger(object):
             })
         if r.status_code != 200:
             log.error("Failed to send purge request!")
-
```
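The changes above are pure PEP 8 whitespace cleanup: spaces after commas and around operators, two blank lines before top-level classes, a blank line between class attributes and the following method, and removal of extra or trailing blank lines. As a rough illustration only (not part of this commit), the same classes of issues can be flagged with the third-party pycodestyle package; the error codes selected below are my assumption about which checks correspond to these fixes:

```python
# Minimal sketch, assuming pycodestyle is installed and run from the repo root.
import pycodestyle

checker = pycodestyle.StyleGuide(select=[
    'E225',          # missing whitespace around operator (x==None, ok=False)
    'E231',          # missing whitespace after ','       (for k,v in params)
    'E301', 'E302',  # expected 1 / 2 blank lines before def / class
    'E303',          # too many blank lines
    'W391',          # blank line at end of file
])
report = checker.check_files(['loader/lib'])
print("%d whitespace issues found" % report.total_errors)
```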