summary | refs | log | tree | commit | diff
path: root/loader/lib/parser.py
diff options
context:
space:
mode:
author: Magnus Hagander, 2019-01-03 20:52:58 +0000
committer: Magnus Hagander, 2019-01-04 11:24:06 +0000
commit: 69af766f8fae8ef489d951c71a37966fc51d736b (patch)
tree: 1e394a959d7ec9b49797b6cb74ec2eac5dbd47e8 /loader/lib/parser.py
parent: 1e165224612094e7605d9109e70a3034a0f5a864 (diff)
Whitespace fixes
Diffstat (limited to 'loader/lib/parser.py')
-rw-r--r-- loader/lib/parser.py 31
1 files changed, 17 insertions, 14 deletions
diff --git a/loader/lib/parser.py b/loader/lib/parser.py
index a727f0e..cef9468 100644
--- a/loader/lib/parser.py
+++ b/loader/lib/parser.py
@@ -13,6 +13,7 @@ import io
from lib.exception import IgnorableException
from lib.log import log
+
class ArchivesParser(object):
def __init__(self):
self.parser = BytesParser(policy=compat32)
@@ -64,7 +65,6 @@ class ArchivesParser(object):
if m and not m in self.parents:
self.parents.append(m)
-
def clean_charset(self, charset):
lcharset = charset.lower()
if lcharset == 'unknown-8bit' or lcharset == 'x-unknown' or lcharset == 'unknown':
@@ -139,7 +139,7 @@ class ArchivesParser(object):
if not params:
# No content-type, so we assume us-ascii
return str(b, 'us-ascii', errors='ignore')
- for k,v in params:
+ for k, v in params:
if k.lower() == 'charset':
charset = v
break
@@ -157,6 +157,7 @@ class ArchivesParser(object):
# Regular expression matching the PostgreSQL custom mail footer that
# is appended to all emails.
_re_footer = re.compile('(.*)--\s+\nSent via [^\s]+ mailing list \([^\)]+\)\nTo make changes to your subscription:\nhttp://www\.postgresql\.org/mailpref/[^\s]+\s*$', re.DOTALL)
+
def get_body(self):
b = self._get_body()
if b:
@@ -323,8 +324,8 @@ class ArchivesParser(object):
# If it has a name, we consider it an attachments
if not container.get_params():
return
- for k,v in container.get_params():
- if k=='name' and v != '':
+ for k, v in container.get_params():
+ if k == 'name' and v != '':
# Yes, it has a name
try:
self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True)))
@@ -374,6 +375,7 @@ class ArchivesParser(object):
# No name, and text/plain, so ignore it
re_msgid = re.compile('^\s*<(.*)>\s*')
+
def clean_messageid(self, messageid, ignorebroken=False):
m = self.re_msgid.match(messageid)
if not m:
@@ -381,7 +383,7 @@ class ArchivesParser(object):
log.status("Could not parse messageid '%s', ignoring it" % messageid)
return None
raise IgnorableException("Could not parse message id '%s'" % messageid)
- return m.groups(1)[0].replace(' ','')
+ return m.groups(1)[0].replace(' ', '')
# _date_multi_re = re.compile(' \((\w+\s\w+(\s+\w+)*|)\)$')
# Now using [^\s] instead of \w, to work with japanese chars
@@ -389,6 +391,7 @@ class ArchivesParser(object):
_date_multi_re2 = re.compile(' ([\+-]\d{4}) \([^)]+\)$')
_date_multiminus_re = re.compile(' -(-\d+)$')
_date_offsetnoplus_re = re.compile(' (\d{4})$')
+
def forgiving_date_decode(self, d):
if d.strip() == '':
raise IgnorableException("Failed to parse empty date")
@@ -416,17 +419,17 @@ class ArchivesParser(object):
if d.endswith('+0-900'):
d = d.replace('+0-900', '-0900')
if d.endswith('Mexico/General'):
- d = d.replace('Mexico/General','CDT')
+ d = d.replace('Mexico/General', 'CDT')
if d.endswith('Pacific Daylight Time'):
d = d.replace('Pacific Daylight Time', 'PDT')
if d.endswith(' ZE2'):
- d = d.replace(' ZE2',' +0200')
+ d = d.replace(' ZE2', ' +0200')
if d.find('-Juin-') > 0:
- d = d.replace('-Juin-','-Jun-')
+ d = d.replace('-Juin-', '-Jun-')
if d.find('-Juil-') > 0:
- d = d.replace('-Juil-','-Jul-')
+ d = d.replace('-Juil-', '-Jul-')
if d.find(' 0 (GMT)'):
- d = d.replace(' 0 (GMT)',' +0000')
+ d = d.replace(' 0 (GMT)', ' +0000')
if self._date_multiminus_re.search(d):
d = self._date_multiminus_re.sub(' \\1', d)
@@ -434,7 +437,6 @@ class ArchivesParser(object):
if self._date_offsetnoplus_re.search(d):
d = self._date_offsetnoplus_re.sub('+\\1', d)
-
# We have a number of dates in the format
# "<full datespace> +0200 (MET DST)"
# or similar. The problem coming from the space within the
@@ -455,7 +457,7 @@ class ArchivesParser(object):
# Some offsets are >16 hours, which postgresql will not
# (for good reasons) accept
- if dp.utcoffset() and abs(dp.utcoffset().days * (24*60*60) + dp.utcoffset().seconds) > 60*60*16-1:
+ if dp.utcoffset() and abs(dp.utcoffset().days * (24 * 60 * 60) + dp.utcoffset().seconds) > 60 * 60 * 16 - 1:
# Convert it to a UTC timestamp using Python. It will give
# us the right time, but the wrong timezone. Should be
# enough...
@@ -471,6 +473,7 @@ class ArchivesParser(object):
# Workaround for broken quoting in some MUAs (see below)
_re_mailworkaround = re.compile('"(=\?[^\?]+\?[QB]\?[^\?]+\?=)"', re.IGNORECASE)
+
def _decode_mime_header(self, hdr, email_workaround):
if hdr == None:
return None
@@ -480,7 +483,7 @@ class ArchivesParser(object):
# do this *before* doing any MIME decoding, we should be safe against
# anybody *actually* putting that sequence in the header (since we
# won't match the encoded contents)
- hdr = hdr.replace("\n\t"," ")
+ hdr = hdr.replace("\n\t", " ")
# In at least some cases, at least gmail (and possibly other MUAs)
# incorrectly put double quotes in the name/email field even when
@@ -516,7 +519,7 @@ class ArchivesParser(object):
def get_mandatory(self, fieldname):
try:
x = self.msg[fieldname]
- if x==None:
+ if x == None:
raise Exception()
return x
except: