| author | Magnus Hagander | 2019-01-03 20:15:38 +0000 |
|---|---|---|
| committer | Magnus Hagander | 2019-01-04 11:24:06 +0000 |
| commit | 3fb227230c145c828888aa2e7c5d8b9a8c0760a0 (patch) | |
| tree | 2eeba42da158e6e00c06d537dc6da3a895e39b6c /loader/lib | |
| parent | 1e173c362aa105ab4397fb77f8c693a1e01efa11 (diff) | |
Tabs to 4 spaces
PEP 8 standard for indentation
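The change itself is mechanical, so it is the kind of edit that can be scripted. As a rough illustration only — the commit does not say what tool was used, and the `retab` helper below is hypothetical — leading tabs can be expanded to 4-space indents like this:

```python
import pathlib

def retab(path, width=4):
    # Hypothetical helper, not part of this commit: replace each leading
    # tab with `width` spaces, leaving the rest of the line untouched.
    out = []
    for line in path.read_text().splitlines(keepends=True):
        stripped = line.lstrip("\t")
        ntabs = len(line) - len(stripped)
        out.append(" " * (width * ntabs) + stripped)
    path.write_text("".join(out))

for py in pathlib.Path("loader/lib").glob("*.py"):
    retab(py)
```

Because only the indentation changes, every modified line shows up once as a deletion and once as an insertion, which is why the diffstat below is symmetric.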
Diffstat (limited to 'loader/lib')
| -rw-r--r-- | loader/lib/exception.py | 2 |
| -rw-r--r-- | loader/lib/log.py | 44 |
| -rw-r--r-- | loader/lib/mbox.py | 68 |
| -rw-r--r-- | loader/lib/parser.py | 1102 |
| -rw-r--r-- | loader/lib/storage.py | 556 |
| -rw-r--r-- | loader/lib/varnish.py | 48 |
6 files changed, 910 insertions, 910 deletions
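The matching counts (910 insertions, 910 deletions) are what a pure whitespace change looks like. If desired, this can be checked with `git diff -w 3fb2272^ 3fb2272`, which ignores whitespace differences and should print nothing for this commit.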
diff --git a/loader/lib/exception.py b/loader/lib/exception.py index 02172a2..54a9efc 100644 --- a/loader/lib/exception.py +++ b/loader/lib/exception.py @@ -1,2 +1,2 @@ class IgnorableException(Exception): - pass + pass diff --git a/loader/lib/log.py b/loader/lib/log.py index 5b6379a..48722c9 100644 --- a/loader/lib/log.py +++ b/loader/lib/log.py @@ -1,33 +1,33 @@ class Log(object): - def __init__(self): - self.verbose = False + def __init__(self): + self.verbose = False - def set(self, verbose): - self.verbose = verbose + def set(self, verbose): + self.verbose = verbose - def status(self, msg): - if self.verbose: - print(msg) + def status(self, msg): + if self.verbose: + print(msg) - def log(self, msg): - print(msg) + def log(self, msg): + print(msg) - def error(self, msg): - print(msg) + def error(self, msg): + print(msg) - def print_status(self): - opstatus.print_status() + def print_status(self): + opstatus.print_status() class OpStatus(object): - def __init__(self): - self.stored = 0 - self.dupes = 0 - self.tagged = 0 - self.failed = 0 - self.overwritten = 0 - - def print_status(self): - print("%s stored, %s new-list tagged, %s dupes, %s failed, %s overwritten" % (self.stored, self.tagged, self.dupes, self.failed, self.overwritten)) + def __init__(self): + self.stored = 0 + self.dupes = 0 + self.tagged = 0 + self.failed = 0 + self.overwritten = 0 + + def print_status(self): + print("%s stored, %s new-list tagged, %s dupes, %s failed, %s overwritten" % (self.stored, self.tagged, self.dupes, self.failed, self.overwritten)) log = Log() diff --git a/loader/lib/mbox.py b/loader/lib/mbox.py index 77c83b0..c097e72 100644 --- a/loader/lib/mbox.py +++ b/loader/lib/mbox.py @@ -11,41 +11,41 @@ SEPARATOR = "ABCARCHBREAK123" * 50 bSEPARATOR = bytes(SEPARATOR, 'ascii') class MailboxBreakupParser(object): - def __init__(self, fn): - self.EOF = False + def __init__(self, fn): + self.EOF = False - if fn.endswith(".gz"): - cat = "zcat" - else: - cat = "cat" - cmd = "%s %s | formail -s /bin/sh -c 'cat && echo %s'" % (cat, fn, SEPARATOR) - self.pipe = Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE) + if fn.endswith(".gz"): + cat = "zcat" + else: + cat = "cat" + cmd = "%s %s | formail -s /bin/sh -c 'cat && echo %s'" % (cat, fn, SEPARATOR) + self.pipe = Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE) - def returncode(self): - self.pipe.wait() - return self.pipe.returncode + def returncode(self): + self.pipe.wait() + return self.pipe.returncode - def stderr_output(self): - return self.pipe.stderr.read() + def stderr_output(self): + return self.pipe.stderr.read() - def __next__(self): - sio = BytesIO() - while True: - try: - l = next(self.pipe.stdout) - except StopIteration: - # End of file! - self.EOF = True - if sio.tell() == 0: - # Nothing read yet, so return None instead of an empty - # bytesio - return None - sio.seek(0) - return sio - if l.rstrip() == bSEPARATOR: - # Reached a separator. Meaning we're not at end of file, - # but we're at end of message. - sio.seek(0) - return sio - # Otherwise, append it to where we are now - sio.write(l) + def __next__(self): + sio = BytesIO() + while True: + try: + l = next(self.pipe.stdout) + except StopIteration: + # End of file! + self.EOF = True + if sio.tell() == 0: + # Nothing read yet, so return None instead of an empty + # bytesio + return None + sio.seek(0) + return sio + if l.rstrip() == bSEPARATOR: + # Reached a separator. Meaning we're not at end of file, + # but we're at end of message. 
+ sio.seek(0) + return sio + # Otherwise, append it to where we are now + sio.write(l) diff --git a/loader/lib/parser.py b/loader/lib/parser.py index b97c8b3..81192da 100644 --- a/loader/lib/parser.py +++ b/loader/lib/parser.py @@ -14,560 +14,560 @@ from lib.exception import IgnorableException from lib.log import log class ArchivesParser(object): - def __init__(self): - self.parser = BytesParser(policy=compat32) - - def parse(self, stream): - self.rawtxt = stream.read() - self.msg = self.parser.parse(io.BytesIO(self.rawtxt)) - - def is_msgid(self, msgid): - # Look for a specific messageid. This means we might parse it twice, - # but so be it. Any exception means we know it's not this one... - try: - if self.clean_messageid(self.decode_mime_header(self.get_mandatory('Message-ID'))) == msgid: - return True - except Exception as e: - return False - - def analyze(self, date_override=None): - self.msgid = self.clean_messageid(self.decode_mime_header(self.get_mandatory('Message-ID'))) - self._from = self.decode_mime_header(self.get_mandatory('From'), True) - self.to = self.decode_mime_header(self.get_optional('To'), True) - self.cc = self.decode_mime_header(self.get_optional('CC'), True) - self.subject = self.decode_mime_header(self.get_optional('Subject')) - if date_override: - self.date = self.forgiving_date_decode(date_override) - else: - self.date = self.forgiving_date_decode(self.decode_mime_header(self.get_mandatory('Date'))) - self.bodytxt = self.get_body() - self.attachments = [] - self.get_attachments() - if len(self.attachments) > 0: - log.status("Found %s attachments" % len(self.attachments)) - - # Build an list of the message id's we are interested in - self.parents = [] - # The first one is in-reply-to, if it exists - if self.get_optional('in-reply-to'): - m = self.clean_messageid(self.decode_mime_header(self.get_optional('in-reply-to')), True) - if m: - self.parents.append(m) - - # Then we add all References values, in backwards order - if self.get_optional('references'): - cleaned_msgids = [self.clean_messageid(x, True) for x in reversed(self.decode_mime_header(self.get_optional('references')).split())] - # Can't do this with a simple self.parents.extend() due to broken - # mailers that add the same reference more than once. And we can't - # use a set() to make it unique, because order is very important - for m in cleaned_msgids: - if m and not m in self.parents: - self.parents.append(m) - - - def clean_charset(self, charset): - lcharset = charset.lower() - if lcharset == 'unknown-8bit' or lcharset == 'x-unknown' or lcharset == 'unknown': - # Special case where we don't know... We'll assume - # us-ascii and use replacements - return 'us-ascii' - if lcharset == '0' or lcharset == 'x-user-defined' or lcharset == '_autodetect_all' or lcharset == 'default_charset': - # Seriously broken charset definitions, map to us-ascii - # and throw away the rest with replacements - return 'us-ascii' - if lcharset == 'x-gbk': - # Some MUAs set it to x-gbk, but there is a valid - # declaratoin as gbk... - return 'gbk' - if lcharset == 'iso-8859-8-i': - # -I is a special logical version, but should be the - # same charset - return 'iso-8859-8' - if lcharset == 'windows-874': - # This is an alias for iso-8859-11 - return 'iso-8859-11' - if lcharset == 'iso-88-59-1' or lcharset == 'iso-8858-1': - # Strange way of saying 8859.... 
- return 'iso-8859-1' - if lcharset == 'iso885915': - return 'iso-8859-15' - if lcharset == 'iso-latin-2': - return 'iso-8859-2' - if lcharset == 'iso-850': - # Strange spelling of cp850 (windows charset) - return 'cp850' - if lcharset == 'koi8r': - return 'koi8-r' - if lcharset == 'cp 1252': - return 'cp1252' - if lcharset == 'iso-8859-1,iso-8859-2' or lcharset == 'iso-8859-1:utf8:us-ascii': - # Why did this show up more than once?! - return 'iso-8859-1' - if lcharset == 'x-windows-949': - return 'ms949' - if lcharset == 'pt_pt' or lcharset == 'de_latin' or lcharset == 'de': - # This is a locale, and not a charset, but most likely it's this one - return 'iso-8859-1' - if lcharset == 'iso-8858-15': - # How is this a *common* mistake? - return 'iso-8859-15' - if lcharset == 'macintosh': - return 'mac_roman' - if lcharset == 'cn-big5': - return 'big5' - if lcharset == 'x-unicode-2-0-utf-7': - return 'utf-7' - if lcharset == 'tscii': - # No support for this charset :S Map it down to ascii - # and throw away all the rest. sucks, but we have to - return 'us-ascii' - return charset - - def get_payload_as_unicode(self, msg): - try: - b = msg.get_payload(decode=True) - except AssertionError: - # Badly encoded data can throw an exception here, where the python - # libraries fail to handle it and enters a cannot-happen path. - # In which case we just ignore it and hope for a better MIME part later. - b = None - - if b: - # Find out if there is a charset - charset = None - params = msg.get_params() - if not params: - # No content-type, so we assume us-ascii - return str(b, 'us-ascii', errors='ignore') - for k,v in params: - if k.lower() == 'charset': - charset = v - break - if charset: - try: - return str(b, self.clean_charset(charset), errors='ignore') - except LookupError as e: - raise IgnorableException("Failed to get unicode payload: %s" % e) - else: - # XXX: reasonable default? - return str(b, errors='ignore') - # Return None or empty string, depending on what we got back - return b - - # Regular expression matching the PostgreSQL custom mail footer that - # is appended to all emails. - _re_footer = re.compile('(.*)--\s+\nSent via [^\s]+ mailing list \([^\)]+\)\nTo make changes to your subscription:\nhttp://www\.postgresql\.org/mailpref/[^\s]+\s*$', re.DOTALL) - def get_body(self): - b = self._get_body() - if b: - # Python bug 9133, allows unicode surrogate pairs - which PostgreSQL will - # later reject.. - if b.find('\udbff\n\udef8'): - b = b.replace('\udbff\n\udef8', '') - - # Remove postgres specific mail footer - if it's there - m = self._re_footer.match(b) - if m: - b = m.group(1) - - # Sometimes we end up with a trailing \0 when decoding long strings, so - # replace it if it's there. - # In fact, replace it everywhere, since it can also turn up in the middle - # of a text when it's a really broken decoding. - b = b.replace('\0', '') - - return b - - def _get_body(self): - # This is where the magic happens - try to figure out what the body - # of this message should render as. - hasempty = False - - # First see if this is a single-part message that we can just - # decode and go. - b = self.get_payload_as_unicode(self.msg) - if b: return b - if b == '': - # We found something, but it was empty. We'll keep looking as - # there might be something better available, but make a note - # that empty exists. - hasempty = True - - # Ok, it's multipart. Find the first part that is text/plain, - # and use that one. 
Do this recursively, since we may have something - # like: - # multipart/mixed: - # multipart/alternative: - # text/plain - # text/html - # application/octet-stream (attachment) - b = self.recursive_first_plaintext(self.msg) - if b: return b - if b == '': - hasempty = True - - # Couldn't find a plaintext. Look for the first HTML in that case. - # Fallback, but what can we do at this point... - b = self.recursive_first_plaintext(self.msg, True) - if b: - b = self.html_clean(b) - if b: return b - if b == '' or b is None: - hasempty = True - - if hasempty: - log.status('Found empty body in %s' % self.msgid) - return '' - raise IgnorableException("Don't know how to read the body from %s" % self.msgid) - - def recursive_first_plaintext(self, container, html_instead=False): - pl = container.get_payload() - if isinstance(pl, str): - # This was not a multipart, but it leaked... Give up! - return None - for p in pl: - if p.get_params() == None: - # MIME multipart/mixed, but no MIME type on the part - log.status("Found multipart/mixed in message '%s', but no MIME type on part. Trying text/plain." % self.msgid) - return self.get_payload_as_unicode(p) - if p.get_params()[0][0].lower() == 'text/plain': - # Don't include it if it looks like an attachment - if 'Content-Disposition' in p and p['Content-Disposition'].startswith('attachment'): - continue - return self.get_payload_as_unicode(p) - if html_instead and p.get_params()[0][0].lower() == 'text/html': - # Don't include it if it looks like an attachment - if 'Content-Disposition' in p and p['Content-Disposition'].startswith('attachment'): - continue - return self.get_payload_as_unicode(p) - if p.is_multipart(): - b = self.recursive_first_plaintext(p, html_instead) - if b or b == '': return b - - # Yikes, nothing here! Hopefully we'll find something when - # we continue looping at a higher level. - return None - - def get_attachments(self): - self.attachments_found_first_plaintext = False - self.recursive_get_attachments(self.msg) - - # Clean a filenames encoding and return it as a unicode string - def _clean_filename_encoding(self, filename): - # If this is a header-encoded filename, start by decoding that - if filename.startswith('=?'): - decoded, encoding = decode_header(filename)[0] - return str(decoded, encoding, errors='ignore') - - # If it's already unicode, just return it - if isinstance(filename, str): - return filename - - # Anything that's not UTF8, we just get rid of. We can live with - # filenames slightly mangled in this case. - return str(filename, 'utf-8', errors='ignore') - - def _extract_filename(self, container): - # Try to get the filename for an attachment in the container. - # If the standard library can figure one out, use that one. - f = container.get_filename() - if f: return self._clean_filename_encoding(f) - - # Failing that, some mailers set Content-Description to the - # filename - if 'Content-Description' in container: - return self._clean_filename_encoding(container['Content-Description']) - return None - - def recursive_get_attachments(self, container): - # We start recursion in the "multipart" container if any - if container.get_content_type() == 'multipart/mixed' or container.get_content_type() == 'multipart/signed': - # Multipart - worth scanning into - if not container.is_multipart(): - # Wow, this is broken. It's multipart/mixed, but doesn't - # contain multiple parts. - # Since we're just looking for attachments, let's just - # ignore it... 
- return - for p in container.get_payload(): - if p.get_params() == None: - continue - self.recursive_get_attachments(p) - elif container.get_content_type() == 'multipart/alternative': - # Alternative is not an attachment (we decide) - # It's typilcally plantext + html - self.attachments_found_first_plaintext = True - return - elif container.is_multipart(): - # Other kinds of multipart, such as multipart/signed... - return - else: - # Not a multipart. - # Exclude specific contenttypes - if container.get_content_type() == 'application/pgp-signature': - return - if container.get_content_type() in ('application/pkcs7-signature', 'application/x-pkcs7-signature'): - return - # For now, accept anything not text/plain - if container.get_content_type() != 'text/plain': - try: - self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True))) - except AssertionError: - # Badly encoded data can throw an exception here, where the python - # libraries fail to handle it and enters a cannot-happen path. - # In which case we just ignore this attachment. - return - return - - # It's a text/plain, it might be worthwhile. - # If it has a name, we consider it an attachments - if not container.get_params(): - return - for k,v in container.get_params(): - if k=='name' and v != '': - # Yes, it has a name - try: - self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True))) - except AssertionError: - # Badly encoded data can throw an exception here, where the python - # libraries fail to handle it and enters a cannot-happen path. - # In which case we just ignore this attachment. - return - - return - - # If it's content-disposition=attachment, we also want to save it - if 'Content-Disposition' in container and container['Content-Disposition'].startswith('attachment'): - try: - self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True))) - except AssertionError: - # Badly encoded data can throw an exception here, where the python - # libraries fail to handle it and enters a cannot-happen path. - # In which case we just ignore this attachment. - return - - return - - # If we have already found one text/plain part, make all - # further text/plain parts attachments - if self.attachments_found_first_plaintext: - # However, this will also *always* catch the MIME part added - # by majordomo with the footer. So if that one is present, - # we need to explicitly exclude it again. - try: - b = container.get_payload(decode=True) - except AssertionError: - # Badly encoded data can throw an exception here, where the python - # libraries fail to handle it and enters a cannot-happen path. - # In which case we just ignore this attachment. - return - - if isinstance(b, str) and not self._re_footer.match(b): - # We know there is no name for this one - self.attachments.append((None, container.get_content_type(), b)) - return - - # Ok, so this was a plaintext that we ignored. Set the flag - # that we have now ignored one, so we'll make the next one - # an attachment. 
- self.attachments_found_first_plaintext = True - # No name, and text/plain, so ignore it - - re_msgid = re.compile('^\s*<(.*)>\s*') - def clean_messageid(self, messageid, ignorebroken=False): - m = self.re_msgid.match(messageid) - if not m: - if ignorebroken: - log.status("Could not parse messageid '%s', ignoring it" % messageid) - return None - raise IgnorableException("Could not parse message id '%s'" % messageid) - return m.groups(1)[0].replace(' ','') - -# _date_multi_re = re.compile(' \((\w+\s\w+(\s+\w+)*|)\)$') - # Now using [^\s] instead of \w, to work with japanese chars - _date_multi_re = re.compile(' \(([^\s]+\s[^\s]+(\s+[^\s]+)*|)\)$') - _date_multi_re2 = re.compile(' ([\+-]\d{4}) \([^)]+\)$') - _date_multiminus_re = re.compile(' -(-\d+)$') - _date_offsetnoplus_re = re.compile(' (\d{4})$') - def forgiving_date_decode(self, d): - if d.strip() == '': - raise IgnorableException("Failed to parse empty date") - # Strange timezones requiring manual adjustments - if d.endswith('-7700 (EST)'): - d = d.replace('-7700 (EST)', 'EST') - if d.endswith('+6700 (EST)'): - d = d.replace('+6700 (EST)', 'EST') - if d.endswith('+-4-30'): - d = d.replace('+-4-30', '+0430') - if d.endswith('+1.00'): - d = d.replace('+1.00', '+0100') - if d.endswith('+-100'): - d = d.replace('+-100', '+0100') - if d.endswith('+500'): - d = d.replace('+500', '+0500') - if d.endswith('-500'): - d = d.replace('-500', '-0500') - if d.endswith('-700'): - d = d.replace('-700', '-0700') - if d.endswith('-800'): - d = d.replace('-800', '-0800') - if d.endswith('+05-30'): - d = d.replace('+05-30', '+0530') - if d.endswith('+0-900'): - d = d.replace('+0-900', '-0900') - if d.endswith('Mexico/General'): - d = d.replace('Mexico/General','CDT') - if d.endswith('Pacific Daylight Time'): - d = d.replace('Pacific Daylight Time', 'PDT') - if d.endswith(' ZE2'): - d = d.replace(' ZE2',' +0200') - if d.find('-Juin-') > 0: - d = d.replace('-Juin-','-Jun-') - if d.find('-Juil-') > 0: - d = d.replace('-Juil-','-Jul-') - if d.find(' 0 (GMT)'): - d = d.replace(' 0 (GMT)',' +0000') - - if self._date_multiminus_re.search(d): - d = self._date_multiminus_re.sub(' \\1', d) - - if self._date_offsetnoplus_re.search(d): - d = self._date_offsetnoplus_re.sub('+\\1', d) - - - # We have a number of dates in the format - # "<full datespace> +0200 (MET DST)" - # or similar. The problem coming from the space within the - # parenthesis, or if the contents of the parenthesis is - # completely empty - if self._date_multi_re.search(d): - d = self._date_multi_re.sub('', d) - - # If the spec is instead - # "<full datespace> +0200 (...)" - # of any kind, we can just remove what's in the (), because the - # parser is just going to rely on the fixed offset anyway. - if self._date_multi_re2.search(d): - d = self._date_multi_re2.sub(' \\1', d) - - try: - dp = dateutil.parser.parse(d, fuzzy=True) - - # Some offsets are >16 hours, which postgresql will not - # (for good reasons) accept - if dp.utcoffset() and abs(dp.utcoffset().days * (24*60*60) + dp.utcoffset().seconds) > 60*60*16-1: - # Convert it to a UTC timestamp using Python. It will give - # us the right time, but the wrong timezone. Should be - # enough... 
- dp = datetime.datetime(*dp.utctimetuple()[:6]) - return dp - except Exception as e: - raise IgnorableException("Failed to parse date '%s': %s" % (d, e)) - - def _maybe_decode(self, s, charset): - if isinstance(s, str): - return s.strip(' ') - return str(s, charset and self.clean_charset(charset) or 'us-ascii', errors='ignore').strip(' ') - - # Workaround for broken quoting in some MUAs (see below) - _re_mailworkaround = re.compile('"(=\?[^\?]+\?[QB]\?[^\?]+\?=)"', re.IGNORECASE) - def _decode_mime_header(self, hdr, email_workaround): - if hdr == None: - return None - - # Per http://bugs.python.org/issue504152 (and lots of testing), it seems - # we must get rid of the sequence \n\t at least in the header. If we - # do this *before* doing any MIME decoding, we should be safe against - # anybody *actually* putting that sequence in the header (since we - # won't match the encoded contents) - hdr = hdr.replace("\n\t"," ") - - # In at least some cases, at least gmail (and possibly other MUAs) - # incorrectly put double quotes in the name/email field even when - # it's encoded. That's not allowed - they have to be escaped - but - # since there's a fair amount of those, we apply a regex to get - # rid of them. - m = self._re_mailworkaround.search(hdr) - if m: - hdr = self._re_mailworkaround.sub(r'\1', hdr) - - try: - return " ".join([self._maybe_decode(s, charset) for s, charset in decode_header(hdr)]) - except HeaderParseError as e: - # Parser error is typically someone specifying an encoding, - # but then not actually using that encoding. We'll do the best - # we can, which is cut it down to ascii and ignore errors - return str(hdr, 'us-ascii', errors='ignore').strip(' ') - - def decode_mime_header(self, hdr, email_workaround=False): - try: - if isinstance(hdr, Header): - hdr = hdr.encode() - - h = self._decode_mime_header(hdr, email_workaround) - if h: - return h.replace("\0", "") - return '' - except LookupError as e: - raise IgnorableException("Failed to decode header value '%s': %s" % (hdr, e)) - except ValueError as ve: - raise IgnorableException("Failed to decode header value '%s': %s" % (hdr, ve)) - - def get_mandatory(self, fieldname): - try: - x = self.msg[fieldname] - if x==None: - raise Exception() - return x - except: - raise IgnorableException("Mandatory field '%s' is missing" % fieldname) - - def get_optional(self, fieldname): - try: - return self.msg[fieldname] - except: - return '' - - def html_clean(self, html): - # First we pass it through tidy - (html, errors) = tidylib.tidy_document(html, - options={ - 'drop-proprietary-attributes': 1, - 'alt-text': '', - 'hide-comments': 1, - 'output-xhtml': 1, - 'show-body-only': 1, - 'clean': 1, - 'char-encoding': 'utf8', - 'show-warnings': 0, - 'show-info': 0, - }) - if errors: - print(("HTML tidy failed for %s!" % self.msgid)) - print(errors) - return None - - try: - cleaner = HTMLCleaner() - cleaner.feed(html) - return cleaner.get_text() - except Exception as e: - # Failed to parse the html, thus failed to clean it. so we must - # give up... - return None + def __init__(self): + self.parser = BytesParser(policy=compat32) + + def parse(self, stream): + self.rawtxt = stream.read() + self.msg = self.parser.parse(io.BytesIO(self.rawtxt)) + + def is_msgid(self, msgid): + # Look for a specific messageid. This means we might parse it twice, + # but so be it. Any exception means we know it's not this one... 
+ try: + if self.clean_messageid(self.decode_mime_header(self.get_mandatory('Message-ID'))) == msgid: + return True + except Exception as e: + return False + + def analyze(self, date_override=None): + self.msgid = self.clean_messageid(self.decode_mime_header(self.get_mandatory('Message-ID'))) + self._from = self.decode_mime_header(self.get_mandatory('From'), True) + self.to = self.decode_mime_header(self.get_optional('To'), True) + self.cc = self.decode_mime_header(self.get_optional('CC'), True) + self.subject = self.decode_mime_header(self.get_optional('Subject')) + if date_override: + self.date = self.forgiving_date_decode(date_override) + else: + self.date = self.forgiving_date_decode(self.decode_mime_header(self.get_mandatory('Date'))) + self.bodytxt = self.get_body() + self.attachments = [] + self.get_attachments() + if len(self.attachments) > 0: + log.status("Found %s attachments" % len(self.attachments)) + + # Build an list of the message id's we are interested in + self.parents = [] + # The first one is in-reply-to, if it exists + if self.get_optional('in-reply-to'): + m = self.clean_messageid(self.decode_mime_header(self.get_optional('in-reply-to')), True) + if m: + self.parents.append(m) + + # Then we add all References values, in backwards order + if self.get_optional('references'): + cleaned_msgids = [self.clean_messageid(x, True) for x in reversed(self.decode_mime_header(self.get_optional('references')).split())] + # Can't do this with a simple self.parents.extend() due to broken + # mailers that add the same reference more than once. And we can't + # use a set() to make it unique, because order is very important + for m in cleaned_msgids: + if m and not m in self.parents: + self.parents.append(m) + + + def clean_charset(self, charset): + lcharset = charset.lower() + if lcharset == 'unknown-8bit' or lcharset == 'x-unknown' or lcharset == 'unknown': + # Special case where we don't know... We'll assume + # us-ascii and use replacements + return 'us-ascii' + if lcharset == '0' or lcharset == 'x-user-defined' or lcharset == '_autodetect_all' or lcharset == 'default_charset': + # Seriously broken charset definitions, map to us-ascii + # and throw away the rest with replacements + return 'us-ascii' + if lcharset == 'x-gbk': + # Some MUAs set it to x-gbk, but there is a valid + # declaratoin as gbk... + return 'gbk' + if lcharset == 'iso-8859-8-i': + # -I is a special logical version, but should be the + # same charset + return 'iso-8859-8' + if lcharset == 'windows-874': + # This is an alias for iso-8859-11 + return 'iso-8859-11' + if lcharset == 'iso-88-59-1' or lcharset == 'iso-8858-1': + # Strange way of saying 8859.... + return 'iso-8859-1' + if lcharset == 'iso885915': + return 'iso-8859-15' + if lcharset == 'iso-latin-2': + return 'iso-8859-2' + if lcharset == 'iso-850': + # Strange spelling of cp850 (windows charset) + return 'cp850' + if lcharset == 'koi8r': + return 'koi8-r' + if lcharset == 'cp 1252': + return 'cp1252' + if lcharset == 'iso-8859-1,iso-8859-2' or lcharset == 'iso-8859-1:utf8:us-ascii': + # Why did this show up more than once?! + return 'iso-8859-1' + if lcharset == 'x-windows-949': + return 'ms949' + if lcharset == 'pt_pt' or lcharset == 'de_latin' or lcharset == 'de': + # This is a locale, and not a charset, but most likely it's this one + return 'iso-8859-1' + if lcharset == 'iso-8858-15': + # How is this a *common* mistake? 
+ return 'iso-8859-15' + if lcharset == 'macintosh': + return 'mac_roman' + if lcharset == 'cn-big5': + return 'big5' + if lcharset == 'x-unicode-2-0-utf-7': + return 'utf-7' + if lcharset == 'tscii': + # No support for this charset :S Map it down to ascii + # and throw away all the rest. sucks, but we have to + return 'us-ascii' + return charset + + def get_payload_as_unicode(self, msg): + try: + b = msg.get_payload(decode=True) + except AssertionError: + # Badly encoded data can throw an exception here, where the python + # libraries fail to handle it and enters a cannot-happen path. + # In which case we just ignore it and hope for a better MIME part later. + b = None + + if b: + # Find out if there is a charset + charset = None + params = msg.get_params() + if not params: + # No content-type, so we assume us-ascii + return str(b, 'us-ascii', errors='ignore') + for k,v in params: + if k.lower() == 'charset': + charset = v + break + if charset: + try: + return str(b, self.clean_charset(charset), errors='ignore') + except LookupError as e: + raise IgnorableException("Failed to get unicode payload: %s" % e) + else: + # XXX: reasonable default? + return str(b, errors='ignore') + # Return None or empty string, depending on what we got back + return b + + # Regular expression matching the PostgreSQL custom mail footer that + # is appended to all emails. + _re_footer = re.compile('(.*)--\s+\nSent via [^\s]+ mailing list \([^\)]+\)\nTo make changes to your subscription:\nhttp://www\.postgresql\.org/mailpref/[^\s]+\s*$', re.DOTALL) + def get_body(self): + b = self._get_body() + if b: + # Python bug 9133, allows unicode surrogate pairs - which PostgreSQL will + # later reject.. + if b.find('\udbff\n\udef8'): + b = b.replace('\udbff\n\udef8', '') + + # Remove postgres specific mail footer - if it's there + m = self._re_footer.match(b) + if m: + b = m.group(1) + + # Sometimes we end up with a trailing \0 when decoding long strings, so + # replace it if it's there. + # In fact, replace it everywhere, since it can also turn up in the middle + # of a text when it's a really broken decoding. + b = b.replace('\0', '') + + return b + + def _get_body(self): + # This is where the magic happens - try to figure out what the body + # of this message should render as. + hasempty = False + + # First see if this is a single-part message that we can just + # decode and go. + b = self.get_payload_as_unicode(self.msg) + if b: return b + if b == '': + # We found something, but it was empty. We'll keep looking as + # there might be something better available, but make a note + # that empty exists. + hasempty = True + + # Ok, it's multipart. Find the first part that is text/plain, + # and use that one. Do this recursively, since we may have something + # like: + # multipart/mixed: + # multipart/alternative: + # text/plain + # text/html + # application/octet-stream (attachment) + b = self.recursive_first_plaintext(self.msg) + if b: return b + if b == '': + hasempty = True + + # Couldn't find a plaintext. Look for the first HTML in that case. + # Fallback, but what can we do at this point... 
+ b = self.recursive_first_plaintext(self.msg, True) + if b: + b = self.html_clean(b) + if b: return b + if b == '' or b is None: + hasempty = True + + if hasempty: + log.status('Found empty body in %s' % self.msgid) + return '' + raise IgnorableException("Don't know how to read the body from %s" % self.msgid) + + def recursive_first_plaintext(self, container, html_instead=False): + pl = container.get_payload() + if isinstance(pl, str): + # This was not a multipart, but it leaked... Give up! + return None + for p in pl: + if p.get_params() == None: + # MIME multipart/mixed, but no MIME type on the part + log.status("Found multipart/mixed in message '%s', but no MIME type on part. Trying text/plain." % self.msgid) + return self.get_payload_as_unicode(p) + if p.get_params()[0][0].lower() == 'text/plain': + # Don't include it if it looks like an attachment + if 'Content-Disposition' in p and p['Content-Disposition'].startswith('attachment'): + continue + return self.get_payload_as_unicode(p) + if html_instead and p.get_params()[0][0].lower() == 'text/html': + # Don't include it if it looks like an attachment + if 'Content-Disposition' in p and p['Content-Disposition'].startswith('attachment'): + continue + return self.get_payload_as_unicode(p) + if p.is_multipart(): + b = self.recursive_first_plaintext(p, html_instead) + if b or b == '': return b + + # Yikes, nothing here! Hopefully we'll find something when + # we continue looping at a higher level. + return None + + def get_attachments(self): + self.attachments_found_first_plaintext = False + self.recursive_get_attachments(self.msg) + + # Clean a filenames encoding and return it as a unicode string + def _clean_filename_encoding(self, filename): + # If this is a header-encoded filename, start by decoding that + if filename.startswith('=?'): + decoded, encoding = decode_header(filename)[0] + return str(decoded, encoding, errors='ignore') + + # If it's already unicode, just return it + if isinstance(filename, str): + return filename + + # Anything that's not UTF8, we just get rid of. We can live with + # filenames slightly mangled in this case. + return str(filename, 'utf-8', errors='ignore') + + def _extract_filename(self, container): + # Try to get the filename for an attachment in the container. + # If the standard library can figure one out, use that one. + f = container.get_filename() + if f: return self._clean_filename_encoding(f) + + # Failing that, some mailers set Content-Description to the + # filename + if 'Content-Description' in container: + return self._clean_filename_encoding(container['Content-Description']) + return None + + def recursive_get_attachments(self, container): + # We start recursion in the "multipart" container if any + if container.get_content_type() == 'multipart/mixed' or container.get_content_type() == 'multipart/signed': + # Multipart - worth scanning into + if not container.is_multipart(): + # Wow, this is broken. It's multipart/mixed, but doesn't + # contain multiple parts. + # Since we're just looking for attachments, let's just + # ignore it... + return + for p in container.get_payload(): + if p.get_params() == None: + continue + self.recursive_get_attachments(p) + elif container.get_content_type() == 'multipart/alternative': + # Alternative is not an attachment (we decide) + # It's typilcally plantext + html + self.attachments_found_first_plaintext = True + return + elif container.is_multipart(): + # Other kinds of multipart, such as multipart/signed... + return + else: + # Not a multipart. 
+ # Exclude specific contenttypes + if container.get_content_type() == 'application/pgp-signature': + return + if container.get_content_type() in ('application/pkcs7-signature', 'application/x-pkcs7-signature'): + return + # For now, accept anything not text/plain + if container.get_content_type() != 'text/plain': + try: + self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True))) + except AssertionError: + # Badly encoded data can throw an exception here, where the python + # libraries fail to handle it and enters a cannot-happen path. + # In which case we just ignore this attachment. + return + return + + # It's a text/plain, it might be worthwhile. + # If it has a name, we consider it an attachments + if not container.get_params(): + return + for k,v in container.get_params(): + if k=='name' and v != '': + # Yes, it has a name + try: + self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True))) + except AssertionError: + # Badly encoded data can throw an exception here, where the python + # libraries fail to handle it and enters a cannot-happen path. + # In which case we just ignore this attachment. + return + + return + + # If it's content-disposition=attachment, we also want to save it + if 'Content-Disposition' in container and container['Content-Disposition'].startswith('attachment'): + try: + self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True))) + except AssertionError: + # Badly encoded data can throw an exception here, where the python + # libraries fail to handle it and enters a cannot-happen path. + # In which case we just ignore this attachment. + return + + return + + # If we have already found one text/plain part, make all + # further text/plain parts attachments + if self.attachments_found_first_plaintext: + # However, this will also *always* catch the MIME part added + # by majordomo with the footer. So if that one is present, + # we need to explicitly exclude it again. + try: + b = container.get_payload(decode=True) + except AssertionError: + # Badly encoded data can throw an exception here, where the python + # libraries fail to handle it and enters a cannot-happen path. + # In which case we just ignore this attachment. + return + + if isinstance(b, str) and not self._re_footer.match(b): + # We know there is no name for this one + self.attachments.append((None, container.get_content_type(), b)) + return + + # Ok, so this was a plaintext that we ignored. Set the flag + # that we have now ignored one, so we'll make the next one + # an attachment. 
+ self.attachments_found_first_plaintext = True + # No name, and text/plain, so ignore it + + re_msgid = re.compile('^\s*<(.*)>\s*') + def clean_messageid(self, messageid, ignorebroken=False): + m = self.re_msgid.match(messageid) + if not m: + if ignorebroken: + log.status("Could not parse messageid '%s', ignoring it" % messageid) + return None + raise IgnorableException("Could not parse message id '%s'" % messageid) + return m.groups(1)[0].replace(' ','') + +# _date_multi_re = re.compile(' \((\w+\s\w+(\s+\w+)*|)\)$') + # Now using [^\s] instead of \w, to work with japanese chars + _date_multi_re = re.compile(' \(([^\s]+\s[^\s]+(\s+[^\s]+)*|)\)$') + _date_multi_re2 = re.compile(' ([\+-]\d{4}) \([^)]+\)$') + _date_multiminus_re = re.compile(' -(-\d+)$') + _date_offsetnoplus_re = re.compile(' (\d{4})$') + def forgiving_date_decode(self, d): + if d.strip() == '': + raise IgnorableException("Failed to parse empty date") + # Strange timezones requiring manual adjustments + if d.endswith('-7700 (EST)'): + d = d.replace('-7700 (EST)', 'EST') + if d.endswith('+6700 (EST)'): + d = d.replace('+6700 (EST)', 'EST') + if d.endswith('+-4-30'): + d = d.replace('+-4-30', '+0430') + if d.endswith('+1.00'): + d = d.replace('+1.00', '+0100') + if d.endswith('+-100'): + d = d.replace('+-100', '+0100') + if d.endswith('+500'): + d = d.replace('+500', '+0500') + if d.endswith('-500'): + d = d.replace('-500', '-0500') + if d.endswith('-700'): + d = d.replace('-700', '-0700') + if d.endswith('-800'): + d = d.replace('-800', '-0800') + if d.endswith('+05-30'): + d = d.replace('+05-30', '+0530') + if d.endswith('+0-900'): + d = d.replace('+0-900', '-0900') + if d.endswith('Mexico/General'): + d = d.replace('Mexico/General','CDT') + if d.endswith('Pacific Daylight Time'): + d = d.replace('Pacific Daylight Time', 'PDT') + if d.endswith(' ZE2'): + d = d.replace(' ZE2',' +0200') + if d.find('-Juin-') > 0: + d = d.replace('-Juin-','-Jun-') + if d.find('-Juil-') > 0: + d = d.replace('-Juil-','-Jul-') + if d.find(' 0 (GMT)'): + d = d.replace(' 0 (GMT)',' +0000') + + if self._date_multiminus_re.search(d): + d = self._date_multiminus_re.sub(' \\1', d) + + if self._date_offsetnoplus_re.search(d): + d = self._date_offsetnoplus_re.sub('+\\1', d) + + + # We have a number of dates in the format + # "<full datespace> +0200 (MET DST)" + # or similar. The problem coming from the space within the + # parenthesis, or if the contents of the parenthesis is + # completely empty + if self._date_multi_re.search(d): + d = self._date_multi_re.sub('', d) + + # If the spec is instead + # "<full datespace> +0200 (...)" + # of any kind, we can just remove what's in the (), because the + # parser is just going to rely on the fixed offset anyway. + if self._date_multi_re2.search(d): + d = self._date_multi_re2.sub(' \\1', d) + + try: + dp = dateutil.parser.parse(d, fuzzy=True) + + # Some offsets are >16 hours, which postgresql will not + # (for good reasons) accept + if dp.utcoffset() and abs(dp.utcoffset().days * (24*60*60) + dp.utcoffset().seconds) > 60*60*16-1: + # Convert it to a UTC timestamp using Python. It will give + # us the right time, but the wrong timezone. Should be + # enough... 
+ dp = datetime.datetime(*dp.utctimetuple()[:6]) + return dp + except Exception as e: + raise IgnorableException("Failed to parse date '%s': %s" % (d, e)) + + def _maybe_decode(self, s, charset): + if isinstance(s, str): + return s.strip(' ') + return str(s, charset and self.clean_charset(charset) or 'us-ascii', errors='ignore').strip(' ') + + # Workaround for broken quoting in some MUAs (see below) + _re_mailworkaround = re.compile('"(=\?[^\?]+\?[QB]\?[^\?]+\?=)"', re.IGNORECASE) + def _decode_mime_header(self, hdr, email_workaround): + if hdr == None: + return None + + # Per http://bugs.python.org/issue504152 (and lots of testing), it seems + # we must get rid of the sequence \n\t at least in the header. If we + # do this *before* doing any MIME decoding, we should be safe against + # anybody *actually* putting that sequence in the header (since we + # won't match the encoded contents) + hdr = hdr.replace("\n\t"," ") + + # In at least some cases, at least gmail (and possibly other MUAs) + # incorrectly put double quotes in the name/email field even when + # it's encoded. That's not allowed - they have to be escaped - but + # since there's a fair amount of those, we apply a regex to get + # rid of them. + m = self._re_mailworkaround.search(hdr) + if m: + hdr = self._re_mailworkaround.sub(r'\1', hdr) + + try: + return " ".join([self._maybe_decode(s, charset) for s, charset in decode_header(hdr)]) + except HeaderParseError as e: + # Parser error is typically someone specifying an encoding, + # but then not actually using that encoding. We'll do the best + # we can, which is cut it down to ascii and ignore errors + return str(hdr, 'us-ascii', errors='ignore').strip(' ') + + def decode_mime_header(self, hdr, email_workaround=False): + try: + if isinstance(hdr, Header): + hdr = hdr.encode() + + h = self._decode_mime_header(hdr, email_workaround) + if h: + return h.replace("\0", "") + return '' + except LookupError as e: + raise IgnorableException("Failed to decode header value '%s': %s" % (hdr, e)) + except ValueError as ve: + raise IgnorableException("Failed to decode header value '%s': %s" % (hdr, ve)) + + def get_mandatory(self, fieldname): + try: + x = self.msg[fieldname] + if x==None: + raise Exception() + return x + except: + raise IgnorableException("Mandatory field '%s' is missing" % fieldname) + + def get_optional(self, fieldname): + try: + return self.msg[fieldname] + except: + return '' + + def html_clean(self, html): + # First we pass it through tidy + (html, errors) = tidylib.tidy_document(html, + options={ + 'drop-proprietary-attributes': 1, + 'alt-text': '', + 'hide-comments': 1, + 'output-xhtml': 1, + 'show-body-only': 1, + 'clean': 1, + 'char-encoding': 'utf8', + 'show-warnings': 0, + 'show-info': 0, + }) + if errors: + print(("HTML tidy failed for %s!" % self.msgid)) + print(errors) + return None + + try: + cleaner = HTMLCleaner() + cleaner.feed(html) + return cleaner.get_text() + except Exception as e: + # Failed to parse the html, thus failed to clean it. so we must + # give up... 
+ return None class HTMLCleaner(HTMLParser): - def __init__(self): - HTMLParser.__init__(self) - self.io = io.StringIO() + def __init__(self): + HTMLParser.__init__(self) + self.io = io.StringIO() - def get_text(self): - return self.io.getvalue() + def get_text(self): + return self.io.getvalue() - def handle_data(self, data): - self.io.write(data) + def handle_data(self, data): + self.io.write(data) - def handle_starttag(self, tag, attrs): - if tag == "p" or tag == "br": - self.io.write("\n") + def handle_starttag(self, tag, attrs): + if tag == "p" or tag == "br": + self.io.write("\n") diff --git a/loader/lib/storage.py b/loader/lib/storage.py index 8962b87..2303fee 100644 --- a/loader/lib/storage.py +++ b/loader/lib/storage.py @@ -5,307 +5,307 @@ from .parser import ArchivesParser from lib.log import log, opstatus class ArchivesParserStorage(ArchivesParser): - def __init__(self): - super(ArchivesParserStorage, self).__init__() - self.purges = set() + def __init__(self): + super(ArchivesParserStorage, self).__init__() + self.purges = set() - def purge_list(self, listid, year, month): - self.purges.add((int(listid), int(year), int(month))) + def purge_list(self, listid, year, month): + self.purges.add((int(listid), int(year), int(month))) - def purge_thread(self, threadid): - self.purges.add(int(threadid)) + def purge_thread(self, threadid): + self.purges.add(int(threadid)) - def store(self, conn, listid, overwrite=False): - curs = conn.cursor() + def store(self, conn, listid, overwrite=False): + curs = conn.cursor() - # Potentially add the information that there exists a mail for - # this month. We do that this early since we're always going to - # make the check anyway, and this keeps the code in one place.. - if not overwrite: - curs.execute("INSERT INTO list_months (listid, year, month) SELECT %(listid)s, %(year)s, %(month)s WHERE NOT EXISTS (SELECT listid FROM list_months WHERE listid=%(listid)s AND year=%(year)s AND month=%(month)s)", { - 'listid': listid, - 'year': self.date.year, - 'month': self.date.month, - }) + # Potentially add the information that there exists a mail for + # this month. We do that this early since we're always going to + # make the check anyway, and this keeps the code in one place.. 
+ if not overwrite: + curs.execute("INSERT INTO list_months (listid, year, month) SELECT %(listid)s, %(year)s, %(month)s WHERE NOT EXISTS (SELECT listid FROM list_months WHERE listid=%(listid)s AND year=%(year)s AND month=%(month)s)", { + 'listid': listid, + 'year': self.date.year, + 'month': self.date.month, + }) - curs.execute("SELECT threadid, EXISTS(SELECT threadid FROM list_threads lt WHERE lt.listid=%(listid)s AND lt.threadid=m.threadid), id FROM messages m WHERE m.messageid=%(messageid)s", { - 'messageid': self.msgid, - 'listid': listid, - }) - r = curs.fetchall() - if len(r) > 0: - # Has to be 1 row, since we have a unique index on id - if not r[0][1] and not overwrite: - log.status("Tagging message %s with list %s" % (self.msgid, listid)) - curs.execute("INSERT INTO list_threads (threadid, listid) VALUES (%(threadid)s, %(listid)s)", { - 'threadid': r[0][0], - 'listid': listid, - }) - opstatus.tagged += 1 - self.purge_list(listid, self.date.year, self.date.month) - self.purge_thread(r[0][0]) - else: - opstatus.dupes += 1 + curs.execute("SELECT threadid, EXISTS(SELECT threadid FROM list_threads lt WHERE lt.listid=%(listid)s AND lt.threadid=m.threadid), id FROM messages m WHERE m.messageid=%(messageid)s", { + 'messageid': self.msgid, + 'listid': listid, + }) + r = curs.fetchall() + if len(r) > 0: + # Has to be 1 row, since we have a unique index on id + if not r[0][1] and not overwrite: + log.status("Tagging message %s with list %s" % (self.msgid, listid)) + curs.execute("INSERT INTO list_threads (threadid, listid) VALUES (%(threadid)s, %(listid)s)", { + 'threadid': r[0][0], + 'listid': listid, + }) + opstatus.tagged += 1 + self.purge_list(listid, self.date.year, self.date.month) + self.purge_thread(r[0][0]) + else: + opstatus.dupes += 1 - if overwrite: - pk = r[0][2] - self.purge_thread(r[0][0]) - # Overwrite an existing message. We do not attempt to - # "re-thread" a message, we just update the contents. We - # do remove all attachments and rewrite them. Of course, we - # don't change the messageid (since it's our primary - # identifyer), and we don't update the raw text of the message. - # (since we are expected to have used that raw text to do - # the re-parsing initially) - # We update bodytext as a separate step so as not to rewrite - # the TOAST table unnecessarily... - curs.execute("UPDATE messages SET bodytxt=%(bodytxt)s WHERE id=%(id)s AND NOT (bodytxt=%(bodytxt)s) RETURNING id", { - 'id': pk, - 'bodytxt': self.bodytxt, - }) - rc = curs.rowcount - curs.execute("UPDATE messages SET _from=%(from)s, _to=%(to)s, cc=%(cc)s, subject=%(subject)s, date=%(date)s, has_attachment=%(has_attachment)s WHERE id=%(id)s AND NOT (_from=%(from)s AND _to=%(to)s AND cc=%(cc)s AND subject=%(subject)s AND date=%(date)s AND has_attachment=%(has_attachment)s) RETURNING id", { - 'id': pk, - 'from': self._from, - 'to': self.to or '', - 'cc': self.cc or '', - 'subject': self.subject or '', - 'date': self.date, - 'has_attachment': len(self.attachments) > 0, - }) - rc += curs.rowcount - if rc == 0: - log.status("Message %s unchanged" % self.msgid) - return False + if overwrite: + pk = r[0][2] + self.purge_thread(r[0][0]) + # Overwrite an existing message. We do not attempt to + # "re-thread" a message, we just update the contents. We + # do remove all attachments and rewrite them. Of course, we + # don't change the messageid (since it's our primary + # identifyer), and we don't update the raw text of the message. 
+ # (since we are expected to have used that raw text to do + # the re-parsing initially) + # We update bodytext as a separate step so as not to rewrite + # the TOAST table unnecessarily... + curs.execute("UPDATE messages SET bodytxt=%(bodytxt)s WHERE id=%(id)s AND NOT (bodytxt=%(bodytxt)s) RETURNING id", { + 'id': pk, + 'bodytxt': self.bodytxt, + }) + rc = curs.rowcount + curs.execute("UPDATE messages SET _from=%(from)s, _to=%(to)s, cc=%(cc)s, subject=%(subject)s, date=%(date)s, has_attachment=%(has_attachment)s WHERE id=%(id)s AND NOT (_from=%(from)s AND _to=%(to)s AND cc=%(cc)s AND subject=%(subject)s AND date=%(date)s AND has_attachment=%(has_attachment)s) RETURNING id", { + 'id': pk, + 'from': self._from, + 'to': self.to or '', + 'cc': self.cc or '', + 'subject': self.subject or '', + 'date': self.date, + 'has_attachment': len(self.attachments) > 0, + }) + rc += curs.rowcount + if rc == 0: + log.status("Message %s unchanged" % self.msgid) + return False - curs.execute("DELETE FROM attachments WHERE message=%(message)s", { - 'message': pk, - }) - if len(self.attachments): - curs.executemany("INSERT INTO attachments (message, filename, contenttype, attachment) VALUES (%(message)s, %(filename)s, %(contenttype)s, %(attachment)s)",[ { - 'message': pk, - 'filename': a[0] or 'unknown_filename', - 'contenttype': a[1], - 'attachment': bytearray(a[2]), - } for a in self.attachments]) - opstatus.overwritten += 1 - log.status("Message %s overwritten" % self.msgid) - else: - log.status("Message %s already stored" % self.msgid) - return True + curs.execute("DELETE FROM attachments WHERE message=%(message)s", { + 'message': pk, + }) + if len(self.attachments): + curs.executemany("INSERT INTO attachments (message, filename, contenttype, attachment) VALUES (%(message)s, %(filename)s, %(contenttype)s, %(attachment)s)",[ { + 'message': pk, + 'filename': a[0] or 'unknown_filename', + 'contenttype': a[1], + 'attachment': bytearray(a[2]), + } for a in self.attachments]) + opstatus.overwritten += 1 + log.status("Message %s overwritten" % self.msgid) + else: + log.status("Message %s already stored" % self.msgid) + return True - if overwrite: - raise Exception("Attempt to overwrite message (%s) that doesn't exist on list %s!" % (self.msgid, listid)) - # Always purge the primary list for this thread - self.purge_list(listid, self.date.year, self.date.month) + if overwrite: + raise Exception("Attempt to overwrite message (%s) that doesn't exist on list %s!" % (self.msgid, listid)) + # Always purge the primary list for this thread + self.purge_list(listid, self.date.year, self.date.month) - # Resolve own thread - curs.execute("SELECT id, messageid, threadid FROM messages WHERE messageid=ANY(%(parents)s)", { - 'parents': self.parents, - }) - all_parents = curs.fetchall() - if len(all_parents): - # At least one of the parents exist. Now try to figure out which one - best_parent = len(self.parents)+1 - best_threadid = -1 - best_parentid = None - for i in range(0,len(all_parents)): - for j in range(0,len(self.parents)): - if self.parents[j] == all_parents[i][1]: - # This messageid found. Better than the last one? 
- if j < best_parent: - best_parent = j - best_parentid = all_parents[i][0] - best_threadid = all_parents[i][2] - if best_threadid == -1: - raise Exception("Message %s, resolve failed in a way it shouldn't :P" % selg.msgid) - self.parentid = best_parentid - self.threadid = best_threadid - # Slice away all matches that are worse than the one we wanted - self.parents = self.parents[:best_parent] + # Resolve own thread + curs.execute("SELECT id, messageid, threadid FROM messages WHERE messageid=ANY(%(parents)s)", { + 'parents': self.parents, + }) + all_parents = curs.fetchall() + if len(all_parents): + # At least one of the parents exist. Now try to figure out which one + best_parent = len(self.parents)+1 + best_threadid = -1 + best_parentid = None + for i in range(0,len(all_parents)): + for j in range(0,len(self.parents)): + if self.parents[j] == all_parents[i][1]: + # This messageid found. Better than the last one? + if j < best_parent: + best_parent = j + best_parentid = all_parents[i][0] + best_threadid = all_parents[i][2] + if best_threadid == -1: + raise Exception("Message %s, resolve failed in a way it shouldn't :P" % selg.msgid) + self.parentid = best_parentid + self.threadid = best_threadid + # Slice away all matches that are worse than the one we wanted + self.parents = self.parents[:best_parent] - log.status("Message %s resolved to existing thread %s, waiting for %s better messages" % (self.msgid, self.threadid, len(self.parents))) - else: - # No parent exist. But don't create the threadid just yet, since - # it's possible that we're somebody elses parent! - self.parentid = None - self.threadid = None + log.status("Message %s resolved to existing thread %s, waiting for %s better messages" % (self.msgid, self.threadid, len(self.parents))) + else: + # No parent exist. But don't create the threadid just yet, since + # it's possible that we're somebody elses parent! + self.parentid = None + self.threadid = None - # Now see if we are somebody elses *parent*... - curs.execute("SELECT message, priority, threadid FROM unresolved_messages INNER JOIN messages ON messages.id=unresolved_messages.message WHERE unresolved_messages.msgid=%(msgid)s ORDER BY threadid", { - 'msgid': self.msgid, - }) - childrows = curs.fetchall() - if len(childrows): - # We are some already existing message's parent (meaning the - # messages arrived out of order) - # In the best case, the threadid is the same for all threads. - # But it might be different if this it the "glue message" that's - # holding other threads together. - if self.threadid: - # Already have a threadid, means that we have a glue message - print("Message %s resolved to existing thread %s, while being somebodys parent" % (self.msgid, self.threadid)) - else: - print("Message %s did not resolve to existing thread, but is somebodys parent" % self.msgid) - # In this case, just pick the first thread from the list and merge into that - # one. - self.threadid = childrows[0][2] + # Now see if we are somebody elses *parent*... + curs.execute("SELECT message, priority, threadid FROM unresolved_messages INNER JOIN messages ON messages.id=unresolved_messages.message WHERE unresolved_messages.msgid=%(msgid)s ORDER BY threadid", { + 'msgid': self.msgid, + }) + childrows = curs.fetchall() + if len(childrows): + # We are some already existing message's parent (meaning the + # messages arrived out of order) + # In the best case, the threadid is the same for all threads. 
+ # But it might be different if this it the "glue message" that's + # holding other threads together. + if self.threadid: + # Already have a threadid, means that we have a glue message + print("Message %s resolved to existing thread %s, while being somebodys parent" % (self.msgid, self.threadid)) + else: + print("Message %s did not resolve to existing thread, but is somebodys parent" % self.msgid) + # In this case, just pick the first thread from the list and merge into that + # one. + self.threadid = childrows[0][2] - # Get a unique list (set) of all threads *except* the primary one, - # because we'll be merging into that one. - mergethreads = set([r[2] for r in childrows]).difference(set((self.threadid,))) - if len(mergethreads): - # We have one or more merge threads - log.status("Merging threads %s into thread %s" % (",".join(str(s) for s in mergethreads), self.threadid)) - curs.execute("UPDATE messages SET threadid=%(threadid)s WHERE threadid=ANY(%(oldthreadids)s)", { - 'threadid': self.threadid, - 'oldthreadids': list(mergethreads), - }) - # Insert any lists that were tagged on the merged threads - curs.execute("INSERT INTO list_threads (threadid, listid) SELECT DISTINCT %(threadid)s,listid FROM list_threads lt2 WHERE lt2.threadid=ANY(%(oldthreadids)s) AND listid NOT IN (SELECT listid FROM list_threads lt3 WHERE lt3.threadid=%(threadid)s)", { - 'threadid': self.threadid, - 'oldthreadids': list(mergethreads), - }) - # Remove all old leftovers - curs.execute("DELETE FROM list_threads WHERE threadid=ANY(%(oldthreadids)s)", { - 'oldthreadids': list(mergethreads), - }) - # Purge varnish records for all the threads we just removed - for t in mergethreads: - self.purge_thread(t) + # Get a unique list (set) of all threads *except* the primary one, + # because we'll be merging into that one. + mergethreads = set([r[2] for r in childrows]).difference(set((self.threadid,))) + if len(mergethreads): + # We have one or more merge threads + log.status("Merging threads %s into thread %s" % (",".join(str(s) for s in mergethreads), self.threadid)) + curs.execute("UPDATE messages SET threadid=%(threadid)s WHERE threadid=ANY(%(oldthreadids)s)", { + 'threadid': self.threadid, + 'oldthreadids': list(mergethreads), + }) + # Insert any lists that were tagged on the merged threads + curs.execute("INSERT INTO list_threads (threadid, listid) SELECT DISTINCT %(threadid)s,listid FROM list_threads lt2 WHERE lt2.threadid=ANY(%(oldthreadids)s) AND listid NOT IN (SELECT listid FROM list_threads lt3 WHERE lt3.threadid=%(threadid)s)", { + 'threadid': self.threadid, + 'oldthreadids': list(mergethreads), + }) + # Remove all old leftovers + curs.execute("DELETE FROM list_threads WHERE threadid=ANY(%(oldthreadids)s)", { + 'oldthreadids': list(mergethreads), + }) + # Purge varnish records for all the threads we just removed + for t in mergethreads: + self.purge_thread(t) - # Batch all the children for repointing. We can't do the actual - # repointing until later, since we don't know our own id yet. - self.children = [r[0] for r in childrows] - log.status("Children set to %s with mergethreads being %s (from childrows %s and threadid %s)" % ( - self.children, mergethreads, childrows, self.threadid)) + # Batch all the children for repointing. We can't do the actual + # repointing until later, since we don't know our own id yet. 
- # Batch all the children for repointing. We can't do the actual
- # repointing until later, since we don't know our own id yet.
- self.children = [r[0] for r in childrows]
- log.status("Children set to %s with mergethreads being %s (from childrows %s and threadid %s)" % (
- self.children, mergethreads, childrows, self.threadid))
+ # Batch all the children for repointing. We can't do the actual
+ # repointing until later, since we don't know our own id yet.
+ self.children = [r[0] for r in childrows]
+ log.status("Children set to %s with mergethreads being %s (from childrows %s and threadid %s)" % (
+ self.children, mergethreads, childrows, self.threadid))

- # Finally, remove all the pending messages that had a higher
- # priority value (meaning less important) than us
- curs.executemany("DELETE FROM unresolved_messages WHERE message=%(msg)s AND priority >= %(prio)s", [{
- 'msg': msg,
- 'prio': prio,
- } for msg, prio, tid in childrows])
- else:
- self.children = []
+ # Finally, remove all the pending messages that had a higher
+ # priority value (meaning less important) than us
+ curs.executemany("DELETE FROM unresolved_messages WHERE message=%(msg)s AND priority >= %(prio)s", [{
+ 'msg': msg,
+ 'prio': prio,
+ } for msg, prio, tid in childrows])
+ else:
+ self.children = []

- if not self.threadid:
- # No parent and no child exists - create a new threadid, just for us!
- curs.execute("SELECT nextval('threadid_seq')")
- self.threadid = curs.fetchall()[0][0]
- log.status("Message %s resolved to no parent (out of %s) and no child, new thread %s" % (self.msgid, len(self.parents), self.threadid))
- else:
- # We have a threadid already, so we're not a new thread. Thus,
- # we need to purge the old thread
- self.purge_thread(self.threadid)
+ if not self.threadid:
+ # No parent and no child exists - create a new threadid, just for us!
+ curs.execute("SELECT nextval('threadid_seq')")
+ self.threadid = curs.fetchall()[0][0]
+ log.status("Message %s resolved to no parent (out of %s) and no child, new thread %s" % (self.msgid, len(self.parents), self.threadid))
+ else:
+ # We have a threadid already, so we're not a new thread. Thus,
+ # we need to purge the old thread
+ self.purge_thread(self.threadid)

- # Insert a thread tag if we're on a new list
- curs.execute("INSERT INTO list_threads (threadid, listid) SELECT %(threadid)s, %(listid)s WHERE NOT EXISTS (SELECT * FROM list_threads t2 WHERE t2.threadid=%(threadid)s AND t2.listid=%(listid)s) RETURNING threadid", {
- 'threadid': self.threadid,
- 'listid': listid,
- })
- if len(curs.fetchall()):
- log.status("Tagged thread %s with listid %s" % (self.threadid, listid))
+ # Insert a thread tag if we're on a new list
+ curs.execute("INSERT INTO list_threads (threadid, listid) SELECT %(threadid)s, %(listid)s WHERE NOT EXISTS (SELECT * FROM list_threads t2 WHERE t2.threadid=%(threadid)s AND t2.listid=%(listid)s) RETURNING threadid", {
+ 'threadid': self.threadid,
+ 'listid': listid,
+ })
+ if len(curs.fetchall()):
+ log.status("Tagged thread %s with listid %s" % (self.threadid, listid))
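
The thread-tagging statement above uses an INSERT ... SELECT ... WHERE NOT EXISTS ... RETURNING idiom: the row is only inserted when the (threadid, listid) pair is new, and the presence of a RETURNING row tells the caller whether tagging actually happened. A sketch of the same idiom, assuming a psycopg2-style cursor and hypothetical ids:

    curs.execute(
        "INSERT INTO list_threads (threadid, listid) "
        "SELECT %(threadid)s, %(listid)s "
        "WHERE NOT EXISTS (SELECT 1 FROM list_threads t2 "
        "    WHERE t2.threadid=%(threadid)s AND t2.listid=%(listid)s) "
        "RETURNING threadid",
        {'threadid': 3, 'listid': 42})
    newly_tagged = bool(curs.fetchall())       # True only the first time

Worth noting: without a unique constraint on (threadid, listid), this pattern is not race-free under concurrent writers; it relies on a single loader doing the inserts.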
- curs.execute("INSERT INTO messages (parentid, threadid, _from, _to, cc, subject, date, has_attachment, messageid, bodytxt, rawtxt) VALUES (%(parentid)s, %(threadid)s, %(from)s, %(to)s, %(cc)s, %(subject)s, %(date)s, %(has_attachment)s, %(messageid)s, %(bodytxt)s, %(rawtxt)s) RETURNING id", {
- 'parentid': self.parentid,
- 'threadid': self.threadid,
- 'from': self._from,
- 'to': self.to or '',
- 'cc': self.cc or '',
- 'subject': self.subject or '',
- 'date': self.date,
- 'has_attachment': len(self.attachments) > 0,
- 'messageid': self.msgid,
- 'bodytxt': self.bodytxt,
- 'rawtxt': bytearray(self.rawtxt),
- })
- id = curs.fetchall()[0][0]
- log.status("Message %s, got id %s, set thread %s, parent %s" % (
- self.msgid, id, self.threadid, self.parentid))
- if len(self.attachments):
- # Insert attachments
- curs.executemany("INSERT INTO attachments (message, filename, contenttype, attachment) VALUES (%(message)s, %(filename)s, %(contenttype)s, %(attachment)s)",[ {
- 'message': id,
- 'filename': a[0] or 'unknown_filename',
- 'contenttype': a[1],
- 'attachment': bytearray(a[2]),
- } for a in self.attachments])
+ curs.execute("INSERT INTO messages (parentid, threadid, _from, _to, cc, subject, date, has_attachment, messageid, bodytxt, rawtxt) VALUES (%(parentid)s, %(threadid)s, %(from)s, %(to)s, %(cc)s, %(subject)s, %(date)s, %(has_attachment)s, %(messageid)s, %(bodytxt)s, %(rawtxt)s) RETURNING id", {
+ 'parentid': self.parentid,
+ 'threadid': self.threadid,
+ 'from': self._from,
+ 'to': self.to or '',
+ 'cc': self.cc or '',
+ 'subject': self.subject or '',
+ 'date': self.date,
+ 'has_attachment': len(self.attachments) > 0,
+ 'messageid': self.msgid,
+ 'bodytxt': self.bodytxt,
+ 'rawtxt': bytearray(self.rawtxt),
+ })
+ id = curs.fetchall()[0][0]
+ log.status("Message %s, got id %s, set thread %s, parent %s" % (
+ self.msgid, id, self.threadid, self.parentid))
+ if len(self.attachments):
+ # Insert attachments
+ curs.executemany("INSERT INTO attachments (message, filename, contenttype, attachment) VALUES (%(message)s, %(filename)s, %(contenttype)s, %(attachment)s)",[ {
+ 'message': id,
+ 'filename': a[0] or 'unknown_filename',
+ 'contenttype': a[1],
+ 'attachment': bytearray(a[2]),
+ } for a in self.attachments])

- if len(self.children):
- log.status("Setting %s other messages to children of %s" % (len(self.children), self.msgid))
- curs.executemany("UPDATE messages SET parentid=%(parent)s WHERE id=%(id)s",
- [{'parent': id, 'id': c} for c in self.children])
- if len(self.parents):
- # There are remaining parents we'd rather have, to get ourselves
- # properly threaded - so store them in the db.
- curs.executemany("INSERT INTO unresolved_messages (message, priority, msgid) VALUES (%(id)s, %(priority)s, %(msgid)s)",
- [{'id': id, 'priority': i, 'msgid': self.parents[i]} for i in range(0, len(self.parents))])
+ if len(self.children):
+ log.status("Setting %s other messages to children of %s" % (len(self.children), self.msgid))
+ curs.executemany("UPDATE messages SET parentid=%(parent)s WHERE id=%(id)s",
+ [{'parent': id, 'id': c} for c in self.children])
+ if len(self.parents):
+ # There are remaining parents we'd rather have, to get ourselves
+ # properly threaded - so store them in the db.
+ curs.executemany("INSERT INTO unresolved_messages (message, priority, msgid) VALUES (%(id)s, %(priority)s, %(msgid)s)",
+ [{'id': id, 'priority': i, 'msgid': self.parents[i]} for i in range(0, len(self.parents))])
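
The priority column stored here is simply the parent's index in self.parents, so a lower value means a more-preferred parent; the DELETE ... priority >= ... earlier in this function is what lets a better-matching message displace these rows later. A small sketch with hypothetical values:

    # Hypothetical: message id 55 is still waiting for two possible parents.
    parents = ['<best@example.org>', '<fallback@example.org>']
    id = 55
    rows = [{'id': id, 'priority': i, 'msgid': parents[i]}
            for i in range(0, len(parents))]
    assert rows[0]['priority'] == 0            # most-preferred parent
    assert rows[1]['msgid'] == '<fallback@example.org>'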
- opstatus.stored += 1
- return True
+ opstatus.stored += 1
+ return True

- def diff(self, conn, f, fromonlyf, oldid):
- curs = conn.cursor()
+ def diff(self, conn, f, fromonlyf, oldid):
+ curs = conn.cursor()

- # Fetch the old one so we have something to diff against
- curs.execute("SELECT id, _from, _to, cc, subject, date, has_attachment, bodytxt FROM messages WHERE messageid=%(msgid)s", {
- 'msgid': self.msgid,
- })
- try:
- id, _from, to, cc, subject, date, has_attachment, bodytxt = curs.fetchone()
- except TypeError as e:
- f.write("---- %s ----\n" % self.msgid)
- f.write("Could not re-find in archives (old id was %s): %s\n" % (oldid, e))
- f.write("\n-------------------------------\n\n")
- return
+ # Fetch the old one so we have something to diff against
+ curs.execute("SELECT id, _from, _to, cc, subject, date, has_attachment, bodytxt FROM messages WHERE messageid=%(msgid)s", {
+ 'msgid': self.msgid,
+ })
+ try:
+ id, _from, to, cc, subject, date, has_attachment, bodytxt = curs.fetchone()
+ except TypeError as e:
+ f.write("---- %s ----\n" % self.msgid)
+ f.write("Could not re-find in archives (old id was %s): %s\n" % (oldid, e))
+ f.write("\n-------------------------------\n\n")
+ return

- if (_from.rstrip(), to.rstrip(), cc.rstrip(), subject.rstrip()) != (self._from, self.to, self.cc, self.subject):
- log.status("Message %s has header changes " % self.msgid)
- f.write("==== %s ====\n" % self.msgid)
- for fn in ['_from', 'to', 'cc', 'subject']:
- if getattr(self, fn) != eval(fn):
- s = "- {0}: {1}\n".format(fn, eval(fn))
- d = "+ {0}: {1}\n".format(fn, getattr(self, fn))
- f.write(s)
- f.write(d)
- f.write("\n\n")
+ if (_from.rstrip(), to.rstrip(), cc.rstrip(), subject.rstrip()) != (self._from, self.to, self.cc, self.subject):
+ log.status("Message %s has header changes " % self.msgid)
+ f.write("==== %s ====\n" % self.msgid)
+ for fn in ['_from', 'to', 'cc', 'subject']:
+ if getattr(self, fn) != eval(fn):
+ s = "- {0}: {1}\n".format(fn, eval(fn))
+ d = "+ {0}: {1}\n".format(fn, getattr(self, fn))
+ f.write(s)
+ f.write(d)
+ f.write("\n\n")

- if bodytxt != self.bodytxt:
- log.status("Message %s has body changes " % self.msgid)
- tempdiff = list(difflib.unified_diff(bodytxt.splitlines(),
- self.bodytxt.splitlines(),
- fromfile='old',
- tofile='new',
- n=0,
- lineterm=''))
- if (len(tempdiff)-2) % 3 == 0:
- # 3 rows to a diff, two header rows.
- # Then verify that each slice of 3 contains one @@ row (header), one -From and one +>From,
- # which indicates the only change is in the From.
- ok = True
- tempdiff = tempdiff[2:]
- while tempdiff:
- a,b,c = (tempdiff.pop(0), tempdiff.pop(0), tempdiff.pop(0))
- if not (a.startswith('@@ ') and b.startswith('-From ') and c.startswith('+>From ')):
- ok=False
- break
- if ok:
- fromonlyf.write("%s\n" % self.msgid)
- return
+ if bodytxt != self.bodytxt:
+ log.status("Message %s has body changes " % self.msgid)
+ tempdiff = list(difflib.unified_diff(bodytxt.splitlines(),
+ self.bodytxt.splitlines(),
+ fromfile='old',
+ tofile='new',
+ n=0,
+ lineterm=''))
+ if (len(tempdiff)-2) % 3 == 0:
+ # 3 rows to a diff, two header rows.
+ # Then verify that each slice of 3 contains one @@ row (header), one -From and one +>From,
+ # which indicates the only change is in the From.
+ ok = True
+ tempdiff = tempdiff[2:]
+ while tempdiff:
+ a,b,c = (tempdiff.pop(0), tempdiff.pop(0), tempdiff.pop(0))
+ if not (a.startswith('@@ ') and b.startswith('-From ') and c.startswith('+>From ')):
+ ok=False
+ break
+ if ok:
+ fromonlyf.write("%s\n" % self.msgid)
+ return
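
The (len(tempdiff)-2) % 3 check above works because a unified diff with n=0 context produces two header rows followed by groups of three rows whenever each change is a single replaced line. A self-contained illustration of the From-escaping case the code is looking for, using a hypothetical message body:

    import difflib

    old = ["From sender@example.org Mon Jan  1 00:00:00 2001", "Hello"]
    new = [">From sender@example.org Mon Jan  1 00:00:00 2001", "Hello"]
    tempdiff = list(difflib.unified_diff(old, new, fromfile='old', tofile='new',
                                         n=0, lineterm=''))
    # ['--- old', '+++ new', '@@ -1 +1 @@', '-From ...', '+>From ...']
    assert (len(tempdiff) - 2) % 3 == 0
    a, b, c = tempdiff[2], tempdiff[3], tempdiff[4]
    assert a.startswith('@@ ') and b.startswith('-From ') and c.startswith('+>From ')

That is exactly the change mbox ">From " escaping introduces, which is why such messages are recorded in fromonlyf rather than in the real diff output.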
- # Generate a nicer diff
- d = list(difflib.unified_diff(bodytxt.splitlines(),
- self.bodytxt.splitlines(),
- fromfile='old',
- tofile='new',
- n=0,
- lineterm=''))
- if len(d) > 0:
- f.write("---- %s ----\n" % self.msgid)
- f.write("\n".join(d))
- f.write("\n\n")
- else:
- log.status("Message %s unchanged." % self.msgid)
+ # Generate a nicer diff
+ d = list(difflib.unified_diff(bodytxt.splitlines(),
+ self.bodytxt.splitlines(),
+ fromfile='old',
+ tofile='new',
+ n=0,
+ lineterm=''))
+ if len(d) > 0:
+ f.write("---- %s ----\n" % self.msgid)
+ f.write("\n".join(d))
+ f.write("\n\n")
+ else:
+ log.status("Message %s unchanged." % self.msgid)
diff --git a/loader/lib/varnish.py b/loader/lib/varnish.py
index f2a06c3..99d2d50 100644
--- a/loader/lib/varnish.py
+++ b/loader/lib/varnish.py
@@ -3,31 +3,31 @@ import requests
from lib.log import log
class VarnishPurger(object):
- def __init__(self, cfg):
- self.cfg = cfg
+ def __init__(self, cfg):
+ self.cfg = cfg

- def purge(self, purges):
- if not len(purges):
- return
+ def purge(self, purges):
+ if not len(purges):
+ return

- if not self.cfg.has_option('varnish', 'purgeurl'):
- return
+ if not self.cfg.has_option('varnish', 'purgeurl'):
+ return

- purgeurl = self.cfg.get('varnish', 'purgeurl')
- exprlist = []
- for p in purges:
- if isinstance(p, tuple):
- # Purging a list
- exprlist.append('obj.http.x-pglm ~ :%s/%s/%s:' % p)
- else:
- # Purging an individual thread
- exprlist.append('obj.http.x-pgthread ~ :%s:' % p)
- purgedict = dict(list(zip(['p%s' % n for n in range(0, len(exprlist))], exprlist)))
- purgedict['n'] = len(exprlist)
- r = requests.post(purgeurl, data=purgedict, headers={
- 'Content-type': 'application/x-www-form-urlencoded',
- 'Host': 'www.postgresql.org',
- })
- if r.status_code != 200:
- log.error("Failed to send purge request!")
+ purgeurl = self.cfg.get('varnish', 'purgeurl')
+ exprlist = []
+ for p in purges:
+ if isinstance(p, tuple):
+ # Purging a list
+ exprlist.append('obj.http.x-pglm ~ :%s/%s/%s:' % p)
+ else:
+ # Purging an individual thread
+ exprlist.append('obj.http.x-pgthread ~ :%s:' % p)
+ purgedict = dict(list(zip(['p%s' % n for n in range(0, len(exprlist))], exprlist)))
+ purgedict['n'] = len(exprlist)
+ r = requests.post(purgeurl, data=purgedict, headers={
+ 'Content-type': 'application/x-www-form-urlencoded',
+ 'Host': 'www.postgresql.org',
+ })
+ if r.status_code != 200:
+ log.error("Failed to send purge request!")
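
For reference, a hypothetical usage sketch of the purger: tuples (which appear to be (listid, year, month)) purge a whole list page via the x-pglm header, while bare values purge a single thread via x-pgthread. Here cfg is assumed to be a configparser-style object with purgeurl set in its [varnish] section:

    purger = VarnishPurger(cfg)
    purger.purge([
        (12, 2019, 1),  # list 12, Jan 2019 -> 'obj.http.x-pglm ~ :12/2019/1:'
        4711,           # one thread        -> 'obj.http.x-pgthread ~ :4711:'
    ])

Both expressions are posted as p0, p1, ... form fields plus a count n, which is what the ban handler on the Varnish side is expected to read back.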