Update loader scripts to use python3 syntax

author Magnus Hagander <magnus@hagander.net>
Thu, 3 Jan 2019 10:04:29 +0000 (11:04 +0100)

committer Magnus Hagander <magnus@hagander.net>
Thu, 3 Jan 2019 10:04:29 +0000 (11:04 +0100)
diff --git a/loader/clean_date.py b/loader/clean_date.py

index faac7ab2f05b5a1197191b091f4408fd7ad703a7..4ea295100435992770c55c8b17b6095a3df70421 100755 (executable)
--- a/loader/clean_date.py
+++ b/loader/clean_date.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
  #
  # Clean up old, broken, dates
  #
@@ -7,17 +7,17 @@ import os
  import sys
  import re
  
-from ConfigParser import ConfigParser
+from configparser import ConfigParser
  
  from email.parser import Parser
-from urllib import urlopen
+from urllib.request import urlopen
  import dateutil.parser
  
  import psycopg2
  
  def scan_message(messageid, olddate, curs):
         u = "http://archives.postgresql.org/msgtxt.php?id=%s" % messageid
-       print "Scanning message at %s (date reported as %s)..." % (u, olddate)
+       print("Scanning message at %s (date reported as %s)..." % (u, olddate))
  
         f = urlopen(u)
         p = Parser()
@@ -26,10 +26,10 @@ def scan_message(messageid, olddate, curs):
  
         # Can be either one of them, but we really don't care...
         ds = None
-       for k,r in msg.items():
+       for k,r in list(msg.items()):
                 if k != 'Received': continue
  
-               print "Trying on %s" % r
+               print("Trying on %s" % r)
                 m = re.search(';\s*(.*)$', r)
                 if m:
                         ds = m.group(1)
@@ -40,23 +40,23 @@ def scan_message(messageid, olddate, curs):
                         break
  
         if not ds:
-               print "Could not find date. Sorry."
+               print("Could not find date. Sorry.")
                 return False
         d = None
         try:
                 d = dateutil.parser.parse(ds)
         except:
-               print "Could not parse date '%s', sorry." % ds
+               print("Could not parse date '%s', sorry." % ds)
                 return
  
         while True:
-               x = raw_input("Parsed this as date %s. Update? " % d)
+               x = input("Parsed this as date %s. Update? " % d)
                 if x.upper() == 'Y':
                         curs.execute("UPDATE messages SET date=%(d)s WHERE messageid=%(m)s", {
                                         'd': d,
                                         'm': messageid,
                                         })
-                       print "Updated."
+                       print("Updated.")
                         break
                 elif x.upper() == 'N':
                         break
@@ -74,4 +74,4 @@ if __name__ == "__main__":
                 scan_message(messageid, date, curs)
  
         conn.commit()
-       print "Done."
+       print("Done.")
diff --git a/loader/generate_mbox.py b/loader/generate_mbox.py

index 42404c45e0549012af345eba24d84b5338a99d21..c2299e1f6d1610bb0c744a6face0ea2df64639e7 100755 (executable)
--- a/loader/generate_mbox.py
+++ b/loader/generate_mbox.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
  #
  # generate_mbox.py - generate an mbox file from the rawtxt stored
  #                    in the datatabase.
@@ -11,27 +11,34 @@ import calendar
  import re
  
  import argparse
-from ConfigParser import ConfigParser
+from configparser import ConfigParser
  import email.parser
+import email.policy
  import email.generator
-from StringIO import StringIO
+from io import BytesIO
  
  import psycopg2
  
  
  def generate_single_mbox(conn, listid, year, month, destination):
         curs = conn.cursor()
-       curs.execute("SELECT rawtxt FROM messages m INNER JOIN list_threads t ON t.threadid=m.threadid WHERE hiddenstatus IS NULL AND listid=%(listid)s AND date>=%(startdate)s AND date <= %(enddate)s ORDER BY date", {
+       curs.execute("SELECT id, rawtxt FROM messages m INNER JOIN list_threads t ON t.threadid=m.threadid WHERE hiddenstatus IS NULL AND listid=%(listid)s AND date>=%(startdate)s AND date <= %(enddate)s ORDER BY date", {
                 'listid': listid,
                 'startdate': date(year, month, 1),
                 'enddate': date(year, month, calendar.monthrange(year, month)[1]),
         })
-       with open(destination, 'w') as f:
-               for raw, in curs:
-                       s = StringIO(raw)
-                       parser = email.parser.Parser()
+       with open(destination, 'w', encoding='utf8') as f:
+               for id, raw, in curs:
+                       s = BytesIO(raw)
+                       parser = email.parser.BytesParser(policy=email.policy.compat32)
                         msg = parser.parse(s)
-                       f.write(msg.as_string(unixfrom=True))
+                       try:
+                               x = msg.as_string(unixfrom=True)
+                               f.write(x)
+                       except UnicodeEncodeError as e:
+                               print("Not including {0}, unicode error".format(msg['message-id']))
+                       except Exception as e:
+                               print("Not including {0}, exception {1}".format(msg['message-id'], e))
  
  
  if __name__ == "__main__":
@@ -46,14 +53,14 @@ if __name__ == "__main__":
  
         if args.auto:
                 if (args.list or args.month):
-                       print "Must not specify list and month when auto-generating!"
+                       print("Must not specify list and month when auto-generating!")
                         sys.exit(1)
                 if not os.path.isdir(args.destination):
-                       print "Destination must be a directory, and exist, when auto-generating"
+                       print("Destination must be a directory, and exist, when auto-generating")
                         sys.exit(1)
         else:
                 if not (args.list and args.month and args.destination):
-                       print "Must specify list, month and destination when generating a single mailbox"
+                       print("Must specify list, month and destination when generating a single mailbox")
                         parser.print_help()
                         sys.exit(1)
  
@@ -85,14 +92,14 @@ if __name__ == "__main__":
                                 if not os.path.isdir(fullpath):
                                         os.makedirs(fullpath)
                                 if not args.quiet:
-                                       print "Generating {0}-{1} for {2}".format(year, month, lname)
+                                       print("Generating {0}-{1} for {2}".format(year, month, lname))
                                 generate_single_mbox(conn, lid, year, month,
                                                                          os.path.join(fullpath, "{0}.{0:04d}{1:02d}".format(year, month)))
         else:
                 # Parse year and month
                 m = re.match('^(\d{4})-(\d{2})$', args.month)
                 if not m:
-                       print "Month must be specified on format YYYY-MM, not {0}".format(args.month)
+                       print("Month must be specified on format YYYY-MM, not {0}".format(args.month))
                         sys.exit(1)
                 year = int(m.group(1))
                 month = int(m.group(2))
@@ -101,9 +108,9 @@ if __name__ == "__main__":
                         'name': args.list,
                 })
                 if curs.rowcount != 1:
-                       print "List {0} not found.".format(args.list)
+                       print("List {0} not found.".format(args.list))
                         sys.exit(1)
  
                 if not args.quiet:
-                       print "Generating {0}-{1} for {2}".format(year, month, args.list)
+                       print("Generating {0}-{1} for {2}".format(year, month, args.list))
                 generate_single_mbox(conn, curs.fetchone()[0], year, month, args.destination)
diff --git a/loader/hide_message.py b/loader/hide_message.py

index 51bffc6b1267c6996ba003746756fd599330d716..8bb9359662070f78de776af7d0736539fa76e4bd 100755 (executable)
--- a/loader/hide_message.py
+++ b/loader/hide_message.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
  #
  # hide_message.py - hide a message (spam etc) in the archives, including
  # frontend expiry.
@@ -8,7 +8,7 @@ import os
  import sys
  
  from optparse import OptionParser
-from ConfigParser import ConfigParser
+from configparser import ConfigParser
  
  import psycopg2
  
@@ -29,12 +29,12 @@ if __name__ == "__main__":
         (opt, args) = optparser.parse_args()
  
         if (len(args)):
-               print "No bare arguments accepted"
+               print("No bare arguments accepted")
                 optparser.print_help()
                 sys.exit(1)
  
         if not opt.msgid:
-               print "Message-id must be specified"
+               print("Message-id must be specified")
                 optparser.print_help()
                 sys.exit(1)
  
@@ -52,34 +52,34 @@ if __name__ == "__main__":
                 'msgid': opt.msgid,
         })
         if curs.rowcount <= 0:
-               print "Message not found."
+               print("Message not found.")
                 sys.exit(1)
  
         id, threadid, previous = curs.fetchone()
  
         # Message found, ask for reason
         reason = 0
-       print "Current status: %s" % reasons[previous or 0]
-       print "\n".join("%s - %s " % (n, reasons[n]) for n in range(len(reasons)))
+       print("Current status: %s" % reasons[previous or 0])
+       print("\n".join("%s - %s " % (n, reasons[n]) for n in range(len(reasons))))
         while True:
-               reason = raw_input('Reason for hiding message? ')
+               reason = input('Reason for hiding message? ')
                 try:
                         reason = int(reason)
                 except ValueError:
                         continue
  
                 if reason == 0:
-                       print "Un-hiding message"
+                       print("Un-hiding message")
                         reason = None
                         break
                 else:
                         try:
-                               print "Hiding message for reason: %s" % reasons[reason]
+                               print("Hiding message for reason: %s" % reasons[reason])
                         except:
                                 continue
                         break
         if previous == reason:
-               print "No change in status, not updating"
+               print("No change in status, not updating")
                 conn.close()
                 sys.exit(0)
  
@@ -88,7 +88,7 @@ if __name__ == "__main__":
                 'id': id,
         })
         if curs.rowcount != 1:
-               print "Failed to update! Not hiding!"
+               print("Failed to update! Not hiding!")
                 conn.rollback()
                 sys.exit(0)
         conn.commit()
@@ -96,4 +96,4 @@ if __name__ == "__main__":
         VarnishPurger(cfg).purge([int(threadid), ])
         conn.close()
  
-       print "Message hidden and varnish purge triggered."
+       print("Message hidden and varnish purge triggered.")
diff --git a/loader/lib/log.py b/loader/lib/log.py

index 82e72fbc0227846b7663a571849f4766553ed422..5b6379a01b2354d8e98bff6a9929a75212fc3204 100644 (file)
--- a/loader/lib/log.py
+++ b/loader/lib/log.py
@@ -7,13 +7,13 @@ class Log(object):
  
         def status(self, msg):
                 if self.verbose:
-                       print msg
+                       print(msg)
  
         def log(self, msg):
-               print msg
+               print(msg)
  
         def error(self, msg):
-               print msg
+               print(msg)
  
         def print_status(self):
                 opstatus.print_status()
@@ -27,7 +27,7 @@ class OpStatus(object):
                 self.overwritten = 0
  
         def print_status(self):
-               print "%s stored, %s new-list tagged, %s dupes, %s failed, %s overwritten" % (self.stored, self.tagged, self.dupes, self.failed, self.overwritten)
+               print("%s stored, %s new-list tagged, %s dupes, %s failed, %s overwritten" % (self.stored, self.tagged, self.dupes, self.failed, self.overwritten))
  
  
  log = Log()
diff --git a/loader/lib/mbox.py b/loader/lib/mbox.py

index c4982eda37a55762f9a9d96500ceeb682da62791..77c83b0fb52d7a0f6550ac6612b277ff8889ec7b 100644 (file)
--- a/loader/lib/mbox.py
+++ b/loader/lib/mbox.py
@@ -1,5 +1,5 @@
  from subprocess import Popen, PIPE
-import cStringIO as StringIO
+from io import BytesIO
  
  # The hack of all hacks...
  # The python mbox parser fails to split some messages from mj2
@@ -8,6 +8,7 @@ import cStringIO as StringIO
  # reassemble it to one long stream with a unique separator,
  # and then split it apart again in python.. Isn't it cute?
  SEPARATOR = "ABCARCHBREAK123" * 50
+bSEPARATOR = bytes(SEPARATOR, 'ascii')
  
  class MailboxBreakupParser(object):
         def __init__(self, fn):
@@ -27,21 +28,21 @@ class MailboxBreakupParser(object):
         def stderr_output(self):
                 return self.pipe.stderr.read()
  
-       def next(self):
-               sio = StringIO.StringIO()
+       def __next__(self):
+               sio = BytesIO()
                 while True:
                         try:
-                               l = self.pipe.stdout.next()
+                               l = next(self.pipe.stdout)
                         except StopIteration:
                                 # End of file!
                                 self.EOF = True
                                 if sio.tell() == 0:
                                         # Nothing read yet, so return None instead of an empty
-                                       # stringio
+                                       # bytesio
                                         return None
                                 sio.seek(0)
                                 return sio
-                       if l.rstrip() == SEPARATOR:
+                       if l.rstrip() == bSEPARATOR:
                                 # Reached a separator. Meaning we're not at end of file,
                                 # but we're at end of message.
                                 sio.seek(0)
diff --git a/loader/lib/parser.py b/loader/lib/parser.py

index 15009c448db5e763db118ee313e1f10296c0ffed..8ee25c5ad3a339467916d1efe98f688738c8be53 100644 (file)
--- a/loader/lib/parser.py
+++ b/loader/lib/parser.py
@@ -2,23 +2,24 @@ import re
  import datetime
  import dateutil.parser
  
-from email.parser import Parser
-from email.header import decode_header
+from email.parser import BytesParser
+from email.header import decode_header, Header
  from email.errors import HeaderParseError
-from HTMLParser import HTMLParser, HTMLParseError
+from email.policy import compat32
+from html.parser import HTMLParser
  import tidylib
-import StringIO
+import io
  
  from lib.exception import IgnorableException
  from lib.log import log
  
  class ArchivesParser(object):
         def __init__(self):
-               self.parser = Parser()
+               self.parser = BytesParser(policy=compat32)
  
         def parse(self, stream):
                 self.rawtxt = stream.read()
-               self.msg = self.parser.parse(StringIO.StringIO(self.rawtxt))
+               self.msg = self.parser.parse(io.BytesIO(self.rawtxt))
  
         def is_msgid(self, msgid):
                 # Look for a specific messageid. This means we might parse it twice,
@@ -26,7 +27,7 @@ class ArchivesParser(object):
                 try:
                         if self.clean_messageid(self.decode_mime_header(self.get_mandatory('Message-ID'))) == msgid:
                                 return True
-               except Exception, e:
+               except Exception as e:
                         return False
  
         def analyze(self, date_override=None):
@@ -49,13 +50,13 @@ class ArchivesParser(object):
                 self.parents = []
                 # The first one is in-reply-to, if it exists
                 if self.get_optional('in-reply-to'):
-                       m = self.clean_messageid(self.get_optional('in-reply-to'), True)
+                       m = self.clean_messageid(self.decode_mime_header(self.get_optional('in-reply-to')), True)
                         if m:
                                 self.parents.append(m)
  
                 # Then we add all References values, in backwards order
                 if self.get_optional('references'):
-                       cleaned_msgids = [self.clean_messageid(x, True) for x in reversed(self.get_optional('references').split())]
+                       cleaned_msgids = [self.clean_messageid(x, True) for x in reversed(self.decode_mime_header(self.get_optional('references')).split())]
                         # Can't do this with a simple self.parents.extend() due to broken
                         # mailers that add the same reference more than once. And we can't
                         # use a set() to make it unique, because order is very important
@@ -130,19 +131,19 @@ class ArchivesParser(object):
                         params = msg.get_params()
                         if not params:
                                 # No content-type, so we assume us-ascii
-                               return unicode(b, 'us-ascii', errors='ignore')
+                               return str(b, 'us-ascii', errors='ignore')
                         for k,v in params:
                                 if k.lower() == 'charset':
                                         charset = v
                                         break
                         if charset:
                                 try:
-                                       return unicode(b, self.clean_charset(charset), errors='ignore')
-                               except LookupError, e:
+                                       return str(b, self.clean_charset(charset), errors='ignore')
+                               except LookupError as e:
                                         raise IgnorableException("Failed to get unicode payload: %s" % e)
                         else:
                                 # XXX: reasonable default?
-                               return unicode(b, errors='ignore')
+                               return str(b, errors='ignore')
                 # Return None or empty string, depending on what we got back
                 return b
  
@@ -154,8 +155,8 @@ class ArchivesParser(object):
                 if b:
                         # Python bug 9133, allows unicode surrogate pairs - which PostgreSQL will
                         # later reject..
-                       if b.find(u'\udbff\n\udef8'):
-                               b = b.replace(u'\udbff\n\udef8', '')
+                       if b.find('\udbff\n\udef8'):
+                               b = b.replace('\udbff\n\udef8', '')
  
                 # Remove postgres specific mail footer - if it's there
                 m = self._re_footer.match(b)
@@ -249,15 +250,15 @@ class ArchivesParser(object):
                 # If this is a header-encoded filename, start by decoding that
                 if filename.startswith('=?'):
                         decoded, encoding = decode_header(filename)[0]
-                       return unicode(decoded, encoding, errors='ignore')
+                       return str(decoded, encoding, errors='ignore')
  
                 # If it's already unicode, just return it
-               if isinstance(filename, unicode):
+               if isinstance(filename, str):
                         return filename
  
                 # Anything that's not UTF8, we just get rid of. We can live with
                 # filenames slightly mangled in this case.
-               return unicode(filename, 'utf-8', errors='ignore')
+               return str(filename, 'utf-8', errors='ignore')
  
         def _extract_filename(self, container):
                 # Try to get the filename for an attachment in the container.
@@ -324,7 +325,7 @@ class ArchivesParser(object):
                                 # by majordomo with the footer. So if that one is present,
                                 # we need to explicitly exclude it again.
                                 b = container.get_payload(decode=True)
-                               if not self._re_footer.match(b):
+                               if isinstance(b, str) and not self._re_footer.match(b):
                                         # We know there is no name for this one
                                         self.attachments.append((None, container.get_content_type(), b))
                                 return
@@ -423,9 +424,14 @@ class ArchivesParser(object):
                                 # enough...
                                 dp = datetime.datetime(*dp.utctimetuple()[:6])
                         return dp
-               except Exception, e:
+               except Exception as e:
                         raise IgnorableException("Failed to parse date '%s': %s" % (d, e))
  
+       def _maybe_decode(self, s, charset):
+               if isinstance(s, str):
+                       return s.strip(' ')
+               return str(s, charset and self.clean_charset(charset) or 'us-ascii', errors='ignore').strip(' ')
+
         # Workaround for broken quoting in some MUAs (see below)
         _re_mailworkaround = re.compile('"(=\?[^\?]+\?[QB]\?[^\?]+\?=)"', re.IGNORECASE)
         def _decode_mime_header(self, hdr, email_workaround):
@@ -449,28 +455,32 @@ class ArchivesParser(object):
                         hdr = self._re_mailworkaround.sub(r'\1', hdr)
  
                 try:
-                       return " ".join([unicode(s, charset and self.clean_charset(charset) or 'us-ascii', errors='ignore') for s,charset in decode_header(hdr)])
-               except HeaderParseError, e:
+                       return " ".join([self._maybe_decode(s, charset) for s, charset in decode_header(hdr)])
+               except HeaderParseError as e:
                         # Parser error is typically someone specifying an encoding,
                         # but then not actually using that encoding. We'll do the best
                         # we can, which is cut it down to ascii and ignore errors
-                       return unicode(hdr, 'us-ascii', errors='ignore')
+                       return str(hdr, 'us-ascii', errors='ignore').strip(' ')
  
         def decode_mime_header(self, hdr, email_workaround=False):
                 try:
+                       if isinstance(hdr, Header):
+                               hdr = hdr.encode()
+
                         h = self._decode_mime_header(hdr, email_workaround)
                         if h:
                                 return h.replace("\0", "")
                         return ''
-               except LookupError, e:
+               except LookupError as e:
                         raise IgnorableException("Failed to decode header value '%s': %s" % (hdr, e))
-               except ValueError, ve:
+               except ValueError as ve:
                         raise IgnorableException("Failed to decode header value '%s': %s" % (hdr, ve))
  
         def get_mandatory(self, fieldname):
                 try:
                         x = self.msg[fieldname]
-                       if x==None: raise Exception()
+                       if x==None:
+                               raise Exception()
                         return x
                 except:
                         raise IgnorableException("Mandatory field '%s' is missing" % fieldname)
@@ -496,17 +506,15 @@ class ArchivesParser(object):
                                                                                                    'show-info': 0,
                                                                                                    })
                 if errors:
-                       print("HTML tidy failed for %s!" % self.msgid)
+                       print(("HTML tidy failed for %s!" % self.msgid))
                         print(errors)
                         return None
-               if type(html) == str:
-                       html = unicode(html, 'utf8')
  
                 try:
                         cleaner = HTMLCleaner()
                         cleaner.feed(html)
                         return cleaner.get_text()
-               except HTMLParseError, e:
+               except Exception as e:
                         # Failed to parse the html, thus failed to clean it. so we must
                         # give up...
                         return None
@@ -515,7 +523,7 @@ class ArchivesParser(object):
  class HTMLCleaner(HTMLParser):
         def __init__(self):
                 HTMLParser.__init__(self)
-               self.io = StringIO.StringIO()
+               self.io = io.StringIO()
  
         def get_text(self):
                 return self.io.getvalue()
diff --git a/loader/lib/storage.py b/loader/lib/storage.py

index 92ffa45f217c23c92194a540021b5e8895cdb9db..8962b879bae82756e7b5a282b390a280ff1559be 100644 (file)
--- a/loader/lib/storage.py
+++ b/loader/lib/storage.py
@@ -1,6 +1,6 @@
  import difflib
  
-from parser import ArchivesParser
+from .parser import ArchivesParser
  
  from lib.log import log, opstatus
  
@@ -144,9 +144,9 @@ class ArchivesParserStorage(ArchivesParser):
                         # holding other threads together.
                         if self.threadid:
                                 # Already have a threadid, means that we have a glue message
-                               print "Message %s resolved to existing thread %s, while being somebodys parent" % (self.msgid, self.threadid)
+                               print("Message %s resolved to existing thread %s, while being somebodys parent" % (self.msgid, self.threadid))
                         else:
-                               print "Message %s did not resolve to existing thread, but is somebodys parent" % self.msgid
+                               print("Message %s did not resolve to existing thread, but is somebodys parent" % self.msgid)
                                 # In this case, just pick the first thread from the list and merge into that
                                 # one.
                                 self.threadid = childrows[0][2]
@@ -254,31 +254,27 @@ class ArchivesParserStorage(ArchivesParser):
                         })
                 try:
                         id, _from, to, cc, subject, date, has_attachment, bodytxt = curs.fetchone()
-               except TypeError, e:
+               except TypeError as e:
                         f.write("---- %s ----\n" % self.msgid)
                         f.write("Could not re-find in archives (old id was %s): %s\n" % (oldid, e))
                         f.write("\n-------------------------------\n\n")
                         return
  
  
-               _from = _from.decode('utf8')
-               to = to.decode('utf8')
-               cc = cc.decode('utf8')
-               subject = subject.decode('utf8')
-               if (_from, to, cc, subject) != (self._from, self.to, self.cc, self.subject):
+               if (_from.rstrip(), to.rstrip(), cc.rstrip(), subject.rstrip()) != (self._from, self.to, self.cc, self.subject):
                         log.status("Message %s has header changes " % self.msgid)
                         f.write("==== %s ====\n" % self.msgid)
                         for fn in ['_from', 'to', 'cc', 'subject']:
                                 if getattr(self, fn) != eval(fn):
-                                       s = u"- {0}: {1}\n".format(fn, eval(fn))
-                                       d = u"+ {0}: {1}\n".format(fn, getattr(self, fn))
+                                       s = "- {0}: {1}\n".format(fn, eval(fn))
+                                       d = "+ {0}: {1}\n".format(fn, getattr(self, fn))
                                         f.write(s)
                                         f.write(d)
                         f.write("\n\n")
  
-               if bodytxt.decode('utf8') != self.bodytxt:
+               if bodytxt != self.bodytxt:
                         log.status("Message %s has body changes " % self.msgid)
-                       tempdiff = list(difflib.unified_diff(bodytxt.decode('utf8').splitlines(),
+                       tempdiff = list(difflib.unified_diff(bodytxt.splitlines(),
                                                                                                  self.bodytxt.splitlines(),
                                                                                                  fromfile='old',
                                                                                                  tofile='new',
@@ -289,7 +285,9 @@ class ArchivesParserStorage(ArchivesParser):
                                 # Then verify that each slice of 3 contains one @@ row (header), one -From and one +>From,
                                 # which indicates the only change is in the From.
                                 ok = True
-                               for a,b,c in map(None, *([iter(tempdiff[2:])] * 3)):
+                               tempdiff = tempdiff[2:]
+                               while tempdiff:
+                                       a,b,c = (tempdiff.pop(0), tempdiff.pop(0), tempdiff.pop(0))
                                         if not (a.startswith('@@ ') and b.startswith('-From ') and c.startswith('+>From ')):
                                                 ok=False
                                                 break
@@ -299,12 +297,12 @@ class ArchivesParserStorage(ArchivesParser):
  
  
                         # Generate a nicer diff
-                       d = list(difflib.unified_diff(bodytxt.decode('utf8').splitlines(),
-                                                                                                  self.bodytxt.splitlines(),
-                                                                                                  fromfile='old',
-                                                                                                  tofile='new',
-                                                                                                  n=0,
-                                                                                                  lineterm=''))
+                       d = list(difflib.unified_diff(bodytxt.splitlines(),
+                                                                                 self.bodytxt.splitlines(),
+                                                                                 fromfile='old',
+                                                                                 tofile='new',
+                                                                                 n=0,
+                                                                                 lineterm=''))
                         if len(d) > 0:
                                 f.write("---- %s ----\n" % self.msgid)
                                 f.write("\n".join(d))
diff --git a/loader/lib/varnish.py b/loader/lib/varnish.py

index b49938b8bed5f95be4142b6214a6dd424314ffee..f2a06c3d74e9ae39c37b78dbf13f2cfbc4eb8803 100644 (file)
--- a/loader/lib/varnish.py
+++ b/loader/lib/varnish.py
@@ -1,5 +1,4 @@
-import urllib
-import urllib2
+import requests
  
  from lib.log import log
  
@@ -23,13 +22,12 @@ class VarnishPurger(object):
                         else:
                                 # Purging individual thread
                                 exprlist.append('obj.http.x-pgthread ~ :%s:' % p)
-               purgedict = dict(zip(['p%s' % n for n in range(0, len(exprlist))], exprlist))
+               purgedict = dict(list(zip(['p%s' % n for n in range(0, len(exprlist))], exprlist)))
                 purgedict['n'] = len(exprlist)
-               r = urllib2.Request(purgeurl, data=urllib.urlencode(purgedict))
-               r.add_header('Content-type', 'application/x-www-form-urlencoded')
-               r.add_header('Host', 'www.postgresql.org')
-               r.get_method = lambda: 'POST'
-               u = urllib2.urlopen(r)
-               if u.getcode() != 200:
+               r = requests.post(purgeurl, data=purgedict, headers={
+                       'Content-type': 'application/x-www-form-urlencoded',
+                       'Host': 'www.postgresql.org',
+               })
+               if r.status_code != 200:
                         log.error("Failed to send purge request!")
  
diff --git a/loader/load_message.py b/loader/load_message.py

index b4668603d520b4f05ea810b003223049f58754e7..efb8626336ebedae9a1fb8bbad0748872d43f47f 100755 (executable)
--- a/loader/load_message.py
+++ b/loader/load_message.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
  #
  # load_message.py - takes a single email or mbox formatted
  # file on stdin or in a file and reads it into the database.
@@ -8,9 +8,9 @@ import os
  import sys
  
  from optparse import OptionParser
-from ConfigParser import ConfigParser
-import urllib
-import urllib2
+from configparser import ConfigParser
+import urllib.request, urllib.parse, urllib.error
+import urllib.request, urllib.error, urllib.parse
  
  import psycopg2
  
@@ -25,7 +25,7 @@ def log_failed_message(listid, srctype, src, msg, err):
                 msgid = msg.msgid
         except:
                 msgid = "<unknown>"
-       log.error("Failed to load message (msgid %s) from %s, spec %s: %s" % (msgid.encode('us-ascii', 'replace'), srctype, src, unicode(str(err), 'us-ascii', 'replace')))
+       log.error("Failed to load message (msgid %s) from %s, spec %s: %s" % (msgid.encode('us-ascii', 'replace'), srctype, src, str(str(err), 'us-ascii', 'replace')))
  
         # We also put the data in the db. This happens in the main transaction
         # so if the whole script dies, it goes away...
@@ -34,7 +34,7 @@ def log_failed_message(listid, srctype, src, msg, err):
                         'msgid': msgid,
                         'srctype': srctype,
                         'src': src,
-                       'err': unicode(str(err), 'us-ascii', 'replace'),
+                       'err': str(str(err), 'us-ascii', 'replace'),
                         })
  
  
@@ -51,27 +51,27 @@ if __name__ == "__main__":
         (opt, args) = optparser.parse_args()
  
         if (len(args)):
-               print "No bare arguments accepted"
+               print("No bare arguments accepted")
                 optparser.print_usage()
                 sys.exit(1)
  
         if not opt.list:
-               print "List must be specified"
+               print("List must be specified")
                 optparser.print_usage()
                 sys.exit(1)
  
         if opt.directory and opt.mbox:
-               print "Can't specify both directory and mbox!"
+               print("Can't specify both directory and mbox!")
                 optparser.print_usage()
                 sys.exit(1)
  
         if opt.force_date and (opt.directory or opt.mbox) and not opt.filter_msgid:
-               print "Can't use force_date with directory or mbox - only individual messages"
+               print("Can't use force_date with directory or mbox - only individual messages")
                 optparser.print_usage()
                 sys.exit(1)
  
         if opt.filter_msgid and not (opt.directory or opt.mbox):
-               print "filter_msgid makes no sense without directory or mbox!"
+               print("filter_msgid makes no sense without directory or mbox!")
                 optparser.print_usage()
                 sys.exit(1)
  
@@ -93,8 +93,8 @@ if __name__ == "__main__":
         try:
                 curs.execute("SET statement_timeout='30s'")
                 curs.execute("SELECT pg_advisory_xact_lock(8059944559669076)")
-       except Exception, e:
-               print("Failed to wait on advisory lock: %s" % e)
+       except Exception as e:
+               print(("Failed to wait on advisory lock: %s" % e))
                 sys.exit(1)
  
         # Get the listid we're working on
@@ -121,36 +121,37 @@ if __name__ == "__main__":
                                         continue
                                 try:
                                         ap.analyze(date_override=opt.force_date)
-                               except IgnorableException, e:
+                               except IgnorableException as e:
                                         log_failed_message(listid, "directory", os.path.join(opt.directory, x), ap, e)
                                         opstatus.failed += 1
                                         continue
                                 ap.store(conn, listid)
                                 purges.update(ap.purges)
                         if opt.interactive:
-                               print "Interactive mode, committing transaction"
+                               print("Interactive mode, committing transaction")
                                 conn.commit()
-                               print "Proceed to next message with Enter, or input a period (.) to stop processing"
-                               x = raw_input()
+                               print("Proceed to next message with Enter, or input a period (.) to stop processing")
+                               x = input()
                                 if x == '.':
-                                       print "Ok, aborting!"
+                                       print("Ok, aborting!")
                                         break
-                               print "---------------------------------"
+                               print("---------------------------------")
         elif opt.mbox:
                 if not os.path.isfile(opt.mbox):
-                       print "File %s does not exist" % opt.mbox
+                       print("File %s does not exist" % opt.mbox)
                         sys.exit(1)
                 mboxparser = MailboxBreakupParser(opt.mbox)
                 while not mboxparser.EOF:
                         ap = ArchivesParserStorage()
-                       msg = mboxparser.next()
-                       if not msg: break
+                       msg = next(mboxparser)
+                       if not msg:
+                               break
                         ap.parse(msg)
                         if opt.filter_msgid and not ap.is_msgid(opt.filter_msgid):
                                 continue
                         try:
                                 ap.analyze(date_override=opt.force_date)
-                       except IgnorableException, e:
+                       except IgnorableException as e:
                                 log_failed_message(listid, "mbox", opt.mbox, ap, e)
                                 opstatus.failed += 1
                                 continue
@@ -163,10 +164,10 @@ if __name__ == "__main__":
         else:
                 # Parse single message on stdin
                 ap = ArchivesParserStorage()
-               ap.parse(sys.stdin)
+               ap.parse(sys.stdin.buffer)
                 try:
                         ap.analyze(date_override=opt.force_date)
-               except IgnorableException, e:
+               except IgnorableException as e:
                         log_failed_message(listid, "stdin","", ap, e)
                         conn.close()
                         sys.exit(1)
diff --git a/loader/pglister_sync.py b/loader/pglister_sync.py

index 32c68209fd5425dfaa6e027374d19967c3f39e67..e38cdd4baee48e4471334eb3f7b10c62f2beb892 100755 (executable)
--- a/loader/pglister_sync.py
+++ b/loader/pglister_sync.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
  # -*- coding: utf-8 -*-
  
  # Synchronize list info from pglister
@@ -6,7 +6,7 @@
  import os
  import sys
  import argparse
-from ConfigParser import ConfigParser
+from configparser import ConfigParser
  import psycopg2
  import requests
  
@@ -44,7 +44,7 @@ if __name__=="__main__":
         # For groups, just add them if they don't exist
         groups = {g['group']['id']:g['group']['groupname'] for g in obj}
  
-       for id,name in groups.items():
+       for id,name in list(groups.items()):
                 curs.execute("SELECT EXISTS (SELECT 1 FROM listgroups WHERE groupname=%(group)s)", {
                         'group': name,
                 })
@@ -52,7 +52,7 @@ if __name__=="__main__":
                         curs.execute("INSERT INTO listgroups (groupname, sortkey) VALUES (%(group)s, 100) RETURNING groupname", {
                                 'group': name,
                         })
-                       print "Added group %s" % name
+                       print("Added group %s" % name)
  
         # Add any missing lists, and synchronize their contents.
         for l in obj:
@@ -66,7 +66,7 @@ if __name__=="__main__":
                                 'groupname': l['group']['groupname'],
                         })
                         listid, name = curs.fetchone()
-                       print "Added list %s" % name
+                       print("Added list %s" % name)
                 else:
                         listid, name = curs.fetchone()
                         curs.execute("UPDATE lists SET shortdesc=%(name)s, description=%(desc)s, groupid=(SELECT groupid FROM listgroups WHERE groupname=%(groupname)s), active=true WHERE listid=%(id)s AND NOT (active AND shortdesc=%(name)s AND description=%(desc)s AND groupid=(SELECT groupid FROM listgroups WHERE groupname=%(groupname)s)) RETURNING listname", {
@@ -76,7 +76,7 @@ if __name__=="__main__":
                                 'groupname': l['group']['groupname'],
                         })
                         for n, in curs.fetchall():
-                               print "Updated list %s " % n
+                               print("Updated list %s " % n)
  
                 if do_subscribers:
                         # If we synchronize subscribers, we do so on all lists for now.
@@ -86,9 +86,9 @@ if __name__=="__main__":
                         })
                         for what, who in curs.fetchall():
                                 if what == 'ins':
-                                       print "Added subscriber %s to list %s" % (who, name)
+                                       print("Added subscriber %s to list %s" % (who, name))
                                 else:
-                                       print "Removed subscriber %s from list %s" % (who, name)
+                                       print("Removed subscriber %s from list %s" % (who, name))
  
  
         # We don't remove lists ever, because we probably want to keep archives around.
@@ -97,10 +97,10 @@ if __name__=="__main__":
                 'lists': [l['listname'] for l in obj],
         })
         for n, in curs.fetchall():
-               print "List %s exists in archives, but not in upstream! Should it be marked inactive?" % n
+               print("List %s exists in archives, but not in upstream! Should it be marked inactive?" % n)
  
         if args.dryrun:
-               print "Dry-run, rolling back"
+               print("Dry-run, rolling back")
                 conn.rollback()
         else:
                 conn.commit()
diff --git a/loader/purge_frontend_message.py b/loader/purge_frontend_message.py

index edab70cf7ec01e92a9b15f27c169d99d16494a14..72899e80c4d5af8c8b308d1f6f8c39d7bbc963a6 100755 (executable)
--- a/loader/purge_frontend_message.py
+++ b/loader/purge_frontend_message.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
  #
  # purge_frontend_message.py - issue varnish purge for the message
  # in question, to for example force an expire of a hidden message.
@@ -8,7 +8,7 @@ import os
  import sys
  
  from optparse import OptionParser
-from ConfigParser import ConfigParser
+from configparser import ConfigParser
  
  import psycopg2
  
@@ -21,12 +21,12 @@ if __name__ == "__main__":
         (opt, args) = optparser.parse_args()
  
         if (len(args)):
-               print "No bare arguments accepted"
+               print("No bare arguments accepted")
                 optparser.print_help()
                 sys.exit(1)
  
         if not opt.msgid:
-               print "Message-id must be specified"
+               print("Message-id must be specified")
                 optparser.print_help()
                 sys.exit(1)
  
diff --git a/loader/reparse_message.py b/loader/reparse_message.py

index 802705840adb8bd6b4f78cceefac05acbbda017e..df4501a307b0e4670a31eb690614d57b9ed1dd9e 100755 (executable)
--- a/loader/reparse_message.py
+++ b/loader/reparse_message.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
  #
  # reparse_message.py - using the rawtxt stored in the database,
  # redo the parsing of it and overwrite it with itself. Used when
@@ -10,8 +10,8 @@ import sys
  import codecs
  
  from optparse import OptionParser
-from ConfigParser import ConfigParser
-from StringIO import StringIO
+from configparser import ConfigParser
+from io import BytesIO
  from datetime import datetime, timedelta
  
  import psycopg2
@@ -45,16 +45,16 @@ if __name__ == "__main__":
         (opt, args) = optparser.parse_args()
  
         if (len(args)):
-               print "No bare arguments accepted"
+               print("No bare arguments accepted")
                 optparser.print_usage()
                 sys.exit(1)
  
         if sum([1 for x in [opt.all, opt.sample, opt.msgid] if x]) != 1:
-               print "Must specify exactly one of --msgid, --all and --sample"
+               print("Must specify exactly one of --msgid, --all and --sample")
                 sys.exit(1)
  
         if not opt.update and os.path.exists('reparse.diffs'):
-               print "File reparse.diffs already exists. Remove or rename and try again."
+               print("File reparse.diffs already exists. Remove or rename and try again.")
                 sys.exit(1)
  
         log.set(opt.verbose)
@@ -97,10 +97,10 @@ if __name__ == "__main__":
         for id, rawtxt in ResultIter(curs):
                 num += 1
                 ap = ArchivesParserStorage()
-               ap.parse(StringIO(rawtxt))
+               ap.parse(BytesIO(rawtxt))
                 try:
                         ap.analyze(date_override=opt.force_date)
-               except IgnorableException, e:
+               except IgnorableException as e:
                         if opt.update:
                                 raise e
                         f.write("Exception loading %s: %s" % (id, e))
@@ -119,14 +119,14 @@ if __name__ == "__main__":
                         sys.stdout.flush()
                         laststatus = datetime.now()
  
-       print ""
+       print("")
  
         if opt.update:
                 opstatus.print_status()
                 if not opt.commit:
                         while True:
                                 print("OK to commit transaction? ")
-                               a = raw_input().lower().strip()
+                               a = input().lower().strip()
                                 if a == 'y' or a == 'yes':
                                         print("Ok, committing.")
                                         break
author	Magnus Hagander <magnus@hagander.net>
	Thu, 3 Jan 2019 10:04:29 +0000 (11:04 +0100)
committer	Magnus Hagander <magnus@hagander.net>
	Thu, 3 Jan 2019 10:04:29 +0000 (11:04 +0100)
loader/clean_date.py		patch \| blob \| blame \| history
loader/generate_mbox.py		patch \| blob \| blame \| history
loader/hide_message.py		patch \| blob \| blame \| history
loader/lib/log.py		patch \| blob \| blame \| history
loader/lib/mbox.py		patch \| blob \| blame \| history
loader/lib/parser.py		patch \| blob \| blame \| history
loader/lib/storage.py		patch \| blob \| blame \| history
loader/lib/varnish.py		patch \| blob \| blame \| history
loader/load_message.py		patch \| blob \| blame \| history
loader/pglister_sync.py		patch \| blob \| blame \| history
loader/purge_frontend_message.py		patch \| blob \| blame \| history
loader/reparse_message.py		patch \| blob \| blame \| history