Update loader scripts to use python3 syntax
author Magnus Hagander <magnus@hagander.net>
Thu, 3 Jan 2019 10:04:29 +0000 (11:04 +0100)
committer Magnus Hagander <magnus@hagander.net>
Thu, 3 Jan 2019 10:04:29 +0000 (11:04 +0100)
Some minor cleanups as well, but mostly just the output of the 2to3 tool
and some manual changes.

12 files changed:
loader/clean_date.py
loader/generate_mbox.py
loader/hide_message.py
loader/lib/log.py
loader/lib/mbox.py
loader/lib/parser.py
loader/lib/storage.py
loader/lib/varnish.py
loader/load_message.py
loader/pglister_sync.py
loader/purge_frontend_message.py
loader/reparse_message.py

index faac7ab2f05b5a1197191b091f4408fd7ad703a7..4ea295100435992770c55c8b17b6095a3df70421 100755 (executable)
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 #
 # Clean up old, broken, dates
 #
@@ -7,17 +7,17 @@ import os
 import sys
 import re
 
-from ConfigParser import ConfigParser
+from configparser import ConfigParser
 
 from email.parser import Parser
-from urllib import urlopen
+from urllib.request import urlopen
 import dateutil.parser
 
 import psycopg2
 
 def scan_message(messageid, olddate, curs):
        u = "http://archives.postgresql.org/msgtxt.php?id=%s" % messageid
-       print "Scanning message at %s (date reported as %s)..." % (u, olddate)
+       print("Scanning message at %s (date reported as %s)..." % (u, olddate))
 
        f = urlopen(u)
        p = Parser()
@@ -26,10 +26,10 @@ def scan_message(messageid, olddate, curs):
 
        # Can be either one of them, but we really don't care...
        ds = None
-       for k,r in msg.items():
+       for k,r in list(msg.items()):
                if k != 'Received': continue
 
-               print "Trying on %s" % r
+               print("Trying on %s" % r)
                m = re.search(';\s*(.*)$', r)
                if m:
                        ds = m.group(1)
@@ -40,23 +40,23 @@ def scan_message(messageid, olddate, curs):
                        break
 
        if not ds:
-               print "Could not find date. Sorry."
+               print("Could not find date. Sorry.")
                return False
        d = None
        try:
                d = dateutil.parser.parse(ds)
        except:
-               print "Could not parse date '%s', sorry." % ds
+               print("Could not parse date '%s', sorry." % ds)
                return
 
        while True:
-               x = raw_input("Parsed this as date %s. Update? " % d)
+               x = input("Parsed this as date %s. Update? " % d)
                if x.upper() == 'Y':
                        curs.execute("UPDATE messages SET date=%(d)s WHERE messageid=%(m)s", {
                                        'd': d,
                                        'm': messageid,
                                        })
-                       print "Updated."
+                       print("Updated.")
                        break
                elif x.upper() == 'N':
                        break
@@ -74,4 +74,4 @@ if __name__ == "__main__":
                scan_message(messageid, date, curs)
 
        conn.commit()
-       print "Done."
+       print("Done.")
index 42404c45e0549012af345eba24d84b5338a99d21..c2299e1f6d1610bb0c744a6face0ea2df64639e7 100755 (executable)
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 #
 # generate_mbox.py - generate an mbox file from the rawtxt stored
 #                    in the datatabase.
@@ -11,27 +11,34 @@ import calendar
 import re
 
 import argparse
-from ConfigParser import ConfigParser
+from configparser import ConfigParser
 import email.parser
+import email.policy
 import email.generator
-from StringIO import StringIO
+from io import BytesIO
 
 import psycopg2
 
 
 def generate_single_mbox(conn, listid, year, month, destination):
        curs = conn.cursor()
-       curs.execute("SELECT rawtxt FROM messages m INNER JOIN list_threads t ON t.threadid=m.threadid WHERE hiddenstatus IS NULL AND listid=%(listid)s AND date>=%(startdate)s AND date <= %(enddate)s ORDER BY date", {
+       curs.execute("SELECT id, rawtxt FROM messages m INNER JOIN list_threads t ON t.threadid=m.threadid WHERE hiddenstatus IS NULL AND listid=%(listid)s AND date>=%(startdate)s AND date <= %(enddate)s ORDER BY date", {
                'listid': listid,
                'startdate': date(year, month, 1),
                'enddate': date(year, month, calendar.monthrange(year, month)[1]),
        })
-       with open(destination, 'w') as f:
-               for raw, in curs:
-                       s = StringIO(raw)
-                       parser = email.parser.Parser()
+       with open(destination, 'w', encoding='utf8') as f:
+               for id, raw, in curs:
+                       s = BytesIO(raw)
+                       parser = email.parser.BytesParser(policy=email.policy.compat32)
                        msg = parser.parse(s)
-                       f.write(msg.as_string(unixfrom=True))
+                       try:
+                               x = msg.as_string(unixfrom=True)
+                               f.write(x)
+                       except UnicodeEncodeError as e:
+                               print("Not including {0}, unicode error".format(msg['message-id']))
+                       except Exception as e:
+                               print("Not including {0}, exception {1}".format(msg['message-id'], e))
 
 
 if __name__ == "__main__":
@@ -46,14 +53,14 @@ if __name__ == "__main__":
 
        if args.auto:
                if (args.list or args.month):
-                       print "Must not specify list and month when auto-generating!"
+                       print("Must not specify list and month when auto-generating!")
                        sys.exit(1)
                if not os.path.isdir(args.destination):
-                       print "Destination must be a directory, and exist, when auto-generating"
+                       print("Destination must be a directory, and exist, when auto-generating")
                        sys.exit(1)
        else:
                if not (args.list and args.month and args.destination):
-                       print "Must specify list, month and destination when generating a single mailbox"
+                       print("Must specify list, month and destination when generating a single mailbox")
                        parser.print_help()
                        sys.exit(1)
 
@@ -85,14 +92,14 @@ if __name__ == "__main__":
                                if not os.path.isdir(fullpath):
                                        os.makedirs(fullpath)
                                if not args.quiet:
-                                       print "Generating {0}-{1} for {2}".format(year, month, lname)
+                                       print("Generating {0}-{1} for {2}".format(year, month, lname))
                                generate_single_mbox(conn, lid, year, month,
                                                                         os.path.join(fullpath, "{0}.{0:04d}{1:02d}".format(year, month)))
        else:
                # Parse year and month
                m = re.match('^(\d{4})-(\d{2})$', args.month)
                if not m:
-                       print "Month must be specified on format YYYY-MM, not {0}".format(args.month)
+                       print("Month must be specified on format YYYY-MM, not {0}".format(args.month))
                        sys.exit(1)
                year = int(m.group(1))
                month = int(m.group(2))
@@ -101,9 +108,9 @@ if __name__ == "__main__":
                        'name': args.list,
                })
                if curs.rowcount != 1:
-                       print "List {0} not found.".format(args.list)
+                       print("List {0} not found.".format(args.list))
                        sys.exit(1)
 
                if not args.quiet:
-                       print "Generating {0}-{1} for {2}".format(year, month, args.list)
+                       print("Generating {0}-{1} for {2}".format(year, month, args.list))
                generate_single_mbox(conn, curs.fetchone()[0], year, month, args.destination)
index 51bffc6b1267c6996ba003746756fd599330d716..8bb9359662070f78de776af7d0736539fa76e4bd 100755 (executable)
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 #
 # hide_message.py - hide a message (spam etc) in the archives, including
 # frontend expiry.
@@ -8,7 +8,7 @@ import os
 import sys
 
 from optparse import OptionParser
-from ConfigParser import ConfigParser
+from configparser import ConfigParser
 
 import psycopg2
 
@@ -29,12 +29,12 @@ if __name__ == "__main__":
        (opt, args) = optparser.parse_args()
 
        if (len(args)):
-               print "No bare arguments accepted"
+               print("No bare arguments accepted")
                optparser.print_help()
                sys.exit(1)
 
        if not opt.msgid:
-               print "Message-id must be specified"
+               print("Message-id must be specified")
                optparser.print_help()
                sys.exit(1)
 
@@ -52,34 +52,34 @@ if __name__ == "__main__":
                'msgid': opt.msgid,
        })
        if curs.rowcount <= 0:
-               print "Message not found."
+               print("Message not found.")
                sys.exit(1)
 
        id, threadid, previous = curs.fetchone()
 
        # Message found, ask for reason
        reason = 0
-       print "Current status: %s" % reasons[previous or 0]
-       print "\n".join("%s - %s " % (n, reasons[n]) for n in range(len(reasons)))
+       print("Current status: %s" % reasons[previous or 0])
+       print("\n".join("%s - %s " % (n, reasons[n]) for n in range(len(reasons))))
        while True:
-               reason = raw_input('Reason for hiding message? ')
+               reason = input('Reason for hiding message? ')
                try:
                        reason = int(reason)
                except ValueError:
                        continue
 
                if reason == 0:
-                       print "Un-hiding message"
+                       print("Un-hiding message")
                        reason = None
                        break
                else:
                        try:
-                               print "Hiding message for reason: %s" % reasons[reason]
+                               print("Hiding message for reason: %s" % reasons[reason])
                        except:
                                continue
                        break
        if previous == reason:
-               print "No change in status, not updating"
+               print("No change in status, not updating")
                conn.close()
                sys.exit(0)
 
@@ -88,7 +88,7 @@ if __name__ == "__main__":
                'id': id,
        })
        if curs.rowcount != 1:
-               print "Failed to update! Not hiding!"
+               print("Failed to update! Not hiding!")
                conn.rollback()
                sys.exit(0)
        conn.commit()
@@ -96,4 +96,4 @@ if __name__ == "__main__":
        VarnishPurger(cfg).purge([int(threadid), ])
        conn.close()
 
-       print "Message hidden and varnish purge triggered."
+       print("Message hidden and varnish purge triggered.")
index 82e72fbc0227846b7663a571849f4766553ed422..5b6379a01b2354d8e98bff6a9929a75212fc3204 100644 (file)
@@ -7,13 +7,13 @@ class Log(object):
 
        def status(self, msg):
                if self.verbose:
-                       print msg
+                       print(msg)
 
        def log(self, msg):
-               print msg
+               print(msg)
 
        def error(self, msg):
-               print msg
+               print(msg)
 
        def print_status(self):
                opstatus.print_status()
@@ -27,7 +27,7 @@ class OpStatus(object):
                self.overwritten = 0
 
        def print_status(self):
-               print "%s stored, %s new-list tagged, %s dupes, %s failed, %s overwritten" % (self.stored, self.tagged, self.dupes, self.failed, self.overwritten)
+               print("%s stored, %s new-list tagged, %s dupes, %s failed, %s overwritten" % (self.stored, self.tagged, self.dupes, self.failed, self.overwritten))
 
 
 log = Log()
index c4982eda37a55762f9a9d96500ceeb682da62791..77c83b0fb52d7a0f6550ac6612b277ff8889ec7b 100644 (file)
@@ -1,5 +1,5 @@
 from subprocess import Popen, PIPE
-import cStringIO as StringIO
+from io import BytesIO
 
 # The hack of all hacks...
 # The python mbox parser fails to split some messages from mj2
@@ -8,6 +8,7 @@ import cStringIO as StringIO
 # reassemble it to one long stream with a unique separator,
 # and then split it apart again in python.. Isn't it cute?
 SEPARATOR = "ABCARCHBREAK123" * 50
+bSEPARATOR = bytes(SEPARATOR, 'ascii')
 
 class MailboxBreakupParser(object):
        def __init__(self, fn):
@@ -27,21 +28,21 @@ class MailboxBreakupParser(object):
        def stderr_output(self):
                return self.pipe.stderr.read()
 
-       def next(self):
-               sio = StringIO.StringIO()
+       def __next__(self):
+               sio = BytesIO()
                while True:
                        try:
-                               l = self.pipe.stdout.next()
+                               l = next(self.pipe.stdout)
                        except StopIteration:
                                # End of file!
                                self.EOF = True
                                if sio.tell() == 0:
                                        # Nothing read yet, so return None instead of an empty
-                                       # stringio
+                                       # bytesio
                                        return None
                                sio.seek(0)
                                return sio
-                       if l.rstrip() == SEPARATOR:
+                       if l.rstrip() == bSEPARATOR:
                                # Reached a separator. Meaning we're not at end of file,
                                # but we're at end of message.
                                sio.seek(0)
index 15009c448db5e763db118ee313e1f10296c0ffed..8ee25c5ad3a339467916d1efe98f688738c8be53 100644 (file)
@@ -2,23 +2,24 @@ import re
 import datetime
 import dateutil.parser
 
-from email.parser import Parser
-from email.header import decode_header
+from email.parser import BytesParser
+from email.header import decode_header, Header
 from email.errors import HeaderParseError
-from HTMLParser import HTMLParser, HTMLParseError
+from email.policy import compat32
+from html.parser import HTMLParser
 import tidylib
-import StringIO
+import io
 
 from lib.exception import IgnorableException
 from lib.log import log
 
 class ArchivesParser(object):
        def __init__(self):
-               self.parser = Parser()
+               self.parser = BytesParser(policy=compat32)
 
        def parse(self, stream):
                self.rawtxt = stream.read()
-               self.msg = self.parser.parse(StringIO.StringIO(self.rawtxt))
+               self.msg = self.parser.parse(io.BytesIO(self.rawtxt))
 
        def is_msgid(self, msgid):
                # Look for a specific messageid. This means we might parse it twice,
@@ -26,7 +27,7 @@ class ArchivesParser(object):
                try:
                        if self.clean_messageid(self.decode_mime_header(self.get_mandatory('Message-ID'))) == msgid:
                                return True
-               except Exception, e:
+               except Exception as e:
                        return False
 
        def analyze(self, date_override=None):
@@ -49,13 +50,13 @@ class ArchivesParser(object):
                self.parents = []
                # The first one is in-reply-to, if it exists
                if self.get_optional('in-reply-to'):
-                       m = self.clean_messageid(self.get_optional('in-reply-to'), True)
+                       m = self.clean_messageid(self.decode_mime_header(self.get_optional('in-reply-to')), True)
                        if m:
                                self.parents.append(m)
 
                # Then we add all References values, in backwards order
                if self.get_optional('references'):
-                       cleaned_msgids = [self.clean_messageid(x, True) for x in reversed(self.get_optional('references').split())]
+                       cleaned_msgids = [self.clean_messageid(x, True) for x in reversed(self.decode_mime_header(self.get_optional('references')).split())]
                        # Can't do this with a simple self.parents.extend() due to broken
                        # mailers that add the same reference more than once. And we can't
                        # use a set() to make it unique, because order is very important
@@ -130,19 +131,19 @@ class ArchivesParser(object):
                        params = msg.get_params()
                        if not params:
                                # No content-type, so we assume us-ascii
-                               return unicode(b, 'us-ascii', errors='ignore')
+                               return str(b, 'us-ascii', errors='ignore')
                        for k,v in params:
                                if k.lower() == 'charset':
                                        charset = v
                                        break
                        if charset:
                                try:
-                                       return unicode(b, self.clean_charset(charset), errors='ignore')
-                               except LookupError, e:
+                                       return str(b, self.clean_charset(charset), errors='ignore')
+                               except LookupError as e:
                                        raise IgnorableException("Failed to get unicode payload: %s" % e)
                        else:
                                # XXX: reasonable default?
-                               return unicode(b, errors='ignore')
+                               return str(b, errors='ignore')
                # Return None or empty string, depending on what we got back
                return b
 
@@ -154,8 +155,8 @@ class ArchivesParser(object):
                if b:
                        # Python bug 9133, allows unicode surrogate pairs - which PostgreSQL will
                        # later reject..
-                       if b.find(u'\udbff\n\udef8'):
-                               b = b.replace(u'\udbff\n\udef8', '')
+                       if b.find('\udbff\n\udef8'):
+                               b = b.replace('\udbff\n\udef8', '')
 
                # Remove postgres specific mail footer - if it's there
                m = self._re_footer.match(b)
@@ -249,15 +250,15 @@ class ArchivesParser(object):
                # If this is a header-encoded filename, start by decoding that
                if filename.startswith('=?'):
                        decoded, encoding = decode_header(filename)[0]
-                       return unicode(decoded, encoding, errors='ignore')
+                       return str(decoded, encoding, errors='ignore')
 
                # If it's already unicode, just return it
-               if isinstance(filename, unicode):
+               if isinstance(filename, str):
                        return filename
 
                # Anything that's not UTF8, we just get rid of. We can live with
                # filenames slightly mangled in this case.
-               return unicode(filename, 'utf-8', errors='ignore')
+               return str(filename, 'utf-8', errors='ignore')
 
        def _extract_filename(self, container):
                # Try to get the filename for an attachment in the container.
@@ -324,7 +325,7 @@ class ArchivesParser(object):
                                # by majordomo with the footer. So if that one is present,
                                # we need to explicitly exclude it again.
                                b = container.get_payload(decode=True)
-                               if not self._re_footer.match(b):
+                               if isinstance(b, str) and not self._re_footer.match(b):
                                        # We know there is no name for this one
                                        self.attachments.append((None, container.get_content_type(), b))
                                return
@@ -423,9 +424,14 @@ class ArchivesParser(object):
                                # enough...
                                dp = datetime.datetime(*dp.utctimetuple()[:6])
                        return dp
-               except Exception, e:
+               except Exception as e:
                        raise IgnorableException("Failed to parse date '%s': %s" % (d, e))
 
+       def _maybe_decode(self, s, charset):
+               if isinstance(s, str):
+                       return s.strip(' ')
+               return str(s, charset and self.clean_charset(charset) or 'us-ascii', errors='ignore').strip(' ')
+
        # Workaround for broken quoting in some MUAs (see below)
        _re_mailworkaround = re.compile('"(=\?[^\?]+\?[QB]\?[^\?]+\?=)"', re.IGNORECASE)
        def _decode_mime_header(self, hdr, email_workaround):
@@ -449,28 +455,32 @@ class ArchivesParser(object):
                        hdr = self._re_mailworkaround.sub(r'\1', hdr)
 
                try:
-                       return " ".join([unicode(s, charset and self.clean_charset(charset) or 'us-ascii', errors='ignore') for s,charset in decode_header(hdr)])
-               except HeaderParseError, e:
+                       return " ".join([self._maybe_decode(s, charset) for s, charset in decode_header(hdr)])
+               except HeaderParseError as e:
                        # Parser error is typically someone specifying an encoding,
                        # but then not actually using that encoding. We'll do the best
                        # we can, which is cut it down to ascii and ignore errors
-                       return unicode(hdr, 'us-ascii', errors='ignore')
+                       return str(hdr, 'us-ascii', errors='ignore').strip(' ')
 
        def decode_mime_header(self, hdr, email_workaround=False):
                try:
+                       if isinstance(hdr, Header):
+                               hdr = hdr.encode()
+
                        h = self._decode_mime_header(hdr, email_workaround)
                        if h:
                                return h.replace("\0", "")
                        return ''
-               except LookupError, e:
+               except LookupError as e:
                        raise IgnorableException("Failed to decode header value '%s': %s" % (hdr, e))
-               except ValueError, ve:
+               except ValueError as ve:
                        raise IgnorableException("Failed to decode header value '%s': %s" % (hdr, ve))
 
        def get_mandatory(self, fieldname):
                try:
                        x = self.msg[fieldname]
-                       if x==None: raise Exception()
+                       if x==None:
+                               raise Exception()
                        return x
                except:
                        raise IgnorableException("Mandatory field '%s' is missing" % fieldname)
@@ -496,17 +506,15 @@ class ArchivesParser(object):
                                                                                                   'show-info': 0,
                                                                                                   })
                if errors:
-                       print("HTML tidy failed for %s!" % self.msgid)
+                       print(("HTML tidy failed for %s!" % self.msgid))
                        print(errors)
                        return None
-               if type(html) == str:
-                       html = unicode(html, 'utf8')
 
                try:
                        cleaner = HTMLCleaner()
                        cleaner.feed(html)
                        return cleaner.get_text()
-               except HTMLParseError, e:
+               except Exception as e:
                        # Failed to parse the html, thus failed to clean it. so we must
                        # give up...
                        return None
@@ -515,7 +523,7 @@ class ArchivesParser(object):
 class HTMLCleaner(HTMLParser):
        def __init__(self):
                HTMLParser.__init__(self)
-               self.io = StringIO.StringIO()
+               self.io = io.StringIO()
 
        def get_text(self):
                return self.io.getvalue()
index 92ffa45f217c23c92194a540021b5e8895cdb9db..8962b879bae82756e7b5a282b390a280ff1559be 100644 (file)
@@ -1,6 +1,6 @@
 import difflib
 
-from parser import ArchivesParser
+from .parser import ArchivesParser
 
 from lib.log import log, opstatus
 
@@ -144,9 +144,9 @@ class ArchivesParserStorage(ArchivesParser):
                        # holding other threads together.
                        if self.threadid:
                                # Already have a threadid, means that we have a glue message
-                               print "Message %s resolved to existing thread %s, while being somebodys parent" % (self.msgid, self.threadid)
+                               print("Message %s resolved to existing thread %s, while being somebodys parent" % (self.msgid, self.threadid))
                        else:
-                               print "Message %s did not resolve to existing thread, but is somebodys parent" % self.msgid
+                               print("Message %s did not resolve to existing thread, but is somebodys parent" % self.msgid)
                                # In this case, just pick the first thread from the list and merge into that
                                # one.
                                self.threadid = childrows[0][2]
@@ -254,31 +254,27 @@ class ArchivesParserStorage(ArchivesParser):
                        })
                try:
                        id, _from, to, cc, subject, date, has_attachment, bodytxt = curs.fetchone()
-               except TypeError, e:
+               except TypeError as e:
                        f.write("---- %s ----\n" % self.msgid)
                        f.write("Could not re-find in archives (old id was %s): %s\n" % (oldid, e))
                        f.write("\n-------------------------------\n\n")
                        return
 
 
-               _from = _from.decode('utf8')
-               to = to.decode('utf8')
-               cc = cc.decode('utf8')
-               subject = subject.decode('utf8')
-               if (_from, to, cc, subject) != (self._from, self.to, self.cc, self.subject):
+               if (_from.rstrip(), to.rstrip(), cc.rstrip(), subject.rstrip()) != (self._from, self.to, self.cc, self.subject):
                        log.status("Message %s has header changes " % self.msgid)
                        f.write("==== %s ====\n" % self.msgid)
                        for fn in ['_from', 'to', 'cc', 'subject']:
                                if getattr(self, fn) != eval(fn):
-                                       s = u"- {0}: {1}\n".format(fn, eval(fn))
-                                       d = u"+ {0}: {1}\n".format(fn, getattr(self, fn))
+                                       s = "- {0}: {1}\n".format(fn, eval(fn))
+                                       d = "+ {0}: {1}\n".format(fn, getattr(self, fn))
                                        f.write(s)
                                        f.write(d)
                        f.write("\n\n")
 
-               if bodytxt.decode('utf8') != self.bodytxt:
+               if bodytxt != self.bodytxt:
                        log.status("Message %s has body changes " % self.msgid)
-                       tempdiff = list(difflib.unified_diff(bodytxt.decode('utf8').splitlines(),
+                       tempdiff = list(difflib.unified_diff(bodytxt.splitlines(),
                                                                                                 self.bodytxt.splitlines(),
                                                                                                 fromfile='old',
                                                                                                 tofile='new',
@@ -289,7 +285,9 @@ class ArchivesParserStorage(ArchivesParser):
                                # Then verify that each slice of 3 contains one @@ row (header), one -From and one +>From,
                                # which indicates the only change is in the From.
                                ok = True
-                               for a,b,c in map(None, *([iter(tempdiff[2:])] * 3)):
+                               tempdiff = tempdiff[2:]
+                               while tempdiff:
+                                       a,b,c = (tempdiff.pop(0), tempdiff.pop(0), tempdiff.pop(0))
                                        if not (a.startswith('@@ ') and b.startswith('-From ') and c.startswith('+>From ')):
                                                ok=False
                                                break
@@ -299,12 +297,12 @@ class ArchivesParserStorage(ArchivesParser):
 
 
                        # Generate a nicer diff
-                       d = list(difflib.unified_diff(bodytxt.decode('utf8').splitlines(),
-                                                                                                  self.bodytxt.splitlines(),
-                                                                                                  fromfile='old',
-                                                                                                  tofile='new',
-                                                                                                  n=0,
-                                                                                                  lineterm=''))
+                       d = list(difflib.unified_diff(bodytxt.splitlines(),
+                                                                                 self.bodytxt.splitlines(),
+                                                                                 fromfile='old',
+                                                                                 tofile='new',
+                                                                                 n=0,
+                                                                                 lineterm=''))
                        if len(d) > 0:
                                f.write("---- %s ----\n" % self.msgid)
                                f.write("\n".join(d))
index b49938b8bed5f95be4142b6214a6dd424314ffee..f2a06c3d74e9ae39c37b78dbf13f2cfbc4eb8803 100644 (file)
@@ -1,5 +1,4 @@
-import urllib
-import urllib2
+import requests
 
 from lib.log import log
 
@@ -23,13 +22,12 @@ class VarnishPurger(object):
                        else:
                                # Purging individual thread
                                exprlist.append('obj.http.x-pgthread ~ :%s:' % p)
-               purgedict = dict(zip(['p%s' % n for n in range(0, len(exprlist))], exprlist))
+               purgedict = dict(list(zip(['p%s' % n for n in range(0, len(exprlist))], exprlist)))
                purgedict['n'] = len(exprlist)
-               r = urllib2.Request(purgeurl, data=urllib.urlencode(purgedict))
-               r.add_header('Content-type', 'application/x-www-form-urlencoded')
-               r.add_header('Host', 'www.postgresql.org')
-               r.get_method = lambda: 'POST'
-               u = urllib2.urlopen(r)
-               if u.getcode() != 200:
+               r = requests.post(purgeurl, data=purgedict, headers={
+                       'Content-type': 'application/x-www-form-urlencoded',
+                       'Host': 'www.postgresql.org',
+               })
+               if r.status_code != 200:
                        log.error("Failed to send purge request!")
 
index b4668603d520b4f05ea810b003223049f58754e7..efb8626336ebedae9a1fb8bbad0748872d43f47f 100755 (executable)
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 #
 # load_message.py - takes a single email or mbox formatted
 # file on stdin or in a file and reads it into the database.
@@ -8,9 +8,9 @@ import os
 import sys
 
 from optparse import OptionParser
-from ConfigParser import ConfigParser
-import urllib
-import urllib2
+from configparser import ConfigParser
+import urllib.request, urllib.parse, urllib.error
+import urllib.request, urllib.error, urllib.parse
 
 import psycopg2
 
@@ -25,7 +25,7 @@ def log_failed_message(listid, srctype, src, msg, err):
                msgid = msg.msgid
        except:
                msgid = "<unknown>"
-       log.error("Failed to load message (msgid %s) from %s, spec %s: %s" % (msgid.encode('us-ascii', 'replace'), srctype, src, unicode(str(err), 'us-ascii', 'replace')))
+       log.error("Failed to load message (msgid %s) from %s, spec %s: %s" % (msgid.encode('us-ascii', 'replace').decode('us-ascii'), srctype, src, str(err).encode('us-ascii', 'replace').decode('us-ascii')))
 
        # We also put the data in the db. This happens in the main transaction
        # so if the whole script dies, it goes away...
@@ -34,7 +34,7 @@ def log_failed_message(listid, srctype, src, msg, err):
                        'msgid': msgid,
                        'srctype': srctype,
                        'src': src,
-                       'err': unicode(str(err), 'us-ascii', 'replace'),
+                       'err': str(err).encode('us-ascii', 'replace').decode('us-ascii'),
                        })
 
 
@@ -51,27 +51,27 @@ if __name__ == "__main__":
        (opt, args) = optparser.parse_args()
 
        if (len(args)):
-               print "No bare arguments accepted"
+               print("No bare arguments accepted")
                optparser.print_usage()
                sys.exit(1)
 
        if not opt.list:
-               print "List must be specified"
+               print("List must be specified")
                optparser.print_usage()
                sys.exit(1)
 
        if opt.directory and opt.mbox:
-               print "Can't specify both directory and mbox!"
+               print("Can't specify both directory and mbox!")
                optparser.print_usage()
                sys.exit(1)
 
        if opt.force_date and (opt.directory or opt.mbox) and not opt.filter_msgid:
-               print "Can't use force_date with directory or mbox - only individual messages"
+               print("Can't use force_date with directory or mbox - only individual messages")
                optparser.print_usage()
                sys.exit(1)
 
        if opt.filter_msgid and not (opt.directory or opt.mbox):
-               print "filter_msgid makes no sense without directory or mbox!"
+               print("filter_msgid makes no sense without directory or mbox!")
                optparser.print_usage()
                sys.exit(1)
 
@@ -93,8 +93,8 @@ if __name__ == "__main__":
        try:
                curs.execute("SET statement_timeout='30s'")
                curs.execute("SELECT pg_advisory_xact_lock(8059944559669076)")
-       except Exception, e:
-               print("Failed to wait on advisory lock: %s" % e)
+       except Exception as e:
+               print("Failed to wait on advisory lock: %s" % e)
                sys.exit(1)
 
        # Get the listid we're working on
@@ -121,36 +121,37 @@ if __name__ == "__main__":
                                        continue
                                try:
                                        ap.analyze(date_override=opt.force_date)
-                               except IgnorableException, e:
+                               except IgnorableException as e:
                                        log_failed_message(listid, "directory", os.path.join(opt.directory, x), ap, e)
                                        opstatus.failed += 1
                                        continue
                                ap.store(conn, listid)
                                purges.update(ap.purges)
                        if opt.interactive:
-                               print "Interactive mode, committing transaction"
+                               print("Interactive mode, committing transaction")
                                conn.commit()
-                               print "Proceed to next message with Enter, or input a period (.) to stop processing"
-                               x = raw_input()
+                               print("Proceed to next message with Enter, or input a period (.) to stop processing")
+                               x = input()
                                if x == '.':
-                                       print "Ok, aborting!"
+                                       print("Ok, aborting!")
                                        break
-                               print "---------------------------------"
+                               print("---------------------------------")
        elif opt.mbox:
                if not os.path.isfile(opt.mbox):
-                       print "File %s does not exist" % opt.mbox
+                       print("File %s does not exist" % opt.mbox)
                        sys.exit(1)
                mboxparser = MailboxBreakupParser(opt.mbox)
                while not mboxparser.EOF:
                        ap = ArchivesParserStorage()
-                       msg = mboxparser.next()
-                       if not msg: break
+                       msg = next(mboxparser)
+                       if not msg:
+                               break
                        ap.parse(msg)
                        if opt.filter_msgid and not ap.is_msgid(opt.filter_msgid):
                                continue
                        try:
                                ap.analyze(date_override=opt.force_date)
-                       except IgnorableException, e:
+                       except IgnorableException as e:
                                log_failed_message(listid, "mbox", opt.mbox, ap, e)
                                opstatus.failed += 1
                                continue
@@ -163,10 +164,10 @@ if __name__ == "__main__":
        else:
                # Parse single message on stdin
                ap = ArchivesParserStorage()
-               ap.parse(sys.stdin)
+               ap.parse(sys.stdin.buffer)
                try:
                        ap.analyze(date_override=opt.force_date)
-               except IgnorableException, e:
+               except IgnorableException as e:
                        log_failed_message(listid, "stdin","", ap, e)
                        conn.close()
                        sys.exit(1)
index 32c68209fd5425dfaa6e027374d19967c3f39e67..e38cdd4baee48e4471334eb3f7b10c62f2beb892 100755 (executable)
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 # Synchronize list info from pglister
@@ -6,7 +6,7 @@
 import os
 import sys
 import argparse
-from ConfigParser import ConfigParser
+from configparser import ConfigParser
 import psycopg2
 import requests
 
@@ -44,7 +44,7 @@ if __name__=="__main__":
        # For groups, just add them if they don't exist
        groups = {g['group']['id']:g['group']['groupname'] for g in obj}
 
-       for id,name in groups.items():
+       for id,name in groups.items():
                curs.execute("SELECT EXISTS (SELECT 1 FROM listgroups WHERE groupname=%(group)s)", {
                        'group': name,
                })
@@ -52,7 +52,7 @@ if __name__=="__main__":
                        curs.execute("INSERT INTO listgroups (groupname, sortkey) VALUES (%(group)s, 100) RETURNING groupname", {
                                'group': name,
                        })
-                       print "Added group %s" % name
+                       print("Added group %s" % name)
 
        # Add any missing lists, and synchronize their contents.
        for l in obj:
@@ -66,7 +66,7 @@ if __name__=="__main__":
                                'groupname': l['group']['groupname'],
                        })
                        listid, name = curs.fetchone()
-                       print "Added list %s" % name
+                       print("Added list %s" % name)
                else:
                        listid, name = curs.fetchone()
                        curs.execute("UPDATE lists SET shortdesc=%(name)s, description=%(desc)s, groupid=(SELECT groupid FROM listgroups WHERE groupname=%(groupname)s), active=true WHERE listid=%(id)s AND NOT (active AND shortdesc=%(name)s AND description=%(desc)s AND groupid=(SELECT groupid FROM listgroups WHERE groupname=%(groupname)s)) RETURNING listname", {
@@ -76,7 +76,7 @@ if __name__=="__main__":
                                'groupname': l['group']['groupname'],
                        })
                        for n, in curs.fetchall():
-                               print "Updated list %s " % n
+                               print("Updated list %s " % n)
 
                if do_subscribers:
                        # If we synchronize subscribers, we do so on all lists for now.
@@ -86,9 +86,9 @@ if __name__=="__main__":
                        })
                        for what, who in curs.fetchall():
                                if what == 'ins':
-                                       print "Added subscriber %s to list %s" % (who, name)
+                                       print("Added subscriber %s to list %s" % (who, name))
                                else:
-                                       print "Removed subscriber %s from list %s" % (who, name)
+                                       print("Removed subscriber %s from list %s" % (who, name))
 
 
        # We don't remove lists ever, because we probably want to keep archives around.
@@ -97,10 +97,10 @@ if __name__=="__main__":
                'lists': [l['listname'] for l in obj],
        })
        for n, in curs.fetchall():
-               print "List %s exists in archives, but not in upstream! Should it be marked inactive?" % n
+               print("List %s exists in archives, but not in upstream! Should it be marked inactive?" % n)
 
        if args.dryrun:
-               print "Dry-run, rolling back"
+               print("Dry-run, rolling back")
                conn.rollback()
        else:
                conn.commit()
index edab70cf7ec01e92a9b15f27c169d99d16494a14..72899e80c4d5af8c8b308d1f6f8c39d7bbc963a6 100755 (executable)
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 #
 # purge_frontend_message.py - issue varnish purge for the message
 # in question, to for example force an expire of a hidden message.
@@ -8,7 +8,7 @@ import os
 import sys
 
 from optparse import OptionParser
-from ConfigParser import ConfigParser
+from configparser import ConfigParser
 
 import psycopg2
 
@@ -21,12 +21,12 @@ if __name__ == "__main__":
        (opt, args) = optparser.parse_args()
 
        if (len(args)):
-               print "No bare arguments accepted"
+               print("No bare arguments accepted")
                optparser.print_help()
                sys.exit(1)
 
        if not opt.msgid:
-               print "Message-id must be specified"
+               print("Message-id must be specified")
                optparser.print_help()
                sys.exit(1)
 
index 802705840adb8bd6b4f78cceefac05acbbda017e..df4501a307b0e4670a31eb690614d57b9ed1dd9e 100755 (executable)
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 #
 # reparse_message.py - using the rawtxt stored in the database,
 # redo the parsing of it and overwrite it with itself. Used when
@@ -10,8 +10,8 @@ import sys
 import codecs
 
 from optparse import OptionParser
-from ConfigParser import ConfigParser
-from StringIO import StringIO
+from configparser import ConfigParser
+from io import BytesIO
 from datetime import datetime, timedelta
 
 import psycopg2
@@ -45,16 +45,16 @@ if __name__ == "__main__":
        (opt, args) = optparser.parse_args()
 
        if (len(args)):
-               print "No bare arguments accepted"
+               print("No bare arguments accepted")
                optparser.print_usage()
                sys.exit(1)
 
        if sum([1 for x in [opt.all, opt.sample, opt.msgid] if x]) != 1:
-               print "Must specify exactly one of --msgid, --all and --sample"
+               print("Must specify exactly one of --msgid, --all and --sample")
                sys.exit(1)
 
        if not opt.update and os.path.exists('reparse.diffs'):
-               print "File reparse.diffs already exists. Remove or rename and try again."
+               print("File reparse.diffs already exists. Remove or rename and try again.")
                sys.exit(1)
 
        log.set(opt.verbose)
@@ -97,10 +97,10 @@ if __name__ == "__main__":
        for id, rawtxt in ResultIter(curs):
                num += 1
                ap = ArchivesParserStorage()
-               ap.parse(StringIO(rawtxt))
+               ap.parse(BytesIO(rawtxt))
                try:
                        ap.analyze(date_override=opt.force_date)
-               except IgnorableException, e:
+               except IgnorableException as e:
                        if opt.update:
                                raise e
                        f.write("Exception loading %s: %s" % (id, e))
@@ -119,14 +119,14 @@ if __name__ == "__main__":
                        sys.stdout.flush()
                        laststatus = datetime.now()
 
-       print ""
+       print("")
 
        if opt.update:
                opstatus.print_status()
                if not opt.commit:
                        while True:
                                print("OK to commit transaction? ")
-                               a = raw_input().lower().strip()
+                               a = input().lower().strip()
                                if a == 'y' or a == 'yes':
                                        print("Ok, committing.")
                                        break