summaryrefslogtreecommitdiff
path: root/loader/clean_date.py
blob: 2297be6428d344cd3eca663146a055f60df60aed (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/usr/bin/env python3
#
# Clean up old, broken dates by re-deriving them from each message's Received headers
#

import os
import sys
import re

from configparser import ConfigParser

from email.parser import Parser
from urllib.request import urlopen
import dateutil.parser

import psycopg2


# Patterns that pull the timestamp out of a Received header. The specific
# envelope-from form is tried first: when that trailing comment sits on the
# same line as the date, the generic pattern would capture it as part of the
# date text and the date would then fail to parse.
_RECEIVED_DATE_PATTERNS = (
    re.compile(r';\s*(.*)\s*\(envelope-from [^\)]+\)$'),
    re.compile(r';\s*(.*)$'),
)


def _find_received_date(msg):
    """Return the date text of the first Received header that matches, or None."""
    # get_all() matches header names case-insensitively and keeps header order.
    for received in msg.get_all('Received', []):
        print("Trying on %s" % received)
        for pattern in _RECEIVED_DATE_PATTERNS:
            m = pattern.search(received)
            if m:
                return m.group(1)
    return None


def scan_message(messageid, olddate, curs):
    """Interactively replace the stored date of one message.

    Downloads the raw message text from the archives site, takes the
    timestamp from the first Received header that carries one, parses it
    and, after confirmation on the terminal, updates the ``messages`` row.

    Args:
        messageid: Message-id of the message; used both in the download URL
            and as the key of the UPDATE statement.
        olddate: The date currently stored, shown to the operator only.
        curs: An open DB-API cursor used for the UPDATE. The transaction is
            not committed here.

    Returns:
        False if no date could be found or parsed; None once the operator
        has answered the prompt (whether or not the row was updated).

    Errors raised by ``urlopen`` (unreachable host, HTTP error status, ...)
    are not handled here and propagate to the caller.
    """
    u = "http://archives.postgresql.org/msgtxt.php?id=%s" % messageid
    print("Scanning message at %s (date reported as %s)..." % (u, olddate))

    # urlopen() yields bytes while Parser works on text, so decode first.
    # Only the headers are inspected, so a lossy decode of stray non-UTF-8
    # bytes is acceptable, and the body does not need to be parsed at all.
    with urlopen(u) as f:
        raw = f.read()
    msg = Parser().parsestr(raw.decode('utf-8', errors='replace'),
                            headersonly=True)

    ds = _find_received_date(msg)
    if not ds:
        print("Could not find date. Sorry.")
        return False

    try:
        d = dateutil.parser.parse(ds)
    except (ValueError, OverflowError):
        # dateutil reports unparseable input as ValueError (ParserError) and
        # out-of-range values as OverflowError; anything else is a real bug
        # and must not be hidden (nor must Ctrl-C).
        print("Could not parse date '%s', sorry." % ds)
        return False

    # Keep asking until the operator gives a clear yes or no.
    while True:
        answer = input("Parsed this as date %s. Update? " % d).strip().upper()
        if answer == 'Y':
            curs.execute("UPDATE messages SET date=%(d)s WHERE messageid=%(m)s", {
                'd': d,
                'm': messageid,
            })
            print("Updated.")
            break
        elif answer == 'N':
            break


if __name__ == "__main__":
    # The configuration file lives next to this script, not in the current
    # working directory.
    cfg = ConfigParser()
    cfg.read('%s/archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0])))
    connstr = cfg.get('db', 'connstr')

    conn = psycopg2.connect(connstr)
    try:
        curs = conn.cursor()
        # Candidates are messages dated more than a day into the future or
        # before 1997.
        curs.execute("SELECT messageid, date FROM messages WHERE date>(CURRENT_TIMESTAMP+'1 day'::interval) OR date < '1997-01-01'")
        for messageid, date in curs.fetchall():
            try:
                scan_message(messageid, date, curs)
            except OSError as e:
                # urllib's URLError/HTTPError derive from OSError. A single
                # message that cannot be fetched must not abort the run and
                # throw away the updates already confirmed by the operator.
                print("Could not fetch message %s: %s" % (messageid, e))

        conn.commit()
    finally:
        # Always release the connection; closing without commit discards
        # any uncommitted updates, so a crash never leaves partial changes.
        conn.close()
    print("Done.")