from Crypto import Random
import time
+
class AuthBackend(ModelBackend):
# We declare a fake backend that always fails direct authentication -
# since we should never be using direct authentication in the first place!
r = Random.new()
iv = r.read(16)
encryptor = AES.new(SHA.new(settings.SECRET_KEY.encode('ascii')).digest()[:16], AES.MODE_CBC, iv)
- cipher = encryptor.encrypt(s + ' ' * (16-(len(s) % 16))) # pad to 16 bytes
+ cipher = encryptor.encrypt(s + ' ' * (16 - (len(s) % 16))) # pad to 16 bytes
return HttpResponseRedirect("%s?d=%s$%s" % (
settings.PGAUTH_REDIRECT,
else:
return HttpResponseRedirect(settings.PGAUTH_REDIRECT)
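# A minimal sketch of the matching receive-side decryption (an assumption
# inferred from the encrypt step above; the real handler body is elided).
# The 'd' parameter is assumed to carry "<iv>$<ciphertext>", both parts
# base64-encoded to survive the URL:
#
#   import base64
#   iv, ciphertext = (base64.b64decode(p) for p in request.GET['d'].split('$'))
#   decryptor = AES.new(SHA.new(settings.SECRET_KEY.encode('ascii')).digest()[:16],
#                       AES.MODE_CBC, iv)
#   s = decryptor.decrypt(ciphertext).decode('ascii').rstrip(' ')  # strip space padding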
+
# Handle logout requests by logging out of this site and then
# redirecting to log out from the main site as well.
def logout(request):
django_logout(request)
return HttpResponseRedirect("%slogout/" % settings.PGAUTH_REDIRECT)
+
# Receive an authentication response from the main website and try
# to log the user in.
def auth_receive(request):
changed = True
if user.email != data['e'][0]:
user.email = data['e'][0]
- changed= True
+ changed = True
if changed:
user.save()
except User.DoesNotExist:
return j
+
# Import a user into the local authentication system. It will first
# search for the user, and if anything other than exactly one entry is
# returned, the import will fail.
return resp
+
@cache(hours=4)
def latest(request, listname):
if not settings.PUBLIC_ARCHIVES:
if limit <= 0 or limit > 100:
limit = 50
- extrawhere=[]
- extraparams=[]
+ extrawhere = []
+ extraparams = []
# Return only messages that have attachments?
if 'a' in request.GET:
extrawhere.append("threadid IN (SELECT threadid FROM list_threads WHERE listid=%s)" % list.listid)
else:
list = None
- extrawhere=''
+ extrawhere = ''
mlist = Message.objects.defer('bodytxt', 'cc', 'to').select_related().extra(where=extrawhere, params=extraparams).order_by('-date')[:limit]
allyearmonths = set([(m.date.year, m.date.month) for m in mlist])
resp = HttpResponse(content_type='application/json')
json.dump([
- {'msgid': m.messageid,
- 'date': m.date.isoformat(),
- 'from': m.mailfrom,
- 'subj': m.subject,}
+ {
+ 'msgid': m.messageid,
+ 'date': m.date.isoformat(),
+ 'from': m.mailfrom,
+ 'subj': m.subject,
+ }
for m in mlist], resp)
# Make sure this expires from the varnish cache when new entries show up
resp['X-pgthread'] = m.threadid
return resp
+
def thread_subscribe(request, msgid):
if not settings.PUBLIC_ARCHIVES:
return HttpResponseForbidden('No API access on private archives for now')
# we might need that flexibility in the future.
hide_reasons = [
None, # placeholder for 0
- 'This message has been hidden because a virus was found in the message.', # 1
- 'This message has been hidden because the message violated policies.', # 2
- 'This message has been hidden because for privacy reasons.', # 3
- 'This message was corrupt', # 4
+ 'This message has been hidden because a virus was found in the message.', # 1
+ 'This message has been hidden because the message violated policies.', # 2
+ 'This message has been hidden for privacy reasons.',  # 3
+ 'This message was corrupt.',  # 4
]
# multiple times from templates without generating multiple queries
# to the database.
_attachments = None
+
@property
def attachments(self):
- if not self._attachments:
+ if self._attachments is None:
# Weird value
return 'This message has been hidden.'
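# A minimal sketch of the lookup that the "Weird value" fallback above
# belongs to (assumed shape; the surrounding property and the hiddenstatus
# field name are reconstructions, not confirmed by this fragment):
#
#   @property
#   def hiddenreason(self):
#       if not self.hiddenstatus:
#           return None
#       try:
#           return hide_reasons[self.hiddenstatus]
#       except IndexError:
#           # Weird value
#           return 'This message has been hidden.'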
+
class ListGroup(models.Model):
groupid = models.IntegerField(null=False, primary_key=True)
groupname = models.CharField(max_length=200, null=False, blank=False)
class Meta:
db_table = 'listgroups'
+
class List(models.Model):
listid = models.IntegerField(null=False, primary_key=True)
listname = models.CharField(max_length=200, null=False, blank=False, unique=True)
group = models.ForeignKey(ListGroup, db_column='groupid')
subscriber_access = models.BooleanField(null=False, blank=False, default=False, help_text="Subscribers can access contents (default is admins only)")
-
@property
def maybe_shortdesc(self):
if self.shortdesc:
class Meta:
db_table = 'lists'
+
class Attachment(models.Model):
message = models.ForeignKey(Message, null=False, blank=False, db_column='message')
filename = models.CharField(max_length=1000, null=False, blank=False)
unique_together = (('list', 'username'), )
db_table = 'listsubscribers'
+
class ApiClient(models.Model):
apikey = models.CharField(max_length=100, null=False, blank=False)
postback = models.URLField(max_length=500, null=False, blank=False)
class Meta:
db_table = 'apiclients'
+
class ThreadSubscription(models.Model):
apiclient = models.ForeignKey(ApiClient, null=False, blank=False)
threadid = models.IntegerField(null=False, blank=False)
from django import shortcuts
+
class ERedirect(Exception):
def __init__(self, url):
self.url = url
+
class RedirectMiddleware(object):
def process_exception(self, request, exception):
if isinstance(exception, ERedirect):
register = template.Library()
+
def _rewrite_email(value):
- return value.replace('@', '(at)').replace('.','(dot)')
+ return value.replace('@', '(at)').replace('.', '(dot)')
+
@register.filter(name='hidemail')
@stringfilter
def hidemail(value):
return _rewrite_email(value)
+
# A regular expression and replacement function to mangle email addresses.
#
# The archived messages contain a lot of links to other messages, in the
# form of /m/ or /message-id/ URLs. Those are not email addresses, so ignore
# them. The links won't work if they are mangled.
_re_mail = re.compile('(/m(essage-id)?/)?[^()<>@,;:\/\s"\'&|]+@[^()<>@,;:\/\s"\'&|]+')
+
+
def _rewrite_email_match(match):
if match.group(1):
return match.group(0) # was preceded by /message-id/
else:
return _rewrite_email(match.group(0))
+
@register.filter(name='hideallemail')
@stringfilter
def hideallemail(value):
return _re_mail.sub(lambda x: _rewrite_email_match(x), value)
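# For example (illustrative input, not taken from the archives):
#
#   >>> hideallemail('mail foo@example.com or see /message-id/bar@example.com')
#   'mail foo(at)example(dot)com or see /message-id/bar@example.com'
#
# The /message-id/ link is left intact because that match was preceded by
# the group(1) prefix.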
+
@register.filter(name='nameonly')
@stringfilter
def nameonly(value):
return name
return email.split('@')[0]
+
@register.filter(name='md5')
@stringfilter
def md5(value):
from .models import *
+
# Ensure the user is logged in (unless the archives are public)
def ensure_logged_in(request):
if settings.PUBLIC_ARCHIVES:
return
raise ERedirect('%s?next=%s' % (settings.LOGIN_URL, request.path))
+
# Ensure the user has permissions to access a list. If not, raise
# a permissions exception.
def ensure_list_permissions(request, l):
# Redirect to a login page
raise ERedirect('%s?next=%s' % (settings.LOGIN_URL, request.path))
+
# Ensure the user has permissions to access a message. In order to view
# a message, the user must have permissions on *all* lists the thread
# appears on.
# Redirect to a login page
raise ERedirect('%s?next=%s' % (settings.LOGIN_URL, request.path))
+
# Decorator to set cache age
def cache(days=0, hours=0, minutes=0, seconds=0):
"Set the server to cache object a specified time. td must be a timedelta object"
if settings.PUBLIC_ARCHIVES:
# Only set cache headers on public archives
- td = timedelta(hours=hours, minutes=minutes, seconds=seconds)
+ td = timedelta(days=days, hours=hours, minutes=minutes, seconds=seconds)
- resp['Cache-Control'] = 's-maxage=%s' % (td.days*3600*24 + td.seconds)
+ resp['Cache-Control'] = 's-maxage=%s' % (td.days * 3600 * 24 + td.seconds)
return resp
return __cache
return _cache
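# For example, with the decorator above:
#
#   @cache(hours=4)
#   def index(request):
#       ...
#
# yields "Cache-Control: s-maxage=14400" (4 * 3600) on public archives,
# and no cache header at all when PUBLIC_ARCHIVES is off.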
+
def nocache(fn):
def _nocache(request, *_args, **_kwargs):
resp = fn(request, *_args, **_kwargs)
return resp
return _nocache
+
# Decorator to require http auth
def antispam_auth(fn):
def _antispam_auth(request, *_args, **_kwargs):
return _antispam_auth
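# A minimal sketch of what the elided decorator body presumably does
# (an assumption; only the def and return lines survive above): demand
# HTTP basic auth before letting the request through to fn.
#
#   import base64
#   auth = request.META.get('HTTP_AUTHORIZATION', '')
#   if not auth.startswith('Basic '):
#       resp = HttpResponse('Authentication required', status=401)
#       resp['WWW-Authenticate'] = 'Basic realm="Archives"'
#       return resp
#   username, password = base64.b64decode(auth[6:]).decode().split(':', 1)
#   # ...validate the credentials, then fall through to fn(request, ...)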
-
def get_all_groups_and_lists(request, listid=None):
# Django doesn't (yet) support traversing the reverse relationship,
# so we'll get all the lists and rebuild it backwards.
'groupid': l.group.groupid,
'groupname': l.group.groupname,
'sortkey': l.group.sortkey,
- 'lists': [l,],
+ 'lists': [l],
'homelink': 'list/group/%s' % l.group.groupid,
}
if listname:
self.ctx.update({'searchform_listname': listname})
+
def render_nav(navcontext, template, ctx):
ctx.update(navcontext.ctx)
return render(navcontext.request, template, ctx)
+
@cache(hours=4)
def index(request):
ensure_logged_in(request)
@cache(hours=8)
def groupindex(request, groupid):
(groups, listgroupid) = get_all_groups_and_lists(request)
- mygroups = [{'groupname': g['groupname'], 'lists': g['lists']} for g in groups if g['groupid']==int(groupid)]
+ mygroups = [{'groupname': g['groupname'], 'lists': g['lists']} for g in groups if g['groupid'] == int(groupid)]
if len(mygroups) == 0:
raise Http404('List group does not exist')
'groups': mygroups,
})
+
@cache(hours=8)
def monthlist(request, listname):
l = get_object_or_404(List, listname=listname)
curs = connection.cursor()
curs.execute("SELECT year, month FROM list_months WHERE listid=%(listid)s ORDER BY year DESC, month DESC", {'listid': l.listid})
- months=[{'year':r[0],'month':r[1], 'date':datetime(r[0],r[1],1)} for r in curs.fetchall()]
+ months = [{'year': r[0], 'month': r[1], 'date': datetime(r[0], r[1], 1)} for r in curs.fetchall()]
return render_nav(NavContext(request, l.listid, l.listname), 'monthlist.html', {
'list': l,
'months': months,
})
+
def get_monthday_info(mlist, l, d):
allmonths = set([m.date.month for m in mlist])
monthdate = None
'daysinmonth': daysinmonth,
'yearmonth': yearmonth,
})
- r['X-pglm'] = ':%s:' % (':'.join(['%s/%s/%s' % (l.listid, year, month) for year,month in allyearmonths]))
+ r['X-pglm'] = ':%s:' % (':'.join(['%s/%s/%s' % (l.listid, year, month) for year, month in allyearmonths]))
return r
+
def render_datelist_from(request, l, d, title, to=None):
# NOTE! Basic permissions checks must be done before calling this function!
datefilter = Q(date__gte=d)
return _render_datelist(request, l, d, datefilter, title,
lambda x: list(x.order_by('date')[:200]))
+
def render_datelist_to(request, l, d, title):
# NOTE! Basic permissions checks must be done before calling this function!
return _render_datelist(request, l, d, Q(date__lte=d), title,
lambda x: sorted(x.order_by('-date')[:200], key=lambda m: m.date))
+
@cache(hours=2)
def datelistsince(request, listname, msgid):
l = get_object_or_404(List, listname=listname)
msg = get_object_or_404(Message, messageid=msgid)
return render_datelist_from(request, l, msg.date, "%s since %s" % (l.listname, msg.date.strftime("%Y-%m-%d %H:%M:%S")))
+
# Longer cache since this will be used for the fixed date links
@cache(hours=4)
def datelistsincetime(request, listname, year, month, day, hour, minute):
raise Http404("Invalid date format, not found")
return render_datelist_from(request, l, d, "%s since %s" % (l.listname, d.strftime("%Y-%m-%d %H:%M")))
+
@cache(hours=2)
def datelistbefore(request, listname, msgid):
l = get_object_or_404(List, listname=listname)
msg = get_object_or_404(Message, messageid=msgid)
return render_datelist_to(request, l, msg.date, "%s before %s" % (l.listname, msg.date.strftime("%Y-%m-%d %H:%M:%S")))
+
@cache(hours=2)
def datelistbeforetime(request, listname, year, month, day, hour, minute):
l = get_object_or_404(List, listname=listname)
raise Http404("Invalid date format, not found")
return render_datelist_to(request, l, d, "%s before %s" % (l.listname, d.strftime("%Y-%m-%d %H:%M")))
+
@cache(hours=4)
def datelist(request, listname, year, month):
l = get_object_or_404(List, listname=listname)
except ValueError:
raise Http404("Malformatted date, month not found")
- enddate = d+timedelta(days=31)
+ enddate = d + timedelta(days=31)
enddate = datetime(enddate.year, enddate.month, 1)
return render_datelist_from(request, l, d, "%s - %s %s" % (l.listname, d.strftime("%B"), d.year), enddate)
+
@cache(hours=4)
def attachment(request, attid):
# Use a direct query instead of django, since it has bad support for
# bytea (binary) fields.
return HttpResponse(r[0][3], content_type=r[0][1])
+
def _build_thread_structure(threadid):
# Yeah, this is *way* too complicated for the django ORM
curs = connection.cursor()
SELECT id,_from,subject,date,messageid,has_attachment,parentid,datepath FROM t ORDER BY datepath||date
""", {'threadid': threadid})
- for id,_from,subject,date,messageid,has_attachment,parentid,parentpath in curs.fetchall():
- yield {'id':id, 'mailfrom':_from, 'subject': subject, 'date': date, 'printdate': date.strftime("%Y-%m-%d %H:%M:%S"), 'messageid': messageid, 'hasattachment': has_attachment, 'parentid': parentid, 'indent': " " * len(parentpath)}
+ for id, _from, subject, date, messageid, has_attachment, parentid, parentpath in curs.fetchall():
+ yield {
+ 'id': id,
+ 'mailfrom': _from,
+ 'subject': subject,
+ 'date': date,
+ 'printdate': date.strftime("%Y-%m-%d %H:%M:%S"),
+ 'messageid': messageid,
+ 'hasattachment': has_attachment,
+ 'parentid': parentid,
+ 'indent': " " * len(parentpath),
+ }
def _get_nextprevious(listmap, dt):
}
return retval
+
@cache(hours=4)
def message(request, msgid):
ensure_message_permissions(request, msgid)
if ims >= newest:
return HttpResponseNotModified()
- responses = [t for t in threadstruct if t['parentid']==m.id]
+ responses = [t for t in threadstruct if t['parentid'] == m.id]
if m.parentid:
for t in threadstruct:
r['Last-Modified'] = http_date(newest)
return r
+
@cache(hours=4)
def message_flat(request, msgid):
ensure_message_permissions(request, msgid)
r['Last-Modified'] = http_date(newest)
return r
+
@nocache
@antispam_auth
def message_raw(request, msgid):
msg = parser.parse(s)
return msg.as_string(unixfrom=True)
-
def _message_stream(first):
yield _one_message(first[1])
r['Content-type'] = 'application/mbox'
return r
+
@nocache
@antispam_auth
def message_mbox(request, msgid):
},
msgid)
+
@nocache
@antispam_auth
def mbox(request, listname, listname2, mboxyear, mboxmonth):
query = query.replace('%%%', '')
return _build_mbox(query, params)
+
def search(request):
if not settings.PUBLIC_ARCHIVES:
# We don't support searching of non-public archives at all at this point.
's': subject,
'f': mailfrom,
'r': rank,
- 'a': abstract.replace("[[[[[[", "<b>").replace("]]]]]]","</b>"),
+ 'a': abstract.replace("[[[[[[", "<b>").replace("]]]]]]", "</b>"),
} for messageid, date, subject, mailfrom, rank, abstract in curs.fetchall()],
resp)
return resp
+
@cache(seconds=10)
def web_sync_timestamp(request):
s = datetime.now().strftime("%Y-%m-%d %H:%M:%S\n")
r['Content-Length'] = len(s)
return r
+
@cache(hours=8)
def legacy(request, listname, year, month, msgnum):
curs = connection.cursor()
raise Http404('Message does not exist')
return HttpResponsePermanentRedirect('/message-id/%s' % r[0][0])
+
# Dynamic CSS serving: we merge a number of different CSS files into a
# single one, so they turn into a single http response. We do this
# dynamically, since the output will be cached.
_dynamic_cssmap = {
'base': ['media/css/main.css',
- 'media/css/normalize.css',],
+ 'media/css/normalize.css'],
'docs': ['media/css/global.css',
'media/css/table.css',
'media/css/text.css',
'media/css/docs.css'],
}
+
@cache(hours=8)
def dynamic_css(request, css):
if css not in _dynamic_cssmap:
return resp
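# A minimal sketch of what the elided body presumably does (assumed):
# concatenate the files listed in _dynamic_cssmap into one response and
# let the @cache decorator above handle expiry.
#
#   resp = HttpResponse(content_type='text/css')
#   for fn in _dynamic_cssmap[css]:
#       with open(fn) as f:
#           resp.write(f.read())
#   return resp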
+
# Redirect to the requested url, with a slash first. This is used to remove
# trailing slashes on messageid links by doing a permanent redirect. This is
# better than just eating them, since this way we only end up with one copy
# in the cache.
def slash_redirect(request, url):
return HttpResponsePermanentRedirect("/%s" % url)
+
# Redirect the requested URL to whatever happens to be in the regexp capture.
# This is used for user agents that generate broken URLs that are easily
# captured using regexp.
DATABASES = {
'default': {
- 'ENGINE': 'django.db.backends.postgresql_psycopg2', # Add 'postgresql_psycopg2', 'postgresql', 'mysql', 'sqlite3' or 'oracle'.
+ 'ENGINE': 'django.db.backends.postgresql_psycopg2', # Add 'postgresql_psycopg2', 'postgresql', 'mysql', 'sqlite3' or 'oracle'.
'NAME': 'archives', # Or path to database file if using sqlite3.
'USER': '', # Not used with sqlite3.
'PASSWORD': '', # Not used with sqlite3.
# Don't forget to use absolute paths, not relative paths.
)
-# List of finder classes that know how to find static files in
-# various locations.
-#STATICFILES_FINDERS = (
-# 'django.contrib.staticfiles.finders.FileSystemFinder',
-# 'django.contrib.staticfiles.finders.AppDirectoriesFinder',
-# 'django.contrib.staticfiles.finders.DefaultStorageFinder',
-#)
-
# Make this unique, and don't share it with anybody.
SECRET_KEY = '7j9q&&!g26rkh!=g%1zb@20b^k^gmzy4=!mhzu2wesxb9b%16m'
}
}
-
-
# Required for lighttpd
-FORCE_SCRIPT_NAME=""
+FORCE_SCRIPT_NAME = ""
# Always override!
SEARCH_CLIENTS = ('127.0.0.1',)
] + INSTALLED_APPS
from archives.util import validate_new_user
- PGAUTH_CREATEUSER_CALLBACK=validate_new_user
+ PGAUTH_CREATEUSER_CALLBACK = validate_new_user
from django.db import connection
from django.utils.functional import SimpleLazyObject
+
def validate_new_user(username, email, firstname, lastname):
# Only allow user creation if they are already a subscriber
curs = connection.cursor()
return HttpResponse("You are not currently subscribed to any mailing list on this server. Account not created.")
+
def _get_gitrev():
# Return the current git revision, which is used for
# cache-busting URLs.
# If packed-refs also can't be read, just give up
return 'eeeeeeee'
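# A minimal sketch of the lookup described above (assumed; only the final
# fallback survives in this fragment): resolve .git/HEAD to a ref, read
# the ref file, and fall back to packed-refs before giving up.
#
#   try:
#       with open('.git/HEAD') as f:
#           ref = f.readline().strip().split(' ')[1]
#       with open('.git/%s' % ref) as f:
#           return f.readline().strip()[:8]
#   except IOError:
#       # ...scan .git/packed-refs for the ref here, and if that also
#       # fails, give up:
#       return 'eeeeeeee'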
+
# Template context processor to add information about the root link and
# the current git revision. git revision is returned as a lazy object so
# we don't spend effort trying to load it if we don't need it (though
import psycopg2
+
def scan_message(messageid, olddate, curs):
u = "http://archives.postgresql.org/msgtxt.php?id=%s" % messageid
print("Scanning message at %s (date reported as %s)..." % (u, olddate))
# Can be either one of them, but we really don't care...
ds = None
- for k,r in list(msg.items()):
+ for k, r in list(msg.items()):
if k != 'Received': continue
print("Trying on %s" % r)
elif x.upper() == 'N':
break
+
if __name__ == "__main__":
cfg = ConfigParser()
cfg.read('%s/archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0])))
- connstr = cfg.get('db','connstr')
+ connstr = cfg.get('db', 'connstr')
conn = psycopg2.connect(connstr)
parser.print_help()
sys.exit(1)
-
# Arguments OK, now connect
cfg = ConfigParser()
cfg.read(os.path.join(os.path.realpath(os.path.dirname(sys.argv[0])), 'archives.ini'))
try:
- connstr = cfg.get('db','connstr')
+ connstr = cfg.get('db', 'connstr')
except:
connstr = 'need_connstr'
# Same month, so do it
monthrange = ((today.year, today.month),)
else:
- monthrange = ((today.year, today.month),(yesterday.year, yesterday.month))
+ monthrange = ((today.year, today.month), (yesterday.year, yesterday.month))
for lid, lname in all_lists:
for year, month in monthrange:
fullpath = os.path.join(args.destination, lname, 'files/public/archive')
from lib.varnish import VarnishPurger
reasons = [
- None, # Placeholder for 0
+ None, # Placeholder for 0
"virus",
"violates policies",
"privacy",
cfg = ConfigParser()
cfg.read('%s/archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0])))
try:
- connstr = cfg.get('db','connstr')
+ connstr = cfg.get('db', 'connstr')
except:
connstr = 'need_connstr'
def print_status(self):
opstatus.print_status()
+
class OpStatus(object):
def __init__(self):
self.stored = 0
log = Log()
opstatus = OpStatus()
-
SEPARATOR = "ABCARCHBREAK123" * 50
bSEPARATOR = bytes(SEPARATOR, 'ascii')
+
class MailboxBreakupParser(object):
def __init__(self, fn):
self.EOF = False
from lib.exception import IgnorableException
from lib.log import log
+
class ArchivesParser(object):
def __init__(self):
self.parser = BytesParser(policy=compat32)
- if m and not m in self.parents:
+ if m and m not in self.parents:
self.parents.append(m)
-
def clean_charset(self, charset):
lcharset = charset.lower()
if lcharset == 'unknown-8bit' or lcharset == 'x-unknown' or lcharset == 'unknown':
if not params:
# No content-type, so we assume us-ascii
return str(b, 'us-ascii', errors='ignore')
- for k,v in params:
+ for k, v in params:
if k.lower() == 'charset':
charset = v
break
# Regular expression matching the PostgreSQL custom mail footer that
# is appended to all emails.
_re_footer = re.compile('(.*)--\s+\nSent via [^\s]+ mailing list \([^\)]+\)\nTo make changes to your subscription:\nhttp://www\.postgresql\.org/mailpref/[^\s]+\s*$', re.DOTALL)
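# For example, get_body() below can strip the footer with a substitution
# along these lines (illustrative; the actual call is elided):
#
#   b = self._re_footer.sub(r'\1', b)
#
# which keeps everything captured before the footer and drops the rest.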
+
def get_body(self):
b = self._get_body()
if b:
# If it has a name, we consider it an attachment
if not container.get_params():
return
- for k,v in container.get_params():
- if k=='name' and v != '':
+ for k, v in container.get_params():
+ if k == 'name' and v != '':
# Yes, it has a name
try:
self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True)))
# No name, and text/plain, so ignore it
re_msgid = re.compile('^\s*<(.*)>\s*')
+
def clean_messageid(self, messageid, ignorebroken=False):
m = self.re_msgid.match(messageid)
if not m:
log.status("Could not parse messageid '%s', ignoring it" % messageid)
return None
raise IgnorableException("Could not parse message id '%s'" % messageid)
- return m.groups(1)[0].replace(' ','')
+ return m.group(1).replace(' ', '')
# _date_multi_re = re.compile(' \((\w+\s\w+(\s+\w+)*|)\)$')
# Now using [^\s] instead of \w, to work with Japanese characters
_date_multi_re2 = re.compile(' ([\+-]\d{4}) \([^)]+\)$')
_date_multiminus_re = re.compile(' -(-\d+)$')
_date_offsetnoplus_re = re.compile(' (\d{4})$')
+
def forgiving_date_decode(self, d):
if d.strip() == '':
raise IgnorableException("Failed to parse empty date")
if d.endswith('+0-900'):
d = d.replace('+0-900', '-0900')
if d.endswith('Mexico/General'):
- d = d.replace('Mexico/General','CDT')
+ d = d.replace('Mexico/General', 'CDT')
if d.endswith('Pacific Daylight Time'):
d = d.replace('Pacific Daylight Time', 'PDT')
if d.endswith(' ZE2'):
- d = d.replace(' ZE2',' +0200')
+ d = d.replace(' ZE2', ' +0200')
if d.find('-Juin-') > 0:
- d = d.replace('-Juin-','-Jun-')
+ d = d.replace('-Juin-', '-Jun-')
if d.find('-Juil-') > 0:
- d = d.replace('-Juil-','-Jul-')
+ d = d.replace('-Juil-', '-Jul-')
- if d.find(' 0 (GMT)'):
+ if d.find(' 0 (GMT)') > 0:
- d = d.replace(' 0 (GMT)',' +0000')
+ d = d.replace(' 0 (GMT)', ' +0000')
if self._date_multiminus_re.search(d):
d = self._date_multiminus_re.sub(' \\1', d)
if self._date_offsetnoplus_re.search(d):
d = self._date_offsetnoplus_re.sub('+\\1', d)
-
# We have a number of dates in the format
# "<full datespace> +0200 (MET DST)"
# or similar. The problem comes from the space within the
# Some offsets are >16 hours, which postgresql will not
# (for good reasons) accept
- if dp.utcoffset() and abs(dp.utcoffset().days * (24*60*60) + dp.utcoffset().seconds) > 60*60*16-1:
+ if dp.utcoffset() and abs(dp.utcoffset().days * (24 * 60 * 60) + dp.utcoffset().seconds) > 60 * 60 * 16 - 1:
# Convert it to a UTC timestamp using Python. It will give
# us the right time, but the wrong timezone. Should be
# enough...
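# A minimal sketch of that conversion (assumed; the actual statement is
# elided here): shift the parsed datetime by its own offset and drop the
# tzinfo, leaving a naive UTC timestamp that postgres will accept.
#
#   dp = (dp - dp.utcoffset()).replace(tzinfo=None)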
# Workaround for broken quoting in some MUAs (see below)
_re_mailworkaround = re.compile('"(=\?[^\?]+\?[QB]\?[^\?]+\?=)"', re.IGNORECASE)
+
def _decode_mime_header(self, hdr, email_workaround):
- if hdr == None:
+ if hdr is None:
return None
# do this *before* doing any MIME decoding, we should be safe against
# anybody *actually* putting that sequence in the header (since we
# won't match the encoded contents)
- hdr = hdr.replace("\n\t"," ")
+ hdr = hdr.replace("\n\t", " ")
# In at least some cases, at least gmail (and possibly other MUAs)
# incorrectly put double quotes in the name/email field even when
def get_mandatory(self, fieldname):
try:
x = self.msg[fieldname]
- if x==None:
+ if x is None:
raise Exception()
return x
except:
from lib.log import log, opstatus
+
class ArchivesParserStorage(ArchivesParser):
def __init__(self):
super(ArchivesParserStorage, self).__init__()
'message': pk,
})
if len(self.attachments):
- curs.executemany("INSERT INTO attachments (message, filename, contenttype, attachment) VALUES (%(message)s, %(filename)s, %(contenttype)s, %(attachment)s)",[ {
+ curs.executemany("INSERT INTO attachments (message, filename, contenttype, attachment) VALUES (%(message)s, %(filename)s, %(contenttype)s, %(attachment)s)", [{
'message': pk,
'filename': a[0] or 'unknown_filename',
'contenttype': a[1],
all_parents = curs.fetchall()
if len(all_parents):
# At least one of the parents exist. Now try to figure out which one
- best_parent = len(self.parents)+1
+ best_parent = len(self.parents) + 1
best_threadid = -1
best_parentid = None
- for i in range(0,len(all_parents)):
- for j in range(0,len(self.parents)):
+ for i in range(0, len(all_parents)):
+ for j in range(0, len(self.parents)):
if self.parents[j] == all_parents[i][1]:
# This messageid found. Better than the last one?
if j < best_parent:
self.msgid, id, self.threadid, self.parentid))
if len(self.attachments):
# Insert attachments
- curs.executemany("INSERT INTO attachments (message, filename, contenttype, attachment) VALUES (%(message)s, %(filename)s, %(contenttype)s, %(attachment)s)",[ {
+ curs.executemany("INSERT INTO attachments (message, filename, contenttype, attachment) VALUES (%(message)s, %(filename)s, %(contenttype)s, %(attachment)s)", [{
'message': id,
'filename': a[0] or 'unknown_filename',
'contenttype': a[1],
f.write("\n-------------------------------\n\n")
return
-
if (_from.rstrip(), to.rstrip(), cc.rstrip(), subject.rstrip()) != (self._from, self.to, self.cc, self.subject):
log.status("Message %s has header changes " % self.msgid)
f.write("==== %s ====\n" % self.msgid)
tofile='new',
n=0,
lineterm=''))
- if (len(tempdiff)-2) % 3 == 0:
+ if (len(tempdiff) - 2) % 3 == 0:
# 3 rows to a diff, two header rows.
# Then verify that each slice of 3 contains one @@ row (header), one -From and one +>From,
# which indicates the only change is in the From.
ok = True
tempdiff = tempdiff[2:]
while tempdiff:
- a,b,c = (tempdiff.pop(0), tempdiff.pop(0), tempdiff.pop(0))
+ a, b, c = (tempdiff.pop(0), tempdiff.pop(0), tempdiff.pop(0))
if not (a.startswith('@@ ') and b.startswith('-From ') and c.startswith('+>From ')):
- ok=False
+ ok = False
break
if ok:
fromonlyf.write("%s\n" % self.msgid)
return
-
# Generate a nicer diff
d = list(difflib.unified_diff(bodytxt.splitlines(),
self.bodytxt.splitlines(),
from lib.log import log
+
class VarnishPurger(object):
def __init__(self, cfg):
self.cfg = cfg
})
if r.status_code != 200:
log.error("Failed to send purge request!")
-
from lib.log import log, opstatus
from lib.varnish import VarnishPurger
+
def log_failed_message(listid, srctype, src, msg, err):
try:
msgid = msg.msgid
cfg = ConfigParser()
cfg.read('%s/archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0])))
try:
- connstr = cfg.get('db','connstr')
+ connstr = cfg.get('db', 'connstr')
except:
connstr = 'need_connstr'
try:
ap.analyze(date_override=opt.force_date)
except IgnorableException as e:
- log_failed_message(listid, "stdin","", ap, e)
+ log_failed_message(listid, "stdin", "", ap, e)
conn.close()
sys.exit(1)
ap.store(conn, listid)
import psycopg2
import requests
-if __name__=="__main__":
+if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Synchronize lists from pglister")
parser.add_argument('--dryrun', action='store_true', help="Don't commit changes to database")
cfg = ConfigParser()
cfg.read('%s/archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0])))
try:
- connstr = cfg.get('db','connstr')
+ connstr = cfg.get('db', 'connstr')
except:
connstr = 'need_connstr'
if cfg.has_option('pglister', 'subscribers') and cfg.getint('pglister', 'subscribers'):
- do_subscribers=1
+ do_subscribers = 1
else:
- do_subscribers=0
+ do_subscribers = 0
psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
conn = psycopg2.connect(connstr)
obj = r.json()
# For groups, just add them if they don't exist
- groups = {g['group']['id']:g['group']['groupname'] for g in obj}
+ groups = {g['group']['id']: g['group']['groupname'] for g in obj}
- for id,name in list(groups.items()):
+ for id, name in list(groups.items()):
curs.execute("SELECT EXISTS (SELECT 1 FROM listgroups WHERE groupname=%(group)s)", {
'group': name,
})
else:
print("Removed subscriber %s from list %s" % (who, name))
-
# We never remove lists, because we probably want to keep the archives
# around. But for now, we alert on lists that have disappeared upstream.
curs.execute("SELECT listname FROM lists WHERE active AND NOT listname=ANY(%(lists)s)", {
cfg = ConfigParser()
cfg.read('%s/archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0])))
try:
- connstr = cfg.get('db','connstr')
+ connstr = cfg.get('db', 'connstr')
except:
connstr = 'need_connstr'
from lib.log import log, opstatus
from lib.varnish import VarnishPurger
+
def ResultIter(cursor):
# Fetch lots of data while keeping memory usage down, by feeding it out
# of a generator that uses fetchmany().
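    # (A sketch of the body, which is elided in this fragment; the batch
    # size of 5000 is an assumed value.)
    while True:
        rows = cursor.fetchmany(5000)
        if not rows:
            break
        for row in rows:
            yield row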
cfg = ConfigParser()
cfg.read('%s/archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0])))
try:
- connstr = cfg.get('db','connstr')
+ connstr = cfg.get('db', 'connstr')
except:
connstr = 'need_connstr'
if not opt.update:
f = codecs.open("reparse.diffs", "w", "utf-8")
- fromonlyf = open("reparse.fromonly","w")
+ fromonlyf = open("reparse.fromonly", "w")
firststatus = datetime.now()
laststatus = datetime.now()
ap.diff(conn, f, fromonlyf, id)
if datetime.now() - laststatus > timedelta(seconds=5):
sys.stdout.write("%s messages parsed (%s%%, %s / second), %s updated\r" % (num,
- num*100/totalcount,
- num / ((datetime.now()-firststatus).seconds),
+ num * 100 / totalcount,
+ num / ((datetime.now() - firststatus).seconds),
updated))
sys.stdout.flush()
laststatus = datetime.now()
cfg = ConfigParser()
cfg.read('%s/../archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0])))
try:
- connstr = cfg.get('db','connstr')
+ connstr = cfg.get('db', 'connstr')
except:
connstr = 'need_connstr'
cfg = ConfigParser()
cfg.read('%s/../archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0])))
try:
- connstr = cfg.get('db','connstr')
+ connstr = cfg.get('db', 'connstr')
except:
connstr = 'need_connstr'