diff options
author | Magnus Hagander | 2012-07-09 18:06:44 +0000 |
---|---|---|
committer | Magnus Hagander | 2012-07-09 18:06:44 +0000 |
commit | 6e1e82b64f4c3e33e4ea629eb9cbb22f97fed19a (patch) | |
tree | b104133904fbc41f4fd7ed0a5b9a904e9e501591 | |
parent | 9ef747bbd259cb12b03f09cf4e310b03393c8718 (diff) |
Store the raw text of messages.
Also add deferred loading of all large (possibly TOASTable) columns
that are not needed in the Django views.
-rw-r--r-- | django/archives/mailarchives/models.py | 1 | ||||
-rw-r--r-- | django/archives/mailarchives/views.py | 28 | ||||
-rw-r--r-- | django/archives/urls.py | 1 | ||||
-rw-r--r-- | loader/lib/parser.py | 1 | ||||
-rw-r--r-- | loader/lib/storage.py | 3 | ||||
-rw-r--r-- | loader/sql/schema.sql | 3 |
6 files changed, 29 insertions, 8 deletions
diff --git a/django/archives/mailarchives/models.py b/django/archives/mailarchives/models.py index d1e0eb0..a55370d 100644 --- a/django/archives/mailarchives/models.py +++ b/django/archives/mailarchives/models.py @@ -9,6 +9,7 @@ class Message(models.Model): date = models.DateTimeField(null=False) messageid = models.TextField(null=False) bodytxt = models.TextField(null=False) + rawtxt = models.TextField(null=True) parentid = models.IntegerField(null=False, blank=False) has_attachment = models.BooleanField(null=False, default=False) diff --git a/django/archives/mailarchives/views.py b/django/archives/mailarchives/views.py index f4dc709..7fe05b6 100644 --- a/django/archives/mailarchives/views.py +++ b/django/archives/mailarchives/views.py @@ -1,4 +1,4 @@ -from django.http import HttpResponse +from django.http import HttpResponse, Http404 from django.shortcuts import render_to_response, get_object_or_404 from django.db import connection from django.db.models import Q @@ -31,7 +31,7 @@ def render_datelist_from(request, l, d, title, to=None): if to: datefilter.add(Q(date__lt=to), Q.AND) - mlist = Message.objects.select_related().filter(datefilter).extra(where=["threadid IN (SELECT threadid FROM list_threads WHERE listid=%s)" % l.listid]).order_by('date')[:200] + mlist = Message.objects.defer('bodytxt', 'rawtxt', 'cc', 'to').select_related().filter(datefilter).extra(where=["threadid IN (SELECT threadid FROM list_threads WHERE listid=%s)" % l.listid]).order_by('date')[:200] threads = set([m.threadid for m in mlist]) r = render_to_response('datelist.html', { @@ -46,7 +46,7 @@ def render_datelist_to(request, l, d, title): # Need to sort this backwards in the database to get the LIMIT applied # properly, and then manually resort it in the correct order. We can do # the second sort safely in python since it's not a lot of items.. 
- mlist = sorted(Message.objects.select_related().filter(date__lte=d).extra(where=["threadid IN (SELECT threadid FROM list_threads WHERE listid=%s)" % l.listid]).order_by('-date')[:200], key=lambda m: m.date) + mlist = sorted(Message.objects.defer('bodytxt', 'rawtxt', 'cc', 'to').select_related().filter(date__lte=d).extra(where=["threadid IN (SELECT threadid FROM list_threads WHERE listid=%s)" % l.listid]).order_by('-date')[:200], key=lambda m: m.date) threads = set([m.threadid for m in mlist]) r = render_to_response('datelist.html', { @@ -112,7 +112,11 @@ SELECT id,_from,subject,date,messageid,has_attachment,parentid,datepath FROM t O yield {'id':id, 'mailfrom':_from, 'subject': subject, 'printdate': date.strftime("%Y-%m-%d %H:%M:%S"), 'messageid': messageid, 'hasattachment': has_attachment, 'parentid': parentid, 'indent': " " * len(parentpath)} def message(request, msgid): - m = get_object_or_404(Message, messageid=msgid) + try: + m = Message.objects.defer('rawtxt').get(messageid=msgid) + except Message.DoesNotExist, e: + raise Http404('Message does not exist') + lists = List.objects.extra(where=["listid IN (SELECT listid FROM list_threads WHERE threadid=%s)" % m.threadid]).order_by('listname') threadstruct = list(_build_thread_structure(m.threadid)) responses = [t for t in threadstruct if t['parentid']==m.id] @@ -134,8 +138,11 @@ def message(request, msgid): return r def message_flat(request, msgid): - msg = get_object_or_404(Message, messageid=msgid) - allmsg = Message.objects.filter(threadid=msg.threadid).order_by('date') + try: + msg = Message.objects.defer('rawtxt').get(messageid=msgid) + except Message.DoesNotExist, e: + raise Http404('Message does not exist') + allmsg = Message.objects.defer('rawtxt').filter(threadid=msg.threadid).order_by('date') # XXX: need to get the complete list of lists! 
r = render_to_response('message_flat.html', { @@ -145,6 +152,15 @@ def message_flat(request, msgid): r['X-pgthread'] = ":%s:" % msg.threadid return r +def message_raw(request, msgid): + try: + msg = Message.objects.defer('subject', 'mailfrom', 'to', 'cc', 'bodytxt').get(messageid=msgid) + except Message.DoesNotExist, e: + raise Http404('Message does not exist') + r = HttpResponse(msg.rawtxt, content_type='text/plain') + r['X-pgthread'] = ":%s:" % msg.threadid + return r + def testview(request, seqid): m = Message.objects.get(pk=seqid) try: diff --git a/django/archives/urls.py b/django/archives/urls.py index e6990ca..26cd68b 100644 --- a/django/archives/urls.py +++ b/django/archives/urls.py @@ -21,6 +21,7 @@ urlpatterns = patterns('', (r'^$', 'archives.mailarchives.views.index'), (r'^message-id/([^/]+)$', 'archives.mailarchives.views.message'), (r'^message-id/([^/]+)/flat$', 'archives.mailarchives.views.message_flat'), + (r'^message-id/([^/]+)/raw$', 'archives.mailarchives.views.message_raw'), (r'^([\w-]+)/$', 'archives.mailarchives.views.monthlist'), (r'^([\w-]+)/(\d+)-(\d+)/$', 'archives.mailarchives.views.datelist'), (r'^([\w-]+)/since/(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})', 'archives.mailarchives.views.datelistsincetime'), diff --git a/loader/lib/parser.py b/loader/lib/parser.py index 258cc2b..670a70c 100644 --- a/loader/lib/parser.py +++ b/loader/lib/parser.py @@ -18,6 +18,7 @@ class ArchivesParser(object): def parse(self, stream): self.msg = self.parser.parse(stream) + self.rawtxt = unicode(self.msg) def is_msgid(self, msgid): # Look for a specific messageid. 
This means we might parse it twice, diff --git a/loader/lib/storage.py b/loader/lib/storage.py index ab25793..1a34b1b 100644 --- a/loader/lib/storage.py +++ b/loader/lib/storage.py @@ -131,7 +131,7 @@ class ArchivesParserStorage(ArchivesParser): if len(curs.fetchall()): log.status("Tagged thread %s with listid %s" % (self.threadid, listid)) - curs.execute("INSERT INTO messages (parentid, threadid, _from, _to, cc, subject, date, has_attachment, messageid, bodytxt) VALUES (%(parentid)s, %(threadid)s, %(from)s, %(to)s, %(cc)s, %(subject)s, %(date)s, %(has_attachment)s, %(messageid)s, %(bodytxt)s) RETURNING id", { + curs.execute("INSERT INTO messages (parentid, threadid, _from, _to, cc, subject, date, has_attachment, messageid, bodytxt, rawtxt) VALUES (%(parentid)s, %(threadid)s, %(from)s, %(to)s, %(cc)s, %(subject)s, %(date)s, %(has_attachment)s, %(messageid)s, %(bodytxt)s, %(rawtxt)s) RETURNING id", { 'parentid': self.parentid, 'threadid': self.threadid, 'from': self._from, @@ -142,6 +142,7 @@ class ArchivesParserStorage(ArchivesParser): 'has_attachment': len(self.attachments) > 0, 'messageid': self.msgid, 'bodytxt': self.bodytxt, + 'rawtxt': self.rawtxt, }) id = curs.fetchall()[0][0] if len(self.attachments): diff --git a/loader/sql/schema.sql b/loader/sql/schema.sql index ff8e5d3..b6cf49f 100644 --- a/loader/sql/schema.sql +++ b/loader/sql/schema.sql @@ -11,7 +11,8 @@ CREATE TABLE messages ( date timestamptz NOT NULL, has_attachment boolean NOT NULL, messageid text NOT NULL, - bodytxt text NOT NULL + bodytxt text NOT NULL, + rawtxt text NOT NULL ); CREATE INDEX idx_messages_threadid ON messages(threadid); CREATE UNIQUE INDEX idx_messages_msgid ON messages(messageid); |