summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Magnus Hagander, 2012-07-09 18:06:44 +0000
committer: Magnus Hagander, 2012-07-09 18:06:44 +0000
commit: 6e1e82b64f4c3e33e4ea629eb9cbb22f97fed19a (patch)
tree: b104133904fbc41f4fd7ed0a5b9a904e9e501591
parent: 9ef747bbd259cb12b03f09cf4e310b03393c8718 (diff)
Store the raw text of messages.
Also add deferred loading of all large (possibly TOASTable) columns that are not needed in the Django views.
-rw-r--r-- django/archives/mailarchives/models.py | 1
-rw-r--r-- django/archives/mailarchives/views.py | 28
-rw-r--r-- django/archives/urls.py | 1
-rw-r--r-- loader/lib/parser.py | 1
-rw-r--r-- loader/lib/storage.py | 3
-rw-r--r-- loader/sql/schema.sql | 3
6 files changed, 29 insertions, 8 deletions
diff --git a/django/archives/mailarchives/models.py b/django/archives/mailarchives/models.py
index d1e0eb0..a55370d 100644
--- a/django/archives/mailarchives/models.py
+++ b/django/archives/mailarchives/models.py
@@ -9,6 +9,7 @@ class Message(models.Model):
date = models.DateTimeField(null=False)
messageid = models.TextField(null=False)
bodytxt = models.TextField(null=False)
+ rawtxt = models.TextField(null=True)
parentid = models.IntegerField(null=False, blank=False)
has_attachment = models.BooleanField(null=False, default=False)
diff --git a/django/archives/mailarchives/views.py b/django/archives/mailarchives/views.py
index f4dc709..7fe05b6 100644
--- a/django/archives/mailarchives/views.py
+++ b/django/archives/mailarchives/views.py
@@ -1,4 +1,4 @@
-from django.http import HttpResponse
+from django.http import HttpResponse, Http404
from django.shortcuts import render_to_response, get_object_or_404
from django.db import connection
from django.db.models import Q
@@ -31,7 +31,7 @@ def render_datelist_from(request, l, d, title, to=None):
if to:
datefilter.add(Q(date__lt=to), Q.AND)
- mlist = Message.objects.select_related().filter(datefilter).extra(where=["threadid IN (SELECT threadid FROM list_threads WHERE listid=%s)" % l.listid]).order_by('date')[:200]
+ mlist = Message.objects.defer('bodytxt', 'rawtxt', 'cc', 'to').select_related().filter(datefilter).extra(where=["threadid IN (SELECT threadid FROM list_threads WHERE listid=%s)" % l.listid]).order_by('date')[:200]
threads = set([m.threadid for m in mlist])
r = render_to_response('datelist.html', {
@@ -46,7 +46,7 @@ def render_datelist_to(request, l, d, title):
# Need to sort this backwards in the database to get the LIMIT applied
# properly, and then manually resort it in the correct order. We can do
# the second sort safely in python since it's not a lot of items..
- mlist = sorted(Message.objects.select_related().filter(date__lte=d).extra(where=["threadid IN (SELECT threadid FROM list_threads WHERE listid=%s)" % l.listid]).order_by('-date')[:200], key=lambda m: m.date)
+ mlist = sorted(Message.objects.defer('bodytxt', 'rawtxt', 'cc', 'to').select_related().filter(date__lte=d).extra(where=["threadid IN (SELECT threadid FROM list_threads WHERE listid=%s)" % l.listid]).order_by('-date')[:200], key=lambda m: m.date)
threads = set([m.threadid for m in mlist])
r = render_to_response('datelist.html', {
@@ -112,7 +112,11 @@ SELECT id,_from,subject,date,messageid,has_attachment,parentid,datepath FROM t O
yield {'id':id, 'mailfrom':_from, 'subject': subject, 'printdate': date.strftime("%Y-%m-%d %H:%M:%S"), 'messageid': messageid, 'hasattachment': has_attachment, 'parentid': parentid, 'indent': " " * len(parentpath)}
def message(request, msgid):
- m = get_object_or_404(Message, messageid=msgid)
+ try:
+ m = Message.objects.defer('rawtxt').get(messageid=msgid)
+ except Message.DoesNotExist, e:
+ raise Http404('Message does not exist')
+
lists = List.objects.extra(where=["listid IN (SELECT listid FROM list_threads WHERE threadid=%s)" % m.threadid]).order_by('listname')
threadstruct = list(_build_thread_structure(m.threadid))
responses = [t for t in threadstruct if t['parentid']==m.id]
@@ -134,8 +138,11 @@ def message(request, msgid):
return r
def message_flat(request, msgid):
- msg = get_object_or_404(Message, messageid=msgid)
- allmsg = Message.objects.filter(threadid=msg.threadid).order_by('date')
+ try:
+ msg = Message.objects.defer('rawtxt').get(messageid=msgid)
+ except Message.DoesNotExist, e:
+ raise Http404('Message does not exist')
+ allmsg = Message.objects.defer('rawtxt').filter(threadid=msg.threadid).order_by('date')
# XXX: need to get the complete list of lists!
r = render_to_response('message_flat.html', {
@@ -145,6 +152,15 @@ def message_flat(request, msgid):
r['X-pgthread'] = ":%s:" % msg.threadid
return r
+def message_raw(request, msgid):
+ try:
+ msg = Message.objects.defer('subject', 'mailfrom', 'to', 'cc', 'bodytxt').get(messageid=msgid)
+ except Message.DoesNotExist, e:
+ raise Http404('Message does not exist')
+ r = HttpResponse(msg.rawtxt, content_type='text/plain')
+ r['X-pgthread'] = ":%s:" % msg.threadid
+ return r
+
def testview(request, seqid):
m = Message.objects.get(pk=seqid)
try:
diff --git a/django/archives/urls.py b/django/archives/urls.py
index e6990ca..26cd68b 100644
--- a/django/archives/urls.py
+++ b/django/archives/urls.py
@@ -21,6 +21,7 @@ urlpatterns = patterns('',
(r'^$', 'archives.mailarchives.views.index'),
(r'^message-id/([^/]+)$', 'archives.mailarchives.views.message'),
(r'^message-id/([^/]+)/flat$', 'archives.mailarchives.views.message_flat'),
+ (r'^message-id/([^/]+)/raw$', 'archives.mailarchives.views.message_raw'),
(r'^([\w-]+)/$', 'archives.mailarchives.views.monthlist'),
(r'^([\w-]+)/(\d+)-(\d+)/$', 'archives.mailarchives.views.datelist'),
(r'^([\w-]+)/since/(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})', 'archives.mailarchives.views.datelistsincetime'),
diff --git a/loader/lib/parser.py b/loader/lib/parser.py
index 258cc2b..670a70c 100644
--- a/loader/lib/parser.py
+++ b/loader/lib/parser.py
@@ -18,6 +18,7 @@ class ArchivesParser(object):
def parse(self, stream):
self.msg = self.parser.parse(stream)
+ self.rawtxt = unicode(self.msg)
def is_msgid(self, msgid):
# Look for a specific messageid. This means we might parse it twice,
diff --git a/loader/lib/storage.py b/loader/lib/storage.py
index ab25793..1a34b1b 100644
--- a/loader/lib/storage.py
+++ b/loader/lib/storage.py
@@ -131,7 +131,7 @@ class ArchivesParserStorage(ArchivesParser):
if len(curs.fetchall()):
log.status("Tagged thread %s with listid %s" % (self.threadid, listid))
- curs.execute("INSERT INTO messages (parentid, threadid, _from, _to, cc, subject, date, has_attachment, messageid, bodytxt) VALUES (%(parentid)s, %(threadid)s, %(from)s, %(to)s, %(cc)s, %(subject)s, %(date)s, %(has_attachment)s, %(messageid)s, %(bodytxt)s) RETURNING id", {
+ curs.execute("INSERT INTO messages (parentid, threadid, _from, _to, cc, subject, date, has_attachment, messageid, bodytxt, rawtxt) VALUES (%(parentid)s, %(threadid)s, %(from)s, %(to)s, %(cc)s, %(subject)s, %(date)s, %(has_attachment)s, %(messageid)s, %(bodytxt)s, %(rawtxt)s) RETURNING id", {
'parentid': self.parentid,
'threadid': self.threadid,
'from': self._from,
@@ -142,6 +142,7 @@ class ArchivesParserStorage(ArchivesParser):
'has_attachment': len(self.attachments) > 0,
'messageid': self.msgid,
'bodytxt': self.bodytxt,
+ 'rawtxt': self.rawtxt,
})
id = curs.fetchall()[0][0]
if len(self.attachments):
diff --git a/loader/sql/schema.sql b/loader/sql/schema.sql
index ff8e5d3..b6cf49f 100644
--- a/loader/sql/schema.sql
+++ b/loader/sql/schema.sql
@@ -11,7 +11,8 @@ CREATE TABLE messages (
date timestamptz NOT NULL,
has_attachment boolean NOT NULL,
messageid text NOT NULL,
- bodytxt text NOT NULL
+ bodytxt text NOT NULL,
+ rawtxt text NOT NULL
);
CREATE INDEX idx_messages_threadid ON messages(threadid);
CREATE UNIQUE INDEX idx_messages_msgid ON messages(messageid);