diff options
-rw-r--r--  django/archives/mailarchives/models.py |  4
-rw-r--r--  django/archives/mailarchives/views.py  | 81
-rw-r--r--  django/archives/settings.py            |  2
-rw-r--r--  django/archives/urls.py                |  3
4 files changed, 87 insertions, 3 deletions
diff --git a/django/archives/mailarchives/models.py b/django/archives/mailarchives/models.py
index 491516b..5325264 100644
--- a/django/archives/mailarchives/models.py
+++ b/django/archives/mailarchives/models.py
@@ -9,10 +9,10 @@ class Message(models.Model):
     date = models.DateTimeField(null=False)
     messageid = models.TextField(null=False)
     bodytxt = models.TextField(null=False)
-# rawtxt = models.TextField(null=True)
-    # rawtxt is a bytea field, which django doesn't support
+    # rawtxt is a bytea field, which django doesn't support (easily)
     parentid = models.IntegerField(null=False, blank=False)
     has_attachment = models.BooleanField(null=False, default=False)
+    # fti is a tsvector field, which django doesn't support (easily)
 
     class Meta:
         db_table = 'messages'
diff --git a/django/archives/mailarchives/views.py b/django/archives/mailarchives/views.py
index 25c08e3..fbe1492 100644
--- a/django/archives/mailarchives/views.py
+++ b/django/archives/mailarchives/views.py
@@ -1,14 +1,17 @@
 from django.template import RequestContext
-from django.http import HttpResponse, Http404
+from django.http import HttpResponse, HttpResponseForbidden, Http404
 from django.shortcuts import render_to_response, get_object_or_404
 from django.db import connection
 from django.db.models import Q
+from django.conf import settings
 
 import urllib
 import re
 from datetime import datetime, timedelta
 import calendar
 
+import simplejson as json
+
 from models import *
 
 def get_all_groups_and_lists(listid=None):
@@ -246,3 +249,79 @@ def oldsite(request, msgid):
     u = urllib.urlopen('http://archives.postgresql.org/message-id/%s' % msgid)
     m = re.search('<!--X-Body-of-Message-->(.*)<!--X-Body-of-Message-End-->', u.read(), re.DOTALL)
     return HttpResponse(m.groups(1), content_type='text/html')
+
+def search(request):
+    # Only certain hosts are allowed to call the search API
+    if not request.META['REMOTE_ADDR'] in settings.SEARCH_CLIENTS:
+        return HttpResponseForbidden('Invalid host')
+
+    # Perform a search of the archives and return a JSON document.
+    # Expects the following (optional) POST parameters:
+    # q = query to search for
+    # l = comma separated list of lists to search for
+    # d = number of days back to search for, or -1 (or not specified)
+    #     to search the full archives
+    # s = sort results by ['r'=rank, 'd'=date]
+    if not request.method == 'POST':
+        raise Http404('I only respond to POST')
+
+    if not request.POST.has_key('q'):
+        raise Http404('No search query specified')
+    query = request.POST['q']
+
+    if request.POST.has_key('l'):
+        try:
+            lists = [int(x) for x in request.POST['l'].split(',')]
+        except:
+            # If failing to parse list of lists, just search all
+            lists = None
+    else:
+        lists = None
+
+    if request.POST.has_key('d'):
+        days = int(request.POST['d'])
+        if days < 1 or days > 365:
+            firstdate = None
+        else:
+            firstdate = datetime.now() - timedelta(days=days)
+    else:
+        firstdate = None
+
+    if request.POST.has_key('s'):
+        list_sort = request.POST['s'] == 'd' and 'd' or 'r'
+    else:
+        list_sort = 'r'
+
+    # Ok, we have all we need to do the search
+    curs = connection.cursor()
+    qstr = "SELECT listname, messageid, date, subject, _from, ts_rank_cd(fti, plainto_tsquery(%(q)s)), ts_headline(bodytxt, plainto_tsquery(%(q)s),'StartSel=\"[[[[[[\",StopSel=\"]]]]]]\"') FROM messages m INNER JOIN list_threads lt ON lt.threadid=m.threadid INNER JOIN lists l ON l.listid=lt.listid WHERE fti @@ plainto_tsquery(%(q)s)"
+    params = {
+        'q': query,
+    }
+    if lists:
+        qstr += " AND lt.listid=ANY(%(lists)s) "
+        params['lists'] = lists
+    if firstdate:
+        qstr += " AND m.date > %(date)s"
+        params['date'] = firstdate
+    if list_sort == 'r':
+        qstr += " ORDER BY ts_rank_cd(fti, plainto_tsquery(%(q)s)) DESC LIMIT 1000"
+    else:
+        qstr += " ORDER BY date DESC LIMIT 1000"
+
+    curs.execute(qstr, params)
+
+    resp = HttpResponse(mimetype='application/json')
+
+    json.dump([{
+        'l': listname,
+        'm': messageid,
+        'd': date.isoformat(),
+        's': subject,
+        'f': mailfrom,
+        'r': rank,
+        'a': abstract.replace("[[[[[[", "<b>").replace("]]]]]]","</b>"),
+
+    } for listname, messageid, date, subject, mailfrom, rank, abstract in curs.fetchall()],
+    resp)
+    return resp
diff --git a/django/archives/settings.py b/django/archives/settings.py
index 51ae2cb..b2c871b 100644
--- a/django/archives/settings.py
+++ b/django/archives/settings.py
@@ -149,3 +149,5 @@ LOGGING = {
 
 # Required for lighttpd
 FORCE_SCRIPT_NAME=""
+
+from settings_local import *
diff --git a/django/archives/urls.py b/django/archives/urls.py
index d8e6551..9841175 100644
--- a/django/archives/urls.py
+++ b/django/archives/urls.py
@@ -22,6 +22,9 @@ urlpatterns = patterns('',
     (r'^message-id/([^/]+)$', 'archives.mailarchives.views.message'),
     (r'^flat/([^/]+)$', 'archives.mailarchives.views.message_flat'),
     (r'^raw/([^/]+)$', 'archives.mailarchives.views.message_raw'),
+    (r'^search/', 'archives.mailarchives.views.search'),
+
+    # Date etc indexes
     (r'^([\w-]+)/$', 'archives.mailarchives.views.monthlist'),
     (r'^([\w-]+)/(\d+)-(\d+)/$', 'archives.mailarchives.views.datelist'),
     (r'^([\w-]+)/since/(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})', 'archives.mailarchives.views.datelistsincetime'),