summary | refs | log | tree | commit | diff
path: root/planet/aggregator.py
diff options
context:
space:
mode:
author Magnus Hagander 2008-12-29 15:45:11 +0000
committer Magnus Hagander 2008-12-29 15:45:11 +0000
commit ab9da90e22a595ec81b20de4397e054f4130b192 (patch)
tree e4b254f738cfaac5c3b618680c98bb94608d661b /planet/aggregator.py
parent 402e8f30095cf5c97e00be7039b3a37d7ca9941c (diff)
Support filtering feeds by author names, to pull only parts of a shared blog
without requiring use of specific tags/categories. In passing, fix a number of error messages to throw the proper type of exception.
Selena Deckelmann, with some polishing from me.
git-svn-id: file:///Users/dpage/pgweb/svn-repo/trunk@2367 8f5c7a92-453e-0410-a47f-ad33c8a6b003
Diffstat (limited to 'planet/aggregator.py')
-rwxr-xr-x planet/aggregator.py 22
1 files changed, 21 insertions, 1 deletions
diff --git a/planet/aggregator.py b/planet/aggregator.py
index dba6d176..182d0233 100755
--- a/planet/aggregator.py
+++ b/planet/aggregator.py
@@ -17,11 +17,12 @@ class Aggregator:
def __init__(self, db):
self.db = db
self.stored = 0
+ self.authorfilter = None
socket.setdefaulttimeout(20)
def Update(self):
feeds = self.db.cursor()
- feeds.execute('SELECT id,feedurl,name,lastget FROM planet.feeds')
+ feeds.execute('SELECT id,feedurl,name,lastget,authorfilter FROM planet.feeds')
for feed in feeds.fetchall():
try:
self.ParseFeed(feed)
@@ -43,7 +44,12 @@ class Aggregator:
print "Feed %s status %s" % (feedinfo[1], feed.status)
return
+ self.authorfilter = feedinfo[4]
+
for entry in feed.entries:
+ if not self.matches_filter(entry):
+ continue
+
# Grab the entry. At least atom feeds from wordpress store what we
# want in entry.content[0].value and *also* has a summary that's
# much shorter. Other blog software store what we want in the summary
@@ -66,6 +72,20 @@ class Aggregator:
self.db.cursor().execute("UPDATE planet.feeds SET lastget=COALESCE((SELECT max(dat) FROM planet.posts WHERE planet.posts.feed=planet.feeds.id),'2000-01-01') WHERE planet.feeds.id=%(feed)s", {'feed': feedinfo[0]})
#self.db.cursor().execute('UPDATE planet.feeds SET lastget=%(lg)s WHERE id=%(feed)s', {'lg':parsestart, 'feed': feedinfo[0]})
+ def matches_filter(self, entry):
+ # For now, we only match against self.authorfilter. In the future,
+ # there may be more filters.
+ if self.authorfilter:
+ # Match against an author filter
+
+ if entry.has_key('author_detail'):
+ return entry.author_detail.name == self.authorfilter
+ else:
+ return False
+
+ # No filters, always return true
+ return True
+
def StoreEntry(self, feedid, guid, date, link, guidisperma, title, txt):
c = self.db.cursor()
c.execute("SELECT id FROM planet.posts WHERE feed=%(feed)s AND guid=%(guid)s", {'feed':feedid, 'guid':guid})