diff options
author | Magnus Hagander | 2008-12-29 15:45:11 +0000 |
---|---|---|
committer | Magnus Hagander | 2008-12-29 15:45:11 +0000 |
commit | ab9da90e22a595ec81b20de4397e054f4130b192 (patch) | |
tree | e4b254f738cfaac5c3b618680c98bb94608d661b /planet/aggregator.py | |
parent | 402e8f30095cf5c97e00be7039b3a37d7ca9941c (diff) |
Support filtering feeds by author names, to pull only parts of a shared blog
without requiring use of specific tags/categories.
In passing, fix a number of error messages to throw the proper type of
exception.
Selena Deckelmann, with some polishing from me.
git-svn-id: file:///Users/dpage/pgweb/svn-repo/trunk@2367 8f5c7a92-453e-0410-a47f-ad33c8a6b003
Diffstat (limited to 'planet/aggregator.py')
-rwxr-xr-x | planet/aggregator.py | 22 |
1 files changed, 21 insertions, 1 deletions
```diff
diff --git a/planet/aggregator.py b/planet/aggregator.py
index dba6d176..182d0233 100755
--- a/planet/aggregator.py
+++ b/planet/aggregator.py
@@ -17,11 +17,12 @@ class Aggregator:
     def __init__(self, db):
         self.db = db
         self.stored = 0
+        self.authorfilter = None
         socket.setdefaulttimeout(20)
 
     def Update(self):
         feeds = self.db.cursor()
-        feeds.execute('SELECT id,feedurl,name,lastget FROM planet.feeds')
+        feeds.execute('SELECT id,feedurl,name,lastget,authorfilter FROM planet.feeds')
         for feed in feeds.fetchall():
             try:
                 self.ParseFeed(feed)
@@ -43,7 +44,12 @@ class Aggregator:
             print "Feed %s status %s" % (feedinfo[1], feed.status)
             return
 
+        self.authorfilter = feedinfo[4]
+
         for entry in feed.entries:
+            if not self.matches_filter(entry):
+                continue
+
             # Grab the entry. At least atom feeds from wordpress store what we
             # want in entry.content[0].value and *also* has a summary that's
             # much shorter. Other blog software store what we want in the summary
@@ -66,6 +72,20 @@ class Aggregator:
         self.db.cursor().execute("UPDATE planet.feeds SET lastget=COALESCE((SELECT max(dat) FROM planet.posts WHERE planet.posts.feed=planet.feeds.id),'2000-01-01') WHERE planet.feeds.id=%(feed)s", {'feed': feedinfo[0]})
         #self.db.cursor().execute('UPDATE planet.feeds SET lastget=%(lg)s WHERE id=%(feed)s', {'lg':parsestart, 'feed': feedinfo[0]})
 
+    def matches_filter(self, entry):
+        # For now, we only match against self.authorfilter. In the future,
+        # there may be more filters.
+        if self.authorfilter:
+            # Match against an author filter
+
+            if entry.has_key('author_detail'):
+                return entry.author_detail.name == self.authorfilter
+            else:
+                return False
+
+        # No filters, always return true
+        return True
+
     def StoreEntry(self, feedid, guid, date, link, guidisperma, title, txt):
         c = self.db.cursor()
         c.execute("SELECT id FROM planet.posts WHERE feed=%(feed)s AND guid=%(guid)s", {'feed':feedid, 'guid':guid})
```