diff options
26 files changed, 1364 insertions, 1364 deletions
diff --git a/hamnadmin/hamnadmin/mailqueue/admin.py b/hamnadmin/hamnadmin/mailqueue/admin.py index b63f317..023776c 100644 --- a/hamnadmin/hamnadmin/mailqueue/admin.py +++ b/hamnadmin/hamnadmin/mailqueue/admin.py @@ -5,26 +5,26 @@ from email.parser import Parser from .models import QueuedMail class QueuedMailAdmin(admin.ModelAdmin): - model = QueuedMail - readonly_fields = ('parsed_content', ) + model = QueuedMail + readonly_fields = ('parsed_content', ) - def parsed_content(self, obj): - # We only try to parse the *first* piece, because we assume - # all our emails are trivial. - try: - parser = Parser() - msg = parser.parsestr(obj.fullmsg) - b = msg.get_payload(decode=True) - if b: return b + def parsed_content(self, obj): + # We only try to parse the *first* piece, because we assume + # all our emails are trivial. + try: + parser = Parser() + msg = parser.parsestr(obj.fullmsg) + b = msg.get_payload(decode=True) + if b: return b - pl = msg.get_payload() - for p in pl: - b = p.get_payload(decode=True) - if b: return b - return "Could not find body" - except Exception as e: - return "Failed to get body: %s" % e + pl = msg.get_payload() + for p in pl: + b = p.get_payload(decode=True) + if b: return b + return "Could not find body" + except Exception as e: + return "Failed to get body: %s" % e - parsed_content.short_description = 'Parsed mail' + parsed_content.short_description = 'Parsed mail' admin.site.register(QueuedMail, QueuedMailAdmin) diff --git a/hamnadmin/hamnadmin/mailqueue/management/commands/send_queued_mail.py b/hamnadmin/hamnadmin/mailqueue/management/commands/send_queued_mail.py index 6319b2e..acf4b74 100755 --- a/hamnadmin/hamnadmin/mailqueue/management/commands/send_queued_mail.py +++ b/hamnadmin/hamnadmin/mailqueue/management/commands/send_queued_mail.py @@ -13,22 +13,22 @@ import smtplib from hamnadmin.mailqueue.models import QueuedMail class Command(BaseCommand): - help = 'Send queued mail' + help = 'Send queued mail' - def handle(self, *args, **options): - # Grab advisory lock, if available. Lock id is just a random number - # since we only need to interlock against ourselves. The lock is - # automatically released when we're done. - curs = connection.cursor() - curs.execute("SELECT pg_try_advisory_lock(72181378)") - if not curs.fetchall()[0][0]: - raise CommandException("Failed to get advisory lock, existing send_queued_mail process stuck?") + def handle(self, *args, **options): + # Grab advisory lock, if available. Lock id is just a random number + # since we only need to interlock against ourselves. The lock is + # automatically released when we're done. + curs = connection.cursor() + curs.execute("SELECT pg_try_advisory_lock(72181378)") + if not curs.fetchall()[0][0]: + raise CommandException("Failed to get advisory lock, existing send_queued_mail process stuck?") - for m in QueuedMail.objects.all(): - # Yes, we do a new connection for each run. Just because we can. - # If it fails we'll throw an exception and just come back on the - # next cron job. And local delivery should never fail... - smtp = smtplib.SMTP("localhost") - smtp.sendmail(m.sender, m.receiver, m.fullmsg.encode('utf-8')) - smtp.close() - m.delete() + for m in QueuedMail.objects.all(): + # Yes, we do a new connection for each run. Just because we can. + # If it fails we'll throw an exception and just come back on the + # next cron job. And local delivery should never fail... + smtp = smtplib.SMTP("localhost") + smtp.sendmail(m.sender, m.receiver, m.fullmsg.encode('utf-8')) + smtp.close() + m.delete() diff --git a/hamnadmin/hamnadmin/mailqueue/models.py b/hamnadmin/hamnadmin/mailqueue/models.py index f0cc442..f45fd72 100644 --- a/hamnadmin/hamnadmin/mailqueue/models.py +++ b/hamnadmin/hamnadmin/mailqueue/models.py @@ -1,11 +1,11 @@ from django.db import models class QueuedMail(models.Model): - sender = models.EmailField(max_length=100, null=False, blank=False) - receiver = models.EmailField(max_length=100, null=False, blank=False) - # We store the raw MIME message, so if there are any attachments or - # anything, we just push them right in there! - fullmsg = models.TextField(null=False, blank=False) + sender = models.EmailField(max_length=100, null=False, blank=False) + receiver = models.EmailField(max_length=100, null=False, blank=False) + # We store the raw MIME message, so if there are any attachments or + # anything, we just push them right in there! + fullmsg = models.TextField(null=False, blank=False) - def __str__(self): - return "%s: %s -> %s" % (self.pk, self.sender, self.receiver) + def __str__(self): + return "%s: %s -> %s" % (self.pk, self.sender, self.receiver) diff --git a/hamnadmin/hamnadmin/mailqueue/util.py b/hamnadmin/hamnadmin/mailqueue/util.py index 44fdd16..05394a3 100644 --- a/hamnadmin/hamnadmin/mailqueue/util.py +++ b/hamnadmin/hamnadmin/mailqueue/util.py @@ -8,39 +8,39 @@ from email import encoders from .models import QueuedMail def _encoded_email_header(name, email): - if name: - return formataddr((str(Header(name, 'utf-8')), email)) - return email + if name: + return formataddr((str(Header(name, 'utf-8')), email)) + return email def send_simple_mail(sender, receiver, subject, msgtxt, attachments=None, bcc=None, sendername=None, receivername=None): - # attachment format, each is a tuple of (name, mimetype,contents) - # content should be *binary* and not base64 encoded, since we need to - # use the base64 routines from the email library to get a properly - # formatted output message - msg = MIMEMultipart() - msg['Subject'] = subject - msg['To'] = _encoded_email_header(receivername, receiver) - msg['From'] = _encoded_email_header(sendername, sender) - msg['Date'] = formatdate(localtime=True) - - msg.attach(MIMEText(msgtxt, _charset='utf-8')) - - if attachments: - for filename, contenttype, content in attachments: - main,sub = contenttype.split('/') - part = MIMENonMultipart(main,sub) - part.set_payload(content) - part.add_header('Content-Disposition', 'attachment; filename="%s"' % filename) - encoders.encode_base64(part) - msg.attach(part) - - - # Just write it to the queue, so it will be transactionally rolled back - QueuedMail(sender=sender, receiver=receiver, fullmsg=msg.as_string()).save() - # Any bcc is just entered as a separate email - if bcc: - QueuedMail(sender=sender, receiver=bcc, fullmsg=msg.as_string()).save() + # attachment format, each is a tuple of (name, mimetype,contents) + # content should be *binary* and not base64 encoded, since we need to + # use the base64 routines from the email library to get a properly + # formatted output message + msg = MIMEMultipart() + msg['Subject'] = subject + msg['To'] = _encoded_email_header(receivername, receiver) + msg['From'] = _encoded_email_header(sendername, sender) + msg['Date'] = formatdate(localtime=True) + + msg.attach(MIMEText(msgtxt, _charset='utf-8')) + + if attachments: + for filename, contenttype, content in attachments: + main,sub = contenttype.split('/') + part = MIMENonMultipart(main,sub) + part.set_payload(content) + part.add_header('Content-Disposition', 'attachment; filename="%s"' % filename) + encoders.encode_base64(part) + msg.attach(part) + + + # Just write it to the queue, so it will be transactionally rolled back + QueuedMail(sender=sender, receiver=receiver, fullmsg=msg.as_string()).save() + # Any bcc is just entered as a separate email + if bcc: + QueuedMail(sender=sender, receiver=bcc, fullmsg=msg.as_string()).save() def send_mail(sender, receiver, fullmsg): - # Send an email, prepared as the full MIME encoded mail already - QueuedMail(sender=sender, receiver=receiver, fullmsg=fullmsg).save() + # Send an email, prepared as the full MIME encoded mail already + QueuedMail(sender=sender, receiver=receiver, fullmsg=fullmsg).save() diff --git a/hamnadmin/hamnadmin/register/admin.py b/hamnadmin/hamnadmin/register/admin.py index 36b0f27..1efafde 100644 --- a/hamnadmin/hamnadmin/register/admin.py +++ b/hamnadmin/hamnadmin/register/admin.py @@ -5,26 +5,26 @@ from django.conf import settings from hamnadmin.register.models import * class TeamAdmin(admin.ModelAdmin): - list_display = ['name', 'manager', 'teamurl'] + list_display = ['name', 'manager', 'teamurl'] class BlogAdmin(admin.ModelAdmin): - list_display = ['user', 'approved', 'name', 'feedurl', 'authorfilter', ] - ordering = ['approved', 'name', ] #meh, multiple ordering not supported - search_fields = ['user__username', 'name', 'feedurl'] + list_display = ['user', 'approved', 'name', 'feedurl', 'authorfilter', ] + ordering = ['approved', 'name', ] #meh, multiple ordering not supported + search_fields = ['user__username', 'name', 'feedurl'] - def change_view(self, request, object_id, extra_context=None): - blog = Blog(pk=object_id) - my_context = { - 'posts': blog.posts.all()[:10], - } - return super(BlogAdmin, self).change_view(request, object_id, extra_context=my_context) + def change_view(self, request, object_id, extra_context=None): + blog = Blog(pk=object_id) + my_context = { + 'posts': blog.posts.all()[:10], + } + return super(BlogAdmin, self).change_view(request, object_id, extra_context=my_context) class PostAdmin(admin.ModelAdmin): - list_display = ['dat', 'title', 'hidden', 'feed'] - search_fields = ['title', 'feed__name', 'feed__feedurl'] + list_display = ['dat', 'title', 'hidden', 'feed'] + search_fields = ['title', 'feed__name', 'feed__feedurl'] class AggregatorLogAdmin(admin.ModelAdmin): - list_display = ['ts', 'success', 'feed', 'info'] + list_display = ['ts', 'success', 'feed', 'info'] admin.site.register(Team, TeamAdmin) admin.site.register(Blog, BlogAdmin) diff --git a/hamnadmin/hamnadmin/register/feeds.py b/hamnadmin/hamnadmin/register/feeds.py index d727d62..c8fdfeb 100644 --- a/hamnadmin/hamnadmin/register/feeds.py +++ b/hamnadmin/hamnadmin/register/feeds.py @@ -5,38 +5,38 @@ from hamnadmin.util.html import TruncateAndClean from .models import Post class PostFeed(Feed): - title = 'Planet PostgreSQL' - link = 'https://planet.postgresql.org' - feed_url = 'https://planet.postgresql.org/rss20.xml' - description = 'Planet PostgreSQL' - generator = 'Planet PostgreSQL' - - def get_object(self, request, type=None): - return type - - def items(self, type): - qs = Post.objects.filter(feed__approved=True, hidden=False).order_by('-dat') - if type == "_short": - qs = qs.extra(select = {'short': 1}) - return qs[:30] - - def item_title(self, item): - return "{0}: {1}".format(item.feed.name, item.title) - - def item_link(self, item): - if not item.shortlink: - # If not cached, calculate one - return item._get_shortlink() - return item.shortlink - - def item_pubdate(self, item): - return item.dat - - def item_description(self, item): - if hasattr(item, 'short'): - try: - return TruncateAndClean(item.txt) - except Exception as e: - return "Unable to clean HTML" - else: - return item.txt + title = 'Planet PostgreSQL' + link = 'https://planet.postgresql.org' + feed_url = 'https://planet.postgresql.org/rss20.xml' + description = 'Planet PostgreSQL' + generator = 'Planet PostgreSQL' + + def get_object(self, request, type=None): + return type + + def items(self, type): + qs = Post.objects.filter(feed__approved=True, hidden=False).order_by('-dat') + if type == "_short": + qs = qs.extra(select = {'short': 1}) + return qs[:30] + + def item_title(self, item): + return "{0}: {1}".format(item.feed.name, item.title) + + def item_link(self, item): + if not item.shortlink: + # If not cached, calculate one + return item._get_shortlink() + return item.shortlink + + def item_pubdate(self, item): + return item.dat + + def item_description(self, item): + if hasattr(item, 'short'): + try: + return TruncateAndClean(item.txt) + except Exception as e: + return "Unable to clean HTML" + else: + return item.txt diff --git a/hamnadmin/hamnadmin/register/forms.py b/hamnadmin/hamnadmin/register/forms.py index 1824b97..b66a582 100644 --- a/hamnadmin/hamnadmin/register/forms.py +++ b/hamnadmin/hamnadmin/register/forms.py @@ -11,80 +11,80 @@ import requests import requests_oauthlib class BlogEditForm(forms.ModelForm): - class Meta: - model = Blog - fields = ('feedurl', 'team', 'twitteruser', 'authorfilter') - - def __init__(self, request, *args, **kwargs): - self.request = request - super(BlogEditForm, self).__init__(*args, **kwargs) - for f in self.fields.values(): - f.widget.attrs['class'] = 'form-control' - - if kwargs['instance'].approved: - self.fields['feedurl'].help_text="Note that changing the feed URL will disable the blog pending new moderation" - self.fields['authorfilter'].help_text="Note that changing the author filter will disable the blog pending new moderation" - - - def clean(self): - tracemessages = [] - def _trace(msg): - tracemessages.append(msg) - - if 'feedurl' not in self.cleaned_data: - # No feedurl present means error already thrown - return self.cleaned_data - - # Create a fake instance to pass down. We'll just throw it away - feedobj = Blog(feedurl=self.cleaned_data.get('feedurl', None), authorfilter=self.cleaned_data['authorfilter']) - fetcher = FeedFetcher(feedobj, _trace, False) - try: - entries = list(fetcher.parse()) - except ParserGotRedirect: - raise forms.ValidationError("This URL returns a permanent redirect") - except Exception as e: - raise forms.ValidationError("Failed to retreive and parse feed: %s" % e) - if len(entries) == 0: - for m in tracemessages: - messages.info(self.request, m) - raise forms.ValidationError("No entries found in blog. You cannot submit a blog until it contains entries.") - - return self.cleaned_data - - def clean_twitteruser(self): - if self.cleaned_data['twitteruser'] == '': - return '' - - u = self.cleaned_data['twitteruser'] - if u.startswith('@'): - u = u[1:] - - if not settings.TWITTER_CLIENT: - # Can't validate beyond this unless we have client keys configured - return u - - tw = requests_oauthlib.OAuth1Session(settings.TWITTER_CLIENT, - settings.TWITTER_CLIENTSECRET, - settings.TWITTER_TOKEN, - settings.TWITTER_TOKENSECRET) - try: - r = tw.get('https://api.twitter.com/1.1/users/show.json', params={ - 'screen_name': u, - }, timeout=5) - if r.status_code != 200: - raise forms.ValidationError("Could not find twitter user") - j = r.json() - if j['protected']: - raise forms.ValidationError("Cannot register protected twitter accounts") - except requests.exceptions.ReadTimeout: - raise forms.ValidationError("Timeout trying to validate account with twitter") - return u + class Meta: + model = Blog + fields = ('feedurl', 'team', 'twitteruser', 'authorfilter') + + def __init__(self, request, *args, **kwargs): + self.request = request + super(BlogEditForm, self).__init__(*args, **kwargs) + for f in self.fields.values(): + f.widget.attrs['class'] = 'form-control' + + if kwargs['instance'].approved: + self.fields['feedurl'].help_text="Note that changing the feed URL will disable the blog pending new moderation" + self.fields['authorfilter'].help_text="Note that changing the author filter will disable the blog pending new moderation" + + + def clean(self): + tracemessages = [] + def _trace(msg): + tracemessages.append(msg) + + if 'feedurl' not in self.cleaned_data: + # No feedurl present means error already thrown + return self.cleaned_data + + # Create a fake instance to pass down. We'll just throw it away + feedobj = Blog(feedurl=self.cleaned_data.get('feedurl', None), authorfilter=self.cleaned_data['authorfilter']) + fetcher = FeedFetcher(feedobj, _trace, False) + try: + entries = list(fetcher.parse()) + except ParserGotRedirect: + raise forms.ValidationError("This URL returns a permanent redirect") + except Exception as e: + raise forms.ValidationError("Failed to retreive and parse feed: %s" % e) + if len(entries) == 0: + for m in tracemessages: + messages.info(self.request, m) + raise forms.ValidationError("No entries found in blog. You cannot submit a blog until it contains entries.") + + return self.cleaned_data + + def clean_twitteruser(self): + if self.cleaned_data['twitteruser'] == '': + return '' + + u = self.cleaned_data['twitteruser'] + if u.startswith('@'): + u = u[1:] + + if not settings.TWITTER_CLIENT: + # Can't validate beyond this unless we have client keys configured + return u + + tw = requests_oauthlib.OAuth1Session(settings.TWITTER_CLIENT, + settings.TWITTER_CLIENTSECRET, + settings.TWITTER_TOKEN, + settings.TWITTER_TOKENSECRET) + try: + r = tw.get('https://api.twitter.com/1.1/users/show.json', params={ + 'screen_name': u, + }, timeout=5) + if r.status_code != 200: + raise forms.ValidationError("Could not find twitter user") + j = r.json() + if j['protected']: + raise forms.ValidationError("Cannot register protected twitter accounts") + except requests.exceptions.ReadTimeout: + raise forms.ValidationError("Timeout trying to validate account with twitter") + return u class ModerateRejectForm(forms.Form): - message = forms.CharField(min_length=30, required=True, widget=forms.Textarea) - modsonly = forms.BooleanField(required=False, label="Moderators only", help_text="Should message be sent only to moderators, and not to the submitter (for spam submissions mainly)") + message = forms.CharField(min_length=30, required=True, widget=forms.Textarea) + modsonly = forms.BooleanField(required=False, label="Moderators only", help_text="Should message be sent only to moderators, and not to the submitter (for spam submissions mainly)") - def __init__(self, *args, **kwargs): - super(ModerateRejectForm, self).__init__(*args, **kwargs) - for f in self.fields.values(): - f.widget.attrs['class'] = 'form-control' + def __init__(self, *args, **kwargs): + super(ModerateRejectForm, self).__init__(*args, **kwargs) + for f in self.fields.values(): + f.widget.attrs['class'] = 'form-control' diff --git a/hamnadmin/hamnadmin/register/management/commands/aggregate_feeds.py b/hamnadmin/hamnadmin/register/management/commands/aggregate_feeds.py index 78fa872..83f614c 100644 --- a/hamnadmin/hamnadmin/register/management/commands/aggregate_feeds.py +++ b/hamnadmin/hamnadmin/register/management/commands/aggregate_feeds.py @@ -15,218 +15,218 @@ from hamnadmin.util.varnish import purge_root_and_feeds class BreakoutException(Exception): - pass + pass class Command(BaseCommand): - help = 'Aggregate one or more feeds' - - def add_arguments(self, parser): - parser.add_argument('--id', type=int, help="Fetch just one feed specified by id") - parser.add_argument('-d', '--debug', action='store_true', help="Enable debug mode, don't save anything") - parser.add_argument('-f', '--full', action='store_true', help="Fetch full feed, regardless of last fetch date") - parser.add_argument('-p', '--parallelism', type=int, default=10, help="Number of parallell requests") - - def trace(self, msg): - if self.verbose: - self.stdout.write(msg) - - def handle(self, *args, **options): - self.verbose = options['verbosity'] > 1 - self.debug = options['debug'] - if self.debug: - self.verbose=True - self.full = options['full'] - - if options['id']: - feeds = Blog.objects.filter(pk=options['id']) - else: - # Fetch all feeds - that are not archived. We do fetch feeds that are not approved, - # to make sure they work. - feeds = Blog.objects.filter(archived=False) - - # Fan out the fetching itself - fetchers = [FeedFetcher(f, self.trace) for f in feeds] - num = len(fetchers) - pool = ThreadPool(options['parallelism']) - pr = pool.map_async(self._fetch_one_feed, fetchers) - while not pr.ready(): - gevent.sleep(1) - self.trace("Fetching feeds (%s/%s done), please wait..." % (num-pool.task_queue.unfinished_tasks, num)) - - total_entries = 0 - # Fetching was async, but results processing will be sync. Don't want to deal with - # multithreaded database connections and such complications. - try: - with transaction.atomic(): - for feed, results in pr.get(): - if isinstance(results, ParserGotRedirect): - # Received a redirect. If this is a redirect for exactly the same URL just - # from http to https, special case this and allow it. For any other redirect, - # we don't follow it since it might no longer be a properly filtered feed - # for example. - if results.url == feed.feedurl: - # Redirect to itself! Should never happen, of course. - AggregatorLog(feed=feed, success=False, - info="Feed returned redirect loop to itself!").save() - elif results.url == feed.feedurl.replace('http://', 'https://'): - # OK, update it! - AggregatorLog(feed=feed, success=True, - info="Feed returned redirect to https, updating registration").save() - send_simple_mail(settings.EMAIL_SENDER, - feed.user.email, - "Your blog at Planet PostgreSQL redirected", - "The blog aggregator at Planet PostgreSQL has picked up a redirect for your blog.\nOld URL: {0}\nNew URL: {1}\n\nThe database has been updated, and new entries will be fetched from the secure URL in the future.\n".format(feed.feedurl, results.url), - sendername="Planet PostgreSQL", - receivername="{0} {1}".format(feed.user.first_name, feed.user.last_name), - ) - send_simple_mail(settings.EMAIL_SENDER, - settings.NOTIFICATION_RECEIVER, - "Blog redirect detected on Planet PostgreSQL", - "The blog at {0} by {1}\nis returning a redirect to a https version of itself.\n\nThe database has automatically been updated, and will start fetching using https in the future,\n\n".format(feed.feedurl, feed.user), - sendername="Planet PostgreSQL", - receivername="Planet PostgreSQL Moderators", - ) - feed.feedurl = results.url - feed.save() - else: - AggregatorLog(feed=feed, success=False, - info="Feed returned redirect (http 301)").save() - elif isinstance(results, Exception): - AggregatorLog(feed=feed, - success=False, - info=results).save() - else: - if feed.approved: - had_entries = True - else: - had_entries = feed.has_entries - entries = 0 - titles = [] - ids = [] - - for entry in results: - self.trace("Found entry at %s" % entry.link) - # Entry is a post, but we need to check if it's already there. Check - # is done on guid. Some blogs use http and https in the guid, and - # also change between them depending on how the blog is fetched, - # so check for those two explicitly. - if 'http://' in entry.guid: - alternateguid = entry.guid.replace('http://', 'https://') - elif 'https://' in entry.guid: - alternateguid = entry.guid.replace('https://', 'http://') - else: - alternateguid = None - # We check if this entry has been syndicated on any *other* blog as well, - # so we don't accidentally post something more than once. - if not Post.objects.filter(Q(guid=entry.guid) | Q(guid=alternateguid)).exists(): - self.trace("Saving entry at %s" % entry.link) - entry.save() - entry.update_shortlink() - AggregatorLog(feed=feed, - success=True, - info="Fetched entry at '%s'" % entry.link).save() - entries += 1 - titles.append(entry.title) - ids.append(entry.pk) - total_entries += 1 - else: - self.trace("Skipping entry: %s" % entry.link) - - if entries > 0 and feed.approved: - # If we picked "too many" entries, this might indicate a misconfigured blog that - # stopped doing it's filtering correctly. - if entries > settings.MAX_SAFE_ENTRIES_PER_FETCH: - self.trace("{0} new entries for {1}, >{2}, hiding".format( - entries, feed.feedurl, settings.MAX_SAFE_ENTRIES_PER_FETCH)) - Post.objects.filter(id__in=ids).update(hidden=True) - # Email a notification that they were picked up - send_simple_mail(settings.EMAIL_SENDER, - feed.user.email, - "Many posts found at your blog at Planet PostgreSQL", - "The blog aggregator at Planet PostgreSQL has just picked up the following\nposts from your blog at {0}:\n\n{1}\n\nSince this is a large number of posts, they have been fetched\nand marked as hidden, to avoid possible duplicates.\n\nPlease go to https://planet.postgresql.org/register/edit/{2}\nand confirm (by unhiding) which of these should be posted.\n\nThank you!\n\n".format( - feed.blogurl, - "\n".join(["* " + t for t in titles]), - feed.id), - sendername="Planet PostgreSQL", - receivername="{0} {1}".format(feed.user.first_name, feed.user.last_name), - ) - send_simple_mail(settings.EMAIL_SENDER, - settings.NOTIFICATION_RECEIVER, - "Excessive posts from feed on Planet PostgreSQL", - "The blog at {0} by {1}\nreceived {2} new posts in a single fetch.\nAs this may be incorrect, the posts have been marked as hidden.\nThe author may individually mark them as visible depending on\nprevious posts, and has been sent a notification about this.".format(feed.feedurl, feed.user, len(ids)), - sendername="Planet PostgreSQL", - receivername="Planet PostgreSQL Moderators", - ) - else: - # Email a notification that they were picked up - send_simple_mail(settings.EMAIL_SENDER, - feed.user.email, - "Posts found at your blog at Planet PostgreSQL", - "The blog aggregator at Planet PostgreSQL has just picked up the following\nposts from your blog at {0}:\n\n{1}\n\nIf these entries are correct, you don't have to do anything.\nIf any entry should not be there, head over to\n\nhttps://planet.postgresql.org/register/edit/{2}/\n\nand click the 'Hide' button for those entries as soon\nas possible.\n\nThank you!\n\n".format( - feed.blogurl, - "\n".join(["* " + t for t in titles]), - feed.id), - sendername="Planet PostgreSQL", - receivername="{0} {1}".format(feed.user.first_name, feed.user.last_name), - ) - - if entries > 0 and not had_entries: - # Entries showed up on a blog that was previously empty - send_simple_mail(settings.EMAIL_SENDER, - settings.NOTIFICATION_RECEIVER, - "A blog was added to Planet PostgreSQL", - "The blog at {0} by {1}\nwas added to Planet PostgreSQL, and has now received entries.\n\nTo moderate: https://planet.postgresql.org/register/moderate/\n\n".format(feed.feedurl, feed.user), - sendername="Planet PostgreSQL", - receivername="Planet PostgreSQL Moderators", - ) - - # If the blog URL changed, update it as requested - if getattr(feed, 'new_blogurl', None): - self.trace("URL changed for %s to %s" % (feed.feedurl, feed.new_blogurl)) - send_simple_mail(settings.EMAIL_SENDER, - settings.NOTIFICATION_RECEIVER, - "A blog url changed on Planet PostgreSQL", - "When checking the blog at {0} by {1}\nthe blog URL was updated to:\n{2}\n(from previous value {3})\n\nTo moderate: https://planet.postgresql.org/register/moderate/\n\n".format(feed.feedurl, feed.user, feed.new_blogurl, feed.blogurl), - sendername="Planet PostgreSQL", - receivername="Planet PostgreSQL Moderators", - ) - send_simple_mail(settings.EMAIL_SENDER, - feed.user.email, - "URL of your blog at Planet PostgreSQL updated", - "The blog aggregator at Planet PostgreSQL has update the URL of your blog\nwith the feed at {0} to:\n{1} (from {2})\nIf this is correct, you don't have to do anything.\nIf not, please contact planet@postgresql.org\n".format( - feed.feedurl, - feed.new_blogurl, - feed.blogurl, - ), - sendername="Planet PostgreSQL", - receivername="{0} {1}".format(feed.user.first_name, feed.user.last_name), - ) - feed.blogurl = feed.new_blogurl - feed.save() - if self.debug: - # Roll back transaction without error - raise BreakoutException() - except BreakoutException: - self.stderr.write("Rolling back all changes") - pass - - if total_entries > 0 and not self.debug: - purge_root_and_feeds() - - def _fetch_one_feed(self, fetcher): - if self.full: - self.trace("Fetching %s" % fetcher.feed.feedurl) - since = None - else: - since = fetcher.feed.lastget - self.trace("Fetching %s since %s" % (fetcher.feed.feedurl, since)) - try: - entries = list(fetcher.parse(since)) - except ParserGotRedirect as e: - return (fetcher.feed, e) - except Exception as e: - self.stderr.write("Failed to fetch '%s': %s" % (fetcher.feed.feedurl, e)) - return (fetcher.feed, e) - return (fetcher.feed, entries) + help = 'Aggregate one or more feeds' + + def add_arguments(self, parser): + parser.add_argument('--id', type=int, help="Fetch just one feed specified by id") + parser.add_argument('-d', '--debug', action='store_true', help="Enable debug mode, don't save anything") + parser.add_argument('-f', '--full', action='store_true', help="Fetch full feed, regardless of last fetch date") + parser.add_argument('-p', '--parallelism', type=int, default=10, help="Number of parallell requests") + + def trace(self, msg): + if self.verbose: + self.stdout.write(msg) + + def handle(self, *args, **options): + self.verbose = options['verbosity'] > 1 + self.debug = options['debug'] + if self.debug: + self.verbose=True + self.full = options['full'] + + if options['id']: + feeds = Blog.objects.filter(pk=options['id']) + else: + # Fetch all feeds - that are not archived. We do fetch feeds that are not approved, + # to make sure they work. + feeds = Blog.objects.filter(archived=False) + + # Fan out the fetching itself + fetchers = [FeedFetcher(f, self.trace) for f in feeds] + num = len(fetchers) + pool = ThreadPool(options['parallelism']) + pr = pool.map_async(self._fetch_one_feed, fetchers) + while not pr.ready(): + gevent.sleep(1) + self.trace("Fetching feeds (%s/%s done), please wait..." % (num-pool.task_queue.unfinished_tasks, num)) + + total_entries = 0 + # Fetching was async, but results processing will be sync. Don't want to deal with + # multithreaded database connections and such complications. + try: + with transaction.atomic(): + for feed, results in pr.get(): + if isinstance(results, ParserGotRedirect): + # Received a redirect. If this is a redirect for exactly the same URL just + # from http to https, special case this and allow it. For any other redirect, + # we don't follow it since it might no longer be a properly filtered feed + # for example. + if results.url == feed.feedurl: + # Redirect to itself! Should never happen, of course. + AggregatorLog(feed=feed, success=False, + info="Feed returned redirect loop to itself!").save() + elif results.url == feed.feedurl.replace('http://', 'https://'): + # OK, update it! + AggregatorLog(feed=feed, success=True, + info="Feed returned redirect to https, updating registration").save() + send_simple_mail(settings.EMAIL_SENDER, + feed.user.email, + "Your blog at Planet PostgreSQL redirected", + "The blog aggregator at Planet PostgreSQL has picked up a redirect for your blog.\nOld URL: {0}\nNew URL: {1}\n\nThe database has been updated, and new entries will be fetched from the secure URL in the future.\n".format(feed.feedurl, results.url), + sendername="Planet PostgreSQL", + receivername="{0} {1}".format(feed.user.first_name, feed.user.last_name), + ) + send_simple_mail(settings.EMAIL_SENDER, + settings.NOTIFICATION_RECEIVER, + "Blog redirect detected on Planet PostgreSQL", + "The blog at {0} by {1}\nis returning a redirect to a https version of itself.\n\nThe database has automatically been updated, and will start fetching using https in the future,\n\n".format(feed.feedurl, feed.user), + sendername="Planet PostgreSQL", + receivername="Planet PostgreSQL Moderators", + ) + feed.feedurl = results.url + feed.save() + else: + AggregatorLog(feed=feed, success=False, + info="Feed returned redirect (http 301)").save() + elif isinstance(results, Exception): + AggregatorLog(feed=feed, + success=False, + info=results).save() + else: + if feed.approved: + had_entries = True + else: + had_entries = feed.has_entries + entries = 0 + titles = [] + ids = [] + + for entry in results: + self.trace("Found entry at %s" % entry.link) + # Entry is a post, but we need to check if it's already there. Check + # is done on guid. Some blogs use http and https in the guid, and + # also change between them depending on how the blog is fetched, + # so check for those two explicitly. + if 'http://' in entry.guid: + alternateguid = entry.guid.replace('http://', 'https://') + elif 'https://' in entry.guid: + alternateguid = entry.guid.replace('https://', 'http://') + else: + alternateguid = None + # We check if this entry has been syndicated on any *other* blog as well, + # so we don't accidentally post something more than once. + if not Post.objects.filter(Q(guid=entry.guid) | Q(guid=alternateguid)).exists(): + self.trace("Saving entry at %s" % entry.link) + entry.save() + entry.update_shortlink() + AggregatorLog(feed=feed, + success=True, + info="Fetched entry at '%s'" % entry.link).save() + entries += 1 + titles.append(entry.title) + ids.append(entry.pk) + total_entries += 1 + else: + self.trace("Skipping entry: %s" % entry.link) + + if entries > 0 and feed.approved: + # If we picked "too many" entries, this might indicate a misconfigured blog that + # stopped doing it's filtering correctly. + if entries > settings.MAX_SAFE_ENTRIES_PER_FETCH: + self.trace("{0} new entries for {1}, >{2}, hiding".format( + entries, feed.feedurl, settings.MAX_SAFE_ENTRIES_PER_FETCH)) + Post.objects.filter(id__in=ids).update(hidden=True) + # Email a notification that they were picked up + send_simple_mail(settings.EMAIL_SENDER, + feed.user.email, + "Many posts found at your blog at Planet PostgreSQL", + "The blog aggregator at Planet PostgreSQL has just picked up the following\nposts from your blog at {0}:\n\n{1}\n\nSince this is a large number of posts, they have been fetched\nand marked as hidden, to avoid possible duplicates.\n\nPlease go to https://planet.postgresql.org/register/edit/{2}\nand confirm (by unhiding) which of these should be posted.\n\nThank you!\n\n".format( + feed.blogurl, + "\n".join(["* " + t for t in titles]), + feed.id), + sendername="Planet PostgreSQL", + receivername="{0} {1}".format(feed.user.first_name, feed.user.last_name), + ) + send_simple_mail(settings.EMAIL_SENDER, + settings.NOTIFICATION_RECEIVER, + "Excessive posts from feed on Planet PostgreSQL", + "The blog at {0} by {1}\nreceived {2} new posts in a single fetch.\nAs this may be incorrect, the posts have been marked as hidden.\nThe author may individually mark them as visible depending on\nprevious posts, and has been sent a notification about this.".format(feed.feedurl, feed.user, len(ids)), + sendername="Planet PostgreSQL", + receivername="Planet PostgreSQL Moderators", + ) + else: + # Email a notification that they were picked up + send_simple_mail(settings.EMAIL_SENDER, + feed.user.email, + "Posts found at your blog at Planet PostgreSQL", + "The blog aggregator at Planet PostgreSQL has just picked up the following\nposts from your blog at {0}:\n\n{1}\n\nIf these entries are correct, you don't have to do anything.\nIf any entry should not be there, head over to\n\nhttps://planet.postgresql.org/register/edit/{2}/\n\nand click the 'Hide' button for those entries as soon\nas possible.\n\nThank you!\n\n".format( + feed.blogurl, + "\n".join(["* " + t for t in titles]), + feed.id), + sendername="Planet PostgreSQL", + receivername="{0} {1}".format(feed.user.first_name, feed.user.last_name), + ) + + if entries > 0 and not had_entries: + # Entries showed up on a blog that was previously empty + send_simple_mail(settings.EMAIL_SENDER, + settings.NOTIFICATION_RECEIVER, + "A blog was added to Planet PostgreSQL", + "The blog at {0} by {1}\nwas added to Planet PostgreSQL, and has now received entries.\n\nTo moderate: https://planet.postgresql.org/register/moderate/\n\n".format(feed.feedurl, feed.user), + sendername="Planet PostgreSQL", + receivername="Planet PostgreSQL Moderators", + ) + + # If the blog URL changed, update it as requested + if getattr(feed, 'new_blogurl', None): + self.trace("URL changed for %s to %s" % (feed.feedurl, feed.new_blogurl)) + send_simple_mail(settings.EMAIL_SENDER, + settings.NOTIFICATION_RECEIVER, + "A blog url changed on Planet PostgreSQL", + "When checking the blog at {0} by {1}\nthe blog URL was updated to:\n{2}\n(from previous value {3})\n\nTo moderate: https://planet.postgresql.org/register/moderate/\n\n".format(feed.feedurl, feed.user, feed.new_blogurl, feed.blogurl), + sendername="Planet PostgreSQL", + receivername="Planet PostgreSQL Moderators", + ) + send_simple_mail(settings.EMAIL_SENDER, + feed.user.email, + "URL of your blog at Planet PostgreSQL updated", + "The blog aggregator at Planet PostgreSQL has update the URL of your blog\nwith the feed at {0} to:\n{1} (from {2})\nIf this is correct, you don't have to do anything.\nIf not, please contact planet@postgresql.org\n".format( + feed.feedurl, + feed.new_blogurl, + feed.blogurl, + ), + sendername="Planet PostgreSQL", + receivername="{0} {1}".format(feed.user.first_name, feed.user.last_name), + ) + feed.blogurl = feed.new_blogurl + feed.save() + if self.debug: + # Roll back transaction without error + raise BreakoutException() + except BreakoutException: + self.stderr.write("Rolling back all changes") + pass + + if total_entries > 0 and not self.debug: + purge_root_and_feeds() + + def _fetch_one_feed(self, fetcher): + if self.full: + self.trace("Fetching %s" % fetcher.feed.feedurl) + since = None + else: + since = fetcher.feed.lastget + self.trace("Fetching %s since %s" % (fetcher.feed.feedurl, since)) + try: + entries = list(fetcher.parse(since)) + except ParserGotRedirect as e: + return (fetcher.feed, e) + except Exception as e: + self.stderr.write("Failed to fetch '%s': %s" % (fetcher.feed.feedurl, e)) + return (fetcher.feed, e) + return (fetcher.feed, entries) diff --git a/hamnadmin/hamnadmin/register/management/commands/delete_old_logs.py b/hamnadmin/hamnadmin/register/management/commands/delete_old_logs.py index e482bcd..09e9d87 100644 --- a/hamnadmin/hamnadmin/register/management/commands/delete_old_logs.py +++ b/hamnadmin/hamnadmin/register/management/commands/delete_old_logs.py @@ -7,11 +7,11 @@ from datetime import timedelta LOG_KEEP_DAYS=300 class Command(BaseCommand): - help = "Delete old logs" + help = "Delete old logs" - def handle(self, *args, **options): - with transaction.atomic(): - curs = connection.cursor() - curs.execute("DELETE FROM aggregatorlog WHERE ts < NOW()-%(age)s", { - 'age': timedelta(days=LOG_KEEP_DAYS), - }) + def handle(self, *args, **options): + with transaction.atomic(): + curs = connection.cursor() + curs.execute("DELETE FROM aggregatorlog WHERE ts < NOW()-%(age)s", { + 'age': timedelta(days=LOG_KEEP_DAYS), + }) diff --git a/hamnadmin/hamnadmin/register/management/commands/send_logs.py b/hamnadmin/hamnadmin/register/management/commands/send_logs.py index 99993d2..6ba62b1 100644 --- a/hamnadmin/hamnadmin/register/management/commands/send_logs.py +++ b/hamnadmin/hamnadmin/register/management/commands/send_logs.py @@ -12,21 +12,21 @@ from hamnadmin.register.models import Blog THRESHOLD=20 class Command(BaseCommand): - help = "Send planet aggregation logs to blog owners" - - def handle(self, *args, **options): - with transaction.atomic(): - for feed in Blog.objects.filter(archived=False, - aggregatorlog__success=False, - aggregatorlog__ts__gt=datetime.now()-timedelta(days=1), - ).annotate( - num=Count("aggregatorlog__id") - ).filter(num__gt=THRESHOLD).order_by(): - # We assume this is only run once a day, so just generate one email - send_simple_mail(settings.EMAIL_SENDER, - feed.user.email, - "Errors retreiving your feed for Planet PostgreSQL", - """Your blog aggregated to Planet PostgreSQL with feed URL + help = "Send planet aggregation logs to blog owners" + + def handle(self, *args, **options): + with transaction.atomic(): + for feed in Blog.objects.filter(archived=False, + aggregatorlog__success=False, + aggregatorlog__ts__gt=datetime.now()-timedelta(days=1), + ).annotate( + num=Count("aggregatorlog__id") + ).filter(num__gt=THRESHOLD).order_by(): + # We assume this is only run once a day, so just generate one email + send_simple_mail(settings.EMAIL_SENDER, + feed.user.email, + "Errors retreiving your feed for Planet PostgreSQL", + """Your blog aggregated to Planet PostgreSQL with feed URL {0} @@ -48,9 +48,9 @@ A message like this will be sent once a day as long as your blog is generating more than {3} errors per day. """.format(feed.feedurl, - feed.num, - feed.id, - THRESHOLD), - sendername="Planet PostgreSQL", - receivername="{0} {1}".format(feed.user.first_name, feed.user.last_name), - ) + feed.num, + feed.id, + THRESHOLD), + sendername="Planet PostgreSQL", + receivername="{0} {1}".format(feed.user.first_name, feed.user.last_name), + ) diff --git a/hamnadmin/hamnadmin/register/migrations/0003_user_foreign_key.py b/hamnadmin/hamnadmin/register/migrations/0003_user_foreign_key.py index 0c4a2a3..4bf958f 100644 --- a/hamnadmin/hamnadmin/register/migrations/0003_user_foreign_key.py +++ b/hamnadmin/hamnadmin/register/migrations/0003_user_foreign_key.py @@ -19,12 +19,12 @@ class Migration(migrations.Migration): field=models.ForeignKey(to=settings.AUTH_USER_MODEL, null=True, on_delete=models.CASCADE), preserve_default=False, ), - migrations.RunSQL("UPDATE feeds SET user_id=(SELECT id FROM auth_user WHERE auth_user.username=userid)"), - migrations.AlterField( - model_name='blog', - name='user', - field=models.ForeignKey(to=settings.AUTH_USER_MODEL, on_delete=models.CASCADE), - ), + migrations.RunSQL("UPDATE feeds SET user_id=(SELECT id FROM auth_user WHERE auth_user.username=userid)"), + migrations.AlterField( + model_name='blog', + name='user', + field=models.ForeignKey(to=settings.AUTH_USER_MODEL, on_delete=models.CASCADE), + ), migrations.RemoveField( model_name='blog', name='userid', diff --git a/hamnadmin/hamnadmin/register/models.py b/hamnadmin/hamnadmin/register/models.py index d0d4b00..f598845 100644 --- a/hamnadmin/hamnadmin/register/models.py +++ b/hamnadmin/hamnadmin/register/models.py @@ -5,136 +5,136 @@ from datetime import datetime, timedelta from hamnadmin.util.shortlink import urlvalmap class Team(models.Model): - teamurl = models.CharField(max_length=255, blank=False) - name = models.CharField(max_length=255, blank=False) - manager = models.ForeignKey(User, null=True, blank=True, on_delete=models.CASCADE) + teamurl = models.CharField(max_length=255, blank=False) + name = models.CharField(max_length=255, blank=False) + manager = models.ForeignKey(User, null=True, blank=True, on_delete=models.CASCADE) - def __str__(self): - return "%s (%s)" % (self.name, self.teamurl) + def __str__(self): + return "%s (%s)" % (self.name, self.teamurl) - class Meta: - db_table = 'teams' - ordering = ['name', ] + class Meta: + db_table = 'teams' + ordering = ['name', ] - class Admin: - pass + class Admin: + pass - @property - def all_blogs(self): - return self.blog_set.filter(approved=True, archived=False) + @property + def all_blogs(self): + return self.blog_set.filter(approved=True, archived=False) class Blog(models.Model): - feedurl = models.CharField(max_length=255, blank=False) - name = models.CharField(max_length=255, blank=False) - blogurl = models.CharField(max_length=255, blank=False) - lastget = models.DateTimeField(default=datetime(2000,1,1)) - user = models.ForeignKey(User, null=False, blank=False, on_delete=models.CASCADE) - approved = models.BooleanField(default=False) - archived = models.BooleanField(default=False) - authorfilter = models.CharField(max_length=255,default='',blank=True) - team = models.ForeignKey(Team,db_column='team', blank=True, null=True, on_delete=models.CASCADE) - twitteruser = models.CharField(max_length=255, default='', blank=True) - excludestats = models.BooleanField(null=False, blank=False, default=False) - - # Things that may change - new_blogurl = None - - def __str__(self): - return "%s (%s)" % (self.name, self.feedurl) - - @property - def email(self): - return self.user.email - - @property - def recent_failures(self): - return self.aggregatorlog_set.filter(success=False, ts__gt=datetime.now()-timedelta(days=1)).count() - - @property - def has_entries(self): - return self.posts.filter(hidden=False).exists() - - @property - def latestentry(self): - try: - return self.posts.filter(hidden=False)[0] - except: - return None - - @property - def recent_entries(self): - return self.posts.order_by('-dat')[:10] - - class Meta: - db_table = 'feeds' - ordering = ['approved','name'] - - class Admin: - pass + feedurl = models.CharField(max_length=255, blank=False) + name = models.CharField(max_length=255, blank=False) + blogurl = models.CharField(max_length=255, blank=False) + lastget = models.DateTimeField(default=datetime(2000,1,1)) + user = models.ForeignKey(User, null=False, blank=False, on_delete=models.CASCADE) + approved = models.BooleanField(default=False) + archived = models.BooleanField(default=False) + authorfilter = models.CharField(max_length=255,default='',blank=True) + team = models.ForeignKey(Team,db_column='team', blank=True, null=True, on_delete=models.CASCADE) + twitteruser = models.CharField(max_length=255, default='', blank=True) + excludestats = models.BooleanField(null=False, blank=False, default=False) + + # Things that may change + new_blogurl = None + + def __str__(self): + return "%s (%s)" % (self.name, self.feedurl) + + @property + def email(self): + return self.user.email + + @property + def recent_failures(self): + return self.aggregatorlog_set.filter(success=False, ts__gt=datetime.now()-timedelta(days=1)).count() + + @property + def has_entries(self): + return self.posts.filter(hidden=False).exists() + + @property + def latestentry(self): + try: + return self.posts.filter(hidden=False)[0] + except: + return None + + @property + def recent_entries(self): + return self.posts.order_by('-dat')[:10] + + class Meta: + db_table = 'feeds' + ordering = ['approved','name'] + + class Admin: + pass class Post(models.Model): - feed = models.ForeignKey(Blog,db_column='feed',related_name='posts', on_delete=models.CASCADE) - guid = models.CharField(max_length=255) - link = models.CharField(max_length=255) - txt = models.TextField() - dat = models.DateTimeField() - title = models.CharField(max_length=255) - guidisperma = models.BooleanField(default=False) - hidden = models.BooleanField(default=False) - twittered = models.BooleanField(default=False) - shortlink = models.CharField(max_length=255) - - def __str__(self): - return self.title - - class Meta: - db_table = 'posts' - ordering = ['-dat'] - unique_together = [ - ('id', 'guid'), - ] - - class Admin: - pass - - def update_shortlink(self): - self.shortlink = self._get_shortlink() - self.save() - - def _get_shortlink(self): - s = "" - i = self.id - while i > 0: - s = urlvalmap[i % 64] + s - i //= 64 - return "https://postgr.es/p/%s" % s + feed = models.ForeignKey(Blog,db_column='feed',related_name='posts', on_delete=models.CASCADE) + guid = models.CharField(max_length=255) + link = models.CharField(max_length=255) + txt = models.TextField() + dat = models.DateTimeField() + title = models.CharField(max_length=255) + guidisperma = models.BooleanField(default=False) + hidden = models.BooleanField(default=False) + twittered = models.BooleanField(default=False) + shortlink = models.CharField(max_length=255) + + def __str__(self): + return self.title + + class Meta: + db_table = 'posts' + ordering = ['-dat'] + unique_together = [ + ('id', 'guid'), + ] + + class Admin: + pass + + def update_shortlink(self): + self.shortlink = self._get_shortlink() + self.save() + + def _get_shortlink(self): + s = "" + i = self.id + while i > 0: + s = urlvalmap[i % 64] + s + i //= 64 + return "https://postgr.es/p/%s" % s class AuditEntry(models.Model): - logtime = models.DateTimeField(default=datetime.now) - user = models.CharField(max_length=32) - logtxt = models.CharField(max_length=1024) - - def __init__(self, username, txt): - super(AuditEntry, self).__init__() - self.user = username - self.logtxt = txt - - def __str__(self): - return "%s (%s): %s" % (self.logtime, self.user, self.logtxt) - - class Meta: - db_table = 'auditlog' - ordering = ['logtime'] - + logtime = models.DateTimeField(default=datetime.now) + user = models.CharField(max_length=32) + logtxt = models.CharField(max_length=1024) + + def __init__(self, username, txt): + super(AuditEntry, self).__init__() + self.user = username + self.logtxt = txt + + def __str__(self): + return "%s (%s): %s" % (self.logtime, self.user, self.logtxt) + + class Meta: + db_table = 'auditlog' + ordering = ['logtime'] + class AggregatorLog(models.Model): - ts = models.DateTimeField(auto_now=True) - feed = models.ForeignKey(Blog, db_column='feed', on_delete=models.CASCADE) - success = models.BooleanField() - info = models.TextField() - - class Meta: - db_table = 'aggregatorlog' - ordering = ['-ts'] - - def __str__(self): - return "Log entry for %s (%s)" % (self.feed.name, self.ts) + ts = models.DateTimeField(auto_now=True) + feed = models.ForeignKey(Blog, db_column='feed', on_delete=models.CASCADE) + success = models.BooleanField() + info = models.TextField() + + class Meta: + db_table = 'aggregatorlog' + ordering = ['-ts'] + + def __str__(self): + return "Log entry for %s (%s)" % (self.feed.name, self.ts) diff --git a/hamnadmin/hamnadmin/register/templatetags/hamn.py b/hamnadmin/hamnadmin/register/templatetags/hamn.py index cfd508e..0055650 100644 --- a/hamnadmin/hamnadmin/register/templatetags/hamn.py +++ b/hamnadmin/hamnadmin/register/templatetags/hamn.py @@ -9,7 +9,7 @@ register = template.Library() @register.filter(name='postcontents') @stringfilter def postcontents(value): - try: - return mark_safe(TruncateAndClean(value)) - except Exception as e: - return "Unable to clean HTML" + try: + return mark_safe(TruncateAndClean(value)) + except Exception as e: + return "Unable to clean HTML" diff --git a/hamnadmin/hamnadmin/register/views.py b/hamnadmin/hamnadmin/register/views.py index 517d331..8312565 100644 --- a/hamnadmin/hamnadmin/register/views.py +++ b/hamnadmin/hamnadmin/register/views.py @@ -19,320 +19,320 @@ from .forms import BlogEditForm, ModerateRejectForm # Public planet def planet_home(request): - statdate = datetime.datetime.now() - datetime.timedelta(days=61) - posts = Post.objects.filter(hidden=False, feed__approved=True).order_by('-dat')[:30] - topposters = Blog.objects.filter(approved=True, excludestats=False, posts__hidden=False, posts__dat__gt=statdate).annotate(numposts=Count('posts__id')).order_by('-numposts')[:10] - topteams = Team.objects.filter(blog__approved=True, blog__excludestats=False, blog__posts__hidden=False, blog__posts__dat__gt=statdate).annotate(numposts=Count('blog__posts__id')).order_by('-numposts')[:10] - return render(request, 'index.tmpl', { - 'posts': posts, - 'topposters': topposters, - 'topteams': topteams, - }) + statdate = datetime.datetime.now() - datetime.timedelta(days=61) + posts = Post.objects.filter(hidden=False, feed__approved=True).order_by('-dat')[:30] + topposters = Blog.objects.filter(approved=True, excludestats=False, posts__hidden=False, posts__dat__gt=statdate).annotate(numposts=Count('posts__id')).order_by('-numposts')[:10] + topteams = Team.objects.filter(blog__approved=True, blog__excludestats=False, blog__posts__hidden=False, blog__posts__dat__gt=statdate).annotate(numposts=Count('blog__posts__id')).order_by('-numposts')[:10] + return render(request, 'index.tmpl', { + 'posts': posts, + 'topposters': topposters, + 'topteams': topteams, + }) def planet_feeds(request): - return render(request, 'feeds.tmpl', { - 'feeds': Blog.objects.filter(approved=True, archived=False), - 'teams': Team.objects.filter(blog__approved=True).distinct().order_by('name'), - }) + return render(request, 'feeds.tmpl', { + 'feeds': Blog.objects.filter(approved=True, archived=False), + 'teams': Team.objects.filter(blog__approved=True).distinct().order_by('name'), + }) def planet_add(request): - return render(request, 'add.tmpl', { - }) + return render(request, 'add.tmpl', { + }) # Registration interface (login and all) def issuperuser(user): - return user.is_authenticated and user.is_superuser + return user.is_authenticated and user.is_superuser @login_required def root(request): - if request.user.is_superuser and 'admin' in request.GET and request.GET['admin'] == '1': - blogs = Blog.objects.all().order_by('archived', 'approved', 'name') - else: - blogs = Blog.objects.filter(user=request.user).order_by('archived', 'approved', 'name') - return render(request, 'index.html',{ - 'blogs': blogs, - 'teams': Team.objects.filter(manager=request.user).order_by('name'), - 'title': 'Your blogs', - }) + if request.user.is_superuser and 'admin' in request.GET and request.GET['admin'] == '1': + blogs = Blog.objects.all().order_by('archived', 'approved', 'name') + else: + blogs = Blog.objects.filter(user=request.user).order_by('archived', 'approved', 'name') + return render(request, 'index.html',{ + 'blogs': blogs, + 'teams': Team.objects.filter(manager=request.user).order_by('name'), + 'title': 'Your blogs', + }) @login_required @transaction.atomic def edit(request, id=None): - if id: - if request.user.is_superuser: - blog = get_object_or_404(Blog, id=id) - else: - blog = get_object_or_404(Blog, id=id, user=request.user) - else: - blog = Blog(user=request.user, name = "{0} {1}".format(request.user.first_name, request.user.last_name)) - - if request.method == 'POST': - saved_url = blog.feedurl - saved_filter = blog.authorfilter - saved_team = blog.team - form = BlogEditForm(request, data=request.POST, instance=blog) - if form.is_valid(): - if id: - # This is an existing one. If we change the URL of the blog, it needs to be - # de-moderated if it was previously approved. - if blog.approved: - if saved_url != form.cleaned_data['feedurl'] or saved_filter != form.cleaned_data['authorfilter']: - obj = form.save() - obj.approved = False - obj.save() - - send_simple_mail(settings.EMAIL_SENDER, - settings.NOTIFICATION_RECEIVER, - "A blog was edited on Planet PostgreSQL", - "The blog at {0}\nwas edited by {1} in a way that needs new moderation.\n\nTo moderate: https://planet.postgresql.org/register/moderate/\n\n".format(blog.feedurl, blog.user), - sendername="Planet PostgreSQL", - receivername="Planet PostgreSQL Moderators", - ) - - messages.warning(request, "Blog has been resubmitted for moderation, and is temporarily disabled.") - - purge_root_and_feeds() - purge_url('/feeds.html') - - return HttpResponseRedirect("/register/edit/{0}/".format(obj.id)) - - obj = form.save() - - if obj.team and obj.team != saved_team: - # We allow anybody to join a team by default, and will just send a notice - # so the team manager can undo it. - send_simple_mail(settings.EMAIL_SENDER, - obj.team.manager.email, - "A blog joined your team on Planet PostgreSQL", - "The blog at {0} by {1} {2}\nhas been added to your team {3} on Planet PostgreSQL\n\nIf this is correct, you do not need to do anything.\n\nIf this is incorrect, please go to\n\nhttps://planet.postgresql.org/register/\n\nand click the button to remove the blog from your team.\nWe apologize if this causes work for you.\n\n".format( - obj.feedurl, - obj.user.first_name, obj.user.last_name, - obj.team.name), - sendername="Planet PostgreSQL", - receivername="{0} {1}".format(obj.team.manager.first_name, obj.team.manager.last_name), - ) - - return HttpResponseRedirect("/register/edit/{0}/".format(obj.id)) - else: - form = BlogEditForm(request, instance=blog) - - return render(request, 'edit.html', { - 'new': id is None, - 'form': form, - 'blog': blog, - 'log': AggregatorLog.objects.filter(feed=blog).order_by('-ts')[:30], - 'posts': Post.objects.filter(feed=blog).order_by('-dat')[:10], - 'title': 'Edit blog: %s' % blog.name, - }) + if id: + if request.user.is_superuser: + blog = get_object_or_404(Blog, id=id) + else: + blog = get_object_or_404(Blog, id=id, user=request.user) + else: + blog = Blog(user=request.user, name = "{0} {1}".format(request.user.first_name, request.user.last_name)) + + if request.method == 'POST': + saved_url = blog.feedurl + saved_filter = blog.authorfilter + saved_team = blog.team + form = BlogEditForm(request, data=request.POST, instance=blog) + if form.is_valid(): + if id: + # This is an existing one. If we change the URL of the blog, it needs to be + # de-moderated if it was previously approved. + if blog.approved: + if saved_url != form.cleaned_data['feedurl'] or saved_filter != form.cleaned_data['authorfilter']: + obj = form.save() + obj.approved = False + obj.save() + + send_simple_mail(settings.EMAIL_SENDER, + settings.NOTIFICATION_RECEIVER, + "A blog was edited on Planet PostgreSQL", + "The blog at {0}\nwas edited by {1} in a way that needs new moderation.\n\nTo moderate: https://planet.postgresql.org/register/moderate/\n\n".format(blog.feedurl, blog.user), + sendername="Planet PostgreSQL", + receivername="Planet PostgreSQL Moderators", + ) + + messages.warning(request, "Blog has been resubmitted for moderation, and is temporarily disabled.") + + purge_root_and_feeds() + purge_url('/feeds.html') + + return HttpResponseRedirect("/register/edit/{0}/".format(obj.id)) + + obj = form.save() + + if obj.team and obj.team != saved_team: + # We allow anybody to join a team by default, and will just send a notice + # so the team manager can undo it. + send_simple_mail(settings.EMAIL_SENDER, + obj.team.manager.email, + "A blog joined your team on Planet PostgreSQL", + "The blog at {0} by {1} {2}\nhas been added to your team {3} on Planet PostgreSQL\n\nIf this is correct, you do not need to do anything.\n\nIf this is incorrect, please go to\n\nhttps://planet.postgresql.org/register/\n\nand click the button to remove the blog from your team.\nWe apologize if this causes work for you.\n\n".format( + obj.feedurl, + obj.user.first_name, obj.user.last_name, + obj.team.name), + sendername="Planet PostgreSQL", + receivername="{0} {1}".format(obj.team.manager.first_name, obj.team.manager.last_name), + ) + + return HttpResponseRedirect("/register/edit/{0}/".format(obj.id)) + else: + form = BlogEditForm(request, instance=blog) + + return render(request, 'edit.html', { + 'new': id is None, + 'form': form, + 'blog': blog, + 'log': AggregatorLog.objects.filter(feed=blog).order_by('-ts')[:30], + 'posts': Post.objects.filter(feed=blog).order_by('-dat')[:10], + 'title': 'Edit blog: %s' % blog.name, + }) @login_required @transaction.atomic def delete(request, id): - if request.user.is_superuser: - blog = get_object_or_404(Blog, id=id) - else: - blog = get_object_or_404(Blog, id=id, user=request.user) - - send_simple_mail(settings.EMAIL_SENDER, - settings.NOTIFICATION_RECEIVER, - "A blog was deleted on Planet PostgreSQL", - "The blog at {0} by {1}\nwas deleted by {2}\n\n".format(blog.feedurl, blog.name, request.user.username), - sendername="Planet PostgreSQL", - receivername="Planet PostgreSQL Moderators", - ) - blog.delete() - messages.info(request, "Blog deleted.") - purge_root_and_feeds() - purge_url('/feeds.html') - return HttpResponseRedirect("/register/") + if request.user.is_superuser: + blog = get_object_or_404(Blog, id=id) + else: + blog = get_object_or_404(Blog, id=id, user=request.user) + + send_simple_mail(settings.EMAIL_SENDER, + settings.NOTIFICATION_RECEIVER, + "A blog was deleted on Planet PostgreSQL", + "The blog at {0} by {1}\nwas deleted by {2}\n\n".format(blog.feedurl, blog.name, request.user.username), + sendername="Planet PostgreSQL", + receivername="Planet PostgreSQL Moderators", + ) + blog.delete() + messages.info(request, "Blog deleted.") + purge_root_and_feeds() + purge_url('/feeds.html') + return HttpResponseRedirect("/register/") @login_required @transaction.atomic def archive(request, id): - if request.user.is_superuser: - blog = get_object_or_404(Blog, id=id) - else: - blog = get_object_or_404(Blog, id=id, user=request.user) - - send_simple_mail(settings.EMAIL_SENDER, - settings.NOTIFICATION_RECEIVER, - "A blog was archived on Planet PostgreSQL", - "The blog at {0} by {1}\nwas archived by {2}\n\n".format(blog.feedurl, blog.name, request.user.username), - sendername="Planet PostgreSQL", - receivername="Planet PostgreSQL Moderators", - ) - blog.archived = True - blog.save() - messages.info(request, "Blog archived.") - return HttpResponseRedirect("/register/") + if request.user.is_superuser: + blog = get_object_or_404(Blog, id=id) + else: + blog = get_object_or_404(Blog, id=id, user=request.user) + + send_simple_mail(settings.EMAIL_SENDER, + settings.NOTIFICATION_RECEIVER, + "A blog was archived on Planet PostgreSQL", + "The blog at {0} by {1}\nwas archived by {2}\n\n".format(blog.feedurl, blog.name, request.user.username), + sendername="Planet PostgreSQL", + receivername="Planet PostgreSQL Moderators", + ) + blog.archived = True + blog.save() + messages.info(request, "Blog archived.") + return HttpResponseRedirect("/register/") @login_required @transaction.atomic def remove_from_team(request, teamid, blogid): - team = get_object_or_404(Team, id=teamid, manager=request.user) - blog = get_object_or_404(Blog, id=blogid) - - if blog.team != team: - messages.error(request, "The blog at {0} does not (any more?) belong to the team {1}!".format( - blog.feedurl, - team.name)) - return HttpResponseRedirect("/register/") - - blog.team = None - blog.save() - - send_simple_mail(settings.EMAIL_SENDER, - settings.NOTIFICATION_RECEIVER, - "A blog was removed from a team on Planet PostgreSQL", - "The blog at {0} by {1} {2}\nwas removed from team {3} by {4}.\n".format( - blog.feedurl, blog.user.first_name, blog.user.last_name, team.name, request.user.username), - sendername="Planet PostgreSQL", - receivername="Planet PostgreSQL Moderators", - ) - - send_simple_mail(settings.EMAIL_SENDER, - blog.user.email, - "Your blog on Planet PostgreSQL was removed from the team", - "Your blog at {0} has been removed\nfrom the team {1} on Planet PostgreSQL.\n\nIf you believe this to be in error, please contact\nthe team administrator.\n\n".format(blog.feedurl, team.name), - sendername="Planet PostgreSQL", - receivername="{0} {1}".format(blog.user.first_name, blog.user.last_name), - ) - - messages.info(request, "Blog {0} removed from team {1}".format(blog.feedurl, team.name)) - return HttpResponseRedirect("/register/") + team = get_object_or_404(Team, id=teamid, manager=request.user) + blog = get_object_or_404(Blog, id=blogid) + + if blog.team != team: + messages.error(request, "The blog at {0} does not (any more?) belong to the team {1}!".format( + blog.feedurl, + team.name)) + return HttpResponseRedirect("/register/") + + blog.team = None + blog.save() + + send_simple_mail(settings.EMAIL_SENDER, + settings.NOTIFICATION_RECEIVER, + "A blog was removed from a team on Planet PostgreSQL", + "The blog at {0} by {1} {2}\nwas removed from team {3} by {4}.\n".format( + blog.feedurl, blog.user.first_name, blog.user.last_name, team.name, request.user.username), + sendername="Planet PostgreSQL", + receivername="Planet PostgreSQL Moderators", + ) + + send_simple_mail(settings.EMAIL_SENDER, + blog.user.email, + "Your blog on Planet PostgreSQL was removed from the team", + "Your blog at {0} has been removed\nfrom the team {1} on Planet PostgreSQL.\n\nIf you believe this to be in error, please contact\nthe team administrator.\n\n".format(blog.feedurl, team.name), + sendername="Planet PostgreSQL", + receivername="{0} {1}".format(blog.user.first_name, blog.user.last_name), + ) + + messages.info(request, "Blog {0} removed from team {1}".format(blog.feedurl, team.name)) + return HttpResponseRedirect("/register/") def __getvalidblogpost(request, blogid, postid): - blog = get_object_or_404(Blog, id=blogid) - post = get_object_or_404(Post, id=postid) - if not blog.user == request.user and not request.user.is_superuser: - raise Exception("You can't view/edit somebody elses blog!") - if not post.feed.id == blog.id: - raise Exception("Blog does not match post") - return post + blog = get_object_or_404(Blog, id=blogid) + post = get_object_or_404(Post, id=postid) + if not blog.user == request.user and not request.user.is_superuser: + raise Exception("You can't view/edit somebody elses blog!") + if not post.feed.id == blog.id: + raise Exception("Blog does not match post") + return post def __setposthide(request, blogid, postid, status): - post = __getvalidblogpost(request, blogid, postid) - post.hidden = status - post.save() - AuditEntry(request.user.username, 'Set post %s on blog %s visibility to %s' % (postid, blogid, status)).save() - messages.info(request, 'Set post "%s" to %s' % (post.title, status and "hidden" or "visible"), extra_tags="top") - purge_root_and_feeds() - return HttpResponseRedirect("/register/edit/{0}/".format(blogid)) + post = __getvalidblogpost(request, blogid, postid) + post.hidden = status + post.save() + AuditEntry(request.user.username, 'Set post %s on blog %s visibility to %s' % (postid, blogid, status)).save() + messages.info(request, 'Set post "%s" to %s' % (post.title, status and "hidden" or "visible"), extra_tags="top") + purge_root_and_feeds() + return HttpResponseRedirect("/register/edit/{0}/".format(blogid)) @login_required @transaction.atomic def blogpost_hide(request, blogid, postid): - return __setposthide(request, blogid, postid, True) + return __setposthide(request, blogid, postid, True) @login_required @transaction.atomic def blogpost_unhide(request, blogid, postid): - return __setposthide(request, blogid, postid, False) + return __setposthide(request, blogid, postid, False) @login_required @transaction.atomic def blogpost_delete(request, blogid, postid): - post = __getvalidblogpost(request, blogid, postid) - title = post.title + post = __getvalidblogpost(request, blogid, postid) + title = post.title - # Update the feed last fetched date to be just before this entry, so that we end up - # re-fetching it if necessary. - post.feed.lastget = post.dat - timedelta(minutes=1) - post.feed.save() + # Update the feed last fetched date to be just before this entry, so that we end up + # re-fetching it if necessary. + post.feed.lastget = post.dat - timedelta(minutes=1) + post.feed.save() - # Now actually delete it - post.delete() - AuditEntry(request.user.username, 'Deleted post %s from blog %s' % (postid, blogid)).save() - messages.info(request, 'Deleted post "%s". It will be reloaded on the next scheduled crawl.' % title) - purge_root_and_feeds() - return HttpResponseRedirect("/register/edit/{0}/".format(blogid)) + # Now actually delete it + post.delete() + AuditEntry(request.user.username, 'Deleted post %s from blog %s' % (postid, blogid)).save() + messages.info(request, 'Deleted post "%s". It will be reloaded on the next scheduled crawl.' % title) + purge_root_and_feeds() + return HttpResponseRedirect("/register/edit/{0}/".format(blogid)) # Moderation @login_required @user_passes_test(issuperuser) def moderate(request): - return render(request, 'moderate.html',{ - 'blogs': Blog.objects.filter(approved=False).annotate(oldest=Max('posts__dat')).order_by('oldest'), - 'title': 'Moderation', - }) + return render(request, 'moderate.html',{ + 'blogs': Blog.objects.filter(approved=False).annotate(oldest=Max('posts__dat')).order_by('oldest'), + 'title': 'Moderation', + }) @login_required @user_passes_test(issuperuser) @transaction.atomic def moderate_reject(request, blogid): - blog = get_object_or_404(Blog, id=blogid) - - if request.method == "POST": - form = ModerateRejectForm(data=request.POST) - if form.is_valid(): - # Ok, actually reject this blog. - # Always send moderator mail - send_simple_mail(settings.EMAIL_SENDER, - settings.NOTIFICATION_RECEIVER, - "A blog was rejected on Planet PostgreSQL", - "The blog at {0} by {1} {2}\nwas marked as rejected by {3}. The message given was:\n\n{4}\n\n".format(blog.feedurl, blog.user.first_name, blog.user.last_name, request.user.username, form.cleaned_data['message']), - sendername="Planet PostgreSQL", - receivername="Planet PostgreSQL Moderators", - ) - messages.info(request, "Blog {0} rejected, notification sent to moderators".format(blog.feedurl)) - if not form.cleaned_data['modsonly']: - send_simple_mail(settings.EMAIL_SENDER, - blog.user.email, - "Your blog submission to Planet PostgreSQL", - "The blog at {0} that you submitted to Planet PostgreSQL has\nunfortunately been rejected. The reason given was:\n\n{1}\n\n".format(blog.feedurl, form.cleaned_data['message']), - sendername="Planet PostgreSQL", - receivername = "{0} {1}".format(blog.user.first_name, blog.user.last_name), - ) - messages.info(request, "Blog {0} rejected, notification sent to blog owner".format(blog.feedurl)) - - blog.delete() - return HttpResponseRedirect("/register/moderate/") - else: - form = ModerateRejectForm() - - return render(request, 'moderate_reject.html', { - 'form': form, - 'blog': blog, - 'title': 'Reject blog', - }) + blog = get_object_or_404(Blog, id=blogid) + + if request.method == "POST": + form = ModerateRejectForm(data=request.POST) + if form.is_valid(): + # Ok, actually reject this blog. + # Always send moderator mail + send_simple_mail(settings.EMAIL_SENDER, + settings.NOTIFICATION_RECEIVER, + "A blog was rejected on Planet PostgreSQL", + "The blog at {0} by {1} {2}\nwas marked as rejected by {3}. The message given was:\n\n{4}\n\n".format(blog.feedurl, blog.user.first_name, blog.user.last_name, request.user.username, form.cleaned_data['message']), + sendername="Planet PostgreSQL", + receivername="Planet PostgreSQL Moderators", + ) + messages.info(request, "Blog {0} rejected, notification sent to moderators".format(blog.feedurl)) + if not form.cleaned_data['modsonly']: + send_simple_mail(settings.EMAIL_SENDER, + blog.user.email, + "Your blog submission to Planet PostgreSQL", + "The blog at {0} that you submitted to Planet PostgreSQL has\nunfortunately been rejected. The reason given was:\n\n{1}\n\n".format(blog.feedurl, form.cleaned_data['message']), + sendername="Planet PostgreSQL", + receivername = "{0} {1}".format(blog.user.first_name, blog.user.last_name), + ) + messages.info(request, "Blog {0} rejected, notification sent to blog owner".format(blog.feedurl)) + + blog.delete() + return HttpResponseRedirect("/register/moderate/") + else: + form = ModerateRejectForm() + + return render(request, 'moderate_reject.html', { + 'form': form, + 'blog': blog, + 'title': 'Reject blog', + }) @login_required @user_passes_test(issuperuser) @transaction.atomic def moderate_approve(request, blogid): - blog = get_object_or_404(Blog, id=blogid) + blog = get_object_or_404(Blog, id=blogid) - if blog.approved: - messages.info(request, "Blog {0} was already approved.".format(blog.feedurl)) - return HttpResponseRedirect("/register/moderate/") + if blog.approved: + messages.info(request, "Blog {0} was already approved.".format(blog.feedurl)) + return HttpResponseRedirect("/register/moderate/") - send_simple_mail(settings.EMAIL_SENDER, - settings.NOTIFICATION_RECEIVER, - "A blog was approved on Planet PostgreSQL", - "The blog at {0} by {1} {2}\nwas marked as approved by {3}.\n\n".format(blog.feedurl, blog.user.first_name, blog.user.last_name, request.user.username), - sendername="Planet PostgreSQL", - receivername="Planet PostgreSQL Moderators", - ) + send_simple_mail(settings.EMAIL_SENDER, + settings.NOTIFICATION_RECEIVER, + "A blog was approved on Planet PostgreSQL", + "The blog at {0} by {1} {2}\nwas marked as approved by {3}.\n\n".format(blog.feedurl, blog.user.first_name, blog.user.last_name, request.user.username), + sendername="Planet PostgreSQL", + receivername="Planet PostgreSQL Moderators", + ) - send_simple_mail(settings.EMAIL_SENDER, - blog.user.email, - "Your blog submission to Planet PostgreSQL", - "The blog at {0} that you submitted to Planet PostgreSQL has\nbeen approved.\n\n".format(blog.feedurl), - sendername="Planet PostgreSQL", - receivername = "{0} {1}".format(blog.user.first_name, blog.user.last_name), - ) + send_simple_mail(settings.EMAIL_SENDER, + blog.user.email, + "Your blog submission to Planet PostgreSQL", + "The blog at {0} that you submitted to Planet PostgreSQL has\nbeen approved.\n\n".format(blog.feedurl), + sendername="Planet PostgreSQL", + receivername = "{0} {1}".format(blog.user.first_name, blog.user.last_name), + ) - blog.approved = True - blog.save() + blog.approved = True + blog.save() - AuditEntry(request.user.username, 'Approved blog %s at %s' % (blog.id, blog.feedurl)).save() + AuditEntry(request.user.username, 'Approved blog %s at %s' % (blog.id, blog.feedurl)).save() - messages.info(request, "Blog {0} approved, notification sent to moderators and owner.".format(blog.feedurl)) + messages.info(request, "Blog {0} approved, notification sent to moderators and owner.".format(blog.feedurl)) - purge_root_and_feeds() - purge_url('/feeds.html') + purge_root_and_feeds() + purge_url('/feeds.html') - return HttpResponseRedirect("/register/moderate/") + return HttpResponseRedirect("/register/moderate/") diff --git a/hamnadmin/hamnadmin/settings.py b/hamnadmin/hamnadmin/settings.py index 1511682..e92d5e2 100644 --- a/hamnadmin/hamnadmin/settings.py +++ b/hamnadmin/hamnadmin/settings.py @@ -10,12 +10,12 @@ ADMINS = ( MANAGERS = ADMINS DATABASES={ - 'default': { - 'ENGINE': 'django.db.backends.postgresql_psycopg2', - 'NAME': 'planetbeta', - 'USER': 'admin', - } - } + 'default': { + 'ENGINE': 'django.db.backends.postgresql_psycopg2', + 'NAME': 'planetbeta', + 'USER': 'admin', + } + } TIME_ZONE = 'GMT' LANGUAGE_CODE = 'en-us' @@ -33,7 +33,7 @@ SECRET_KEY = '_q-piuw^kw^v1f%b6nrla+p%=&1bt#z%c$ujhioxe^!z%8q1l0' MIDDLEWARE = ( 'django.middleware.common.CommonMiddleware', 'django.contrib.sessions.middleware.SessionMiddleware', - 'django.middleware.csrf.CsrfViewMiddleware', + 'django.middleware.csrf.CsrfViewMiddleware', 'django.contrib.messages.middleware.MessageMiddleware', 'django.contrib.auth.middleware.AuthenticationMiddleware', ) @@ -41,19 +41,19 @@ MIDDLEWARE = ( ROOT_URLCONF = 'hamnadmin.urls' TEMPLATES = [{ - 'BACKEND': 'django.template.backends.django.DjangoTemplates', - 'DIRS': [os.path.join(os.path.dirname(__file__), '../../template')], - 'OPTIONS': { - 'context_processors': [ - 'django.template.context_processors.request', - 'django.contrib.auth.context_processors.auth', - 'django.contrib.messages.context_processors.messages', - ], - 'loaders': [ - 'django.template.loaders.filesystem.Loader', - 'django.template.loaders.app_directories.Loader', - ], - }, + 'BACKEND': 'django.template.backends.django.DjangoTemplates', + 'DIRS': [os.path.join(os.path.dirname(__file__), '../../template')], + 'OPTIONS': { + 'context_processors': [ + 'django.template.context_processors.request', + 'django.contrib.auth.context_processors.auth', + 'django.contrib.messages.context_processors.messages', + ], + 'loaders': [ + 'django.template.loaders.filesystem.Loader', + 'django.template.loaders.app_directories.Loader', + ], + }, }] INSTALLED_APPS = ( @@ -86,22 +86,22 @@ MAX_SAFE_ENTRIES_PER_FETCH=4 # Dynamically load settings from the "outer" planet.ini that might # be needed. try: - import configparser - _configparser = configparser.ConfigParser() - _configparser.read(os.path.join(os.path.abspath(os.path.dirname(__file__)), '../../planet.ini')) - TWITTER_CLIENT=_configparser.get('twitter', 'consumer') - TWITTER_CLIENTSECRET=_configparser.get('twitter', 'consumersecret') - TWITTER_TOKEN=_configparser.get('twitter', 'token') - TWITTER_TOKENSECRET=_configparser.get('twitter', 'secret') + import configparser + _configparser = configparser.ConfigParser() + _configparser.read(os.path.join(os.path.abspath(os.path.dirname(__file__)), '../../planet.ini')) + TWITTER_CLIENT=_configparser.get('twitter', 'consumer') + TWITTER_CLIENTSECRET=_configparser.get('twitter', 'consumersecret') + TWITTER_TOKEN=_configparser.get('twitter', 'token') + TWITTER_TOKENSECRET=_configparser.get('twitter', 'secret') except: - TWITTER_CLIENT=None - TWITTER_CLIENTSECRET=None - TWITTER_TOKEN=None - TWITTER_TOKENSECRET=None + TWITTER_CLIENT=None + TWITTER_CLIENTSECRET=None + TWITTER_TOKEN=None + TWITTER_TOKENSECRET=None # If there is a local_settings.py, let it override our settings try: - from .local_settings import * + from .local_settings import * except: - pass + pass diff --git a/hamnadmin/hamnadmin/util/aggregate.py b/hamnadmin/hamnadmin/util/aggregate.py index c3924b3..e506e7a 100644 --- a/hamnadmin/hamnadmin/util/aggregate.py +++ b/hamnadmin/hamnadmin/util/aggregate.py @@ -8,144 +8,144 @@ from vendor.feedparser import feedparser from hamnadmin.register.models import Post class ParserGotRedirect(Exception): - def __init__(self, url): - self.url = url - super(Exception, self).__init__() + def __init__(self, url): + self.url = url + super(Exception, self).__init__() class FeedFetcher(object): - def __init__(self, feed, tracefunc=None, update=True): - self.feed = feed - self.tracefunc = tracefunc - self.update = update - self.newest_entry_date = None - - def _trace(self, msg): - if self.tracefunc: - self.tracefunc(msg) - - def parse(self, fetchsince=None): - # If we can't get a socket connection to complete in 10 seconds, - # give up on that feed. - socket.setdefaulttimeout(10) - - if fetchsince: - parser = feedparser.parse(self.feed.feedurl, modified=fetchsince.timetuple()) - else: - parser = feedparser.parse(self.feed.feedurl) - - if not hasattr(parser, 'status'): - # bozo_excpetion can seemingly be set when there is no error as well, - # so make sure we only check if we didn't get a status. - if hasattr(parser, 'bozo_exception'): - raise Exception('Feed load error %s' % parser.bozo_exception) - raise Exception('Feed load error with no exception!') - - if parser.status == 304: - # Not modified - return - - if parser.status == 301 and hasattr(parser, 'href'): - # Permanent redirect. Bubble this up with an exception and let the caller - # handle it. - raise ParserGotRedirect(parser.href) - - if parser.status != 200: - raise Exception('Feed returned status %s' % parser.status) - - self._trace("Fetched %s, status %s" % (self.feed.feedurl, parser.status)) - - try: - if self.feed.blogurl == '': - self.feed.blogurl = parser.feed.link - elif self.feed.blogurl != parser.feed.link: - self.feed.new_blogurl = parser.feed.link - except: - pass - - for entry in parser.entries: - if not self.matches_filter(entry): - self._trace("Entry %s does not match filter, skipped" % entry.link) - continue - - # Grab the entry. At least atom feeds from wordpress store what we - # want in entry.content[0].value and *also* has a summary that's - # much shorter. - # We therefor check all available texts, and just pick the one that - # is longest. - txtalts = [] - try: - txtalts.append(entry.content[0].value) - except: - pass - if 'summary' in entry: - txtalts.append(entry.summary) - - # Select the longest text - txt = max(txtalts, key=len) - if txt == '': - self._trace("Entry %s has no contents" % entry.link) - continue - - dat = None - if hasattr(entry, 'published_parsed'): - dat = datetime.datetime(*(entry.published_parsed[0:6])) - elif hasattr(entry, 'updated_parsed'): - dat = datetime.datetime(*(entry.updated_parsed[0:6])) - else: - self._trace("Failed to get date for entry %s (keys %s)" % (entry.link, list(entry.keys()))) - continue - - if dat > datetime.datetime.now(): - dat = datetime.datetime.now() - - if self.newest_entry_date: - if dat > self.newest_entry_date: - self.newest_entry_date = dat - else: - self.newest_entry_date = dat - - yield Post(feed=self.feed, - guid=entry.id, - link=entry.link, - txt=txt, - dat=dat, - title=entry.title, - ) - - - # Check if we got back a Last-Modified time - if hasattr(parser, 'modified_parsed') and parser['modified_parsed']: - # Last-Modified header retreived. If we did receive it, we will - # trust the content (assuming we can parse it) - d = datetime.datetime(*parser['modified_parsed'][:6]) - if (d-datetime.datetime.now()).days > 5: - # Except if it's ridiculously long in the future, we'll set it - # to right now instead, to deal with buggy blog software. We - # currently define rediculously long as 5 days - d = datetime.datetime.now() - - if self.update: - self.feed.lastget = d - self.feed.save() - else: - # We didn't get a Last-Modified time, so set it to the entry date - # for the latest entry in this feed. - if self.newest_entry_date and self.update: - self.feed.lastget = self.newest_entry_date - self.feed.save() - - def matches_filter(self, entry): - # For now, we only match against self.feed.authorfilter. In the future, - # there may be more filters. - if self.feed.authorfilter: - # Match against an author filter - - if 'author_detail' in entry: - return entry.author_detail.name == self.feed.authorfilter - elif 'author' in entry: - return entry.author == self.feed.authorfilter - else: - return False - - # No filters, always return true - return True + def __init__(self, feed, tracefunc=None, update=True): + self.feed = feed + self.tracefunc = tracefunc + self.update = update + self.newest_entry_date = None + + def _trace(self, msg): + if self.tracefunc: + self.tracefunc(msg) + + def parse(self, fetchsince=None): + # If we can't get a socket connection to complete in 10 seconds, + # give up on that feed. + socket.setdefaulttimeout(10) + + if fetchsince: + parser = feedparser.parse(self.feed.feedurl, modified=fetchsince.timetuple()) + else: + parser = feedparser.parse(self.feed.feedurl) + + if not hasattr(parser, 'status'): + # bozo_excpetion can seemingly be set when there is no error as well, + # so make sure we only check if we didn't get a status. + if hasattr(parser, 'bozo_exception'): + raise Exception('Feed load error %s' % parser.bozo_exception) + raise Exception('Feed load error with no exception!') + + if parser.status == 304: + # Not modified + return + + if parser.status == 301 and hasattr(parser, 'href'): + # Permanent redirect. Bubble this up with an exception and let the caller + # handle it. + raise ParserGotRedirect(parser.href) + + if parser.status != 200: + raise Exception('Feed returned status %s' % parser.status) + + self._trace("Fetched %s, status %s" % (self.feed.feedurl, parser.status)) + + try: + if self.feed.blogurl == '': + self.feed.blogurl = parser.feed.link + elif self.feed.blogurl != parser.feed.link: + self.feed.new_blogurl = parser.feed.link + except: + pass + + for entry in parser.entries: + if not self.matches_filter(entry): + self._trace("Entry %s does not match filter, skipped" % entry.link) + continue + + # Grab the entry. At least atom feeds from wordpress store what we + # want in entry.content[0].value and *also* has a summary that's + # much shorter. + # We therefor check all available texts, and just pick the one that + # is longest. + txtalts = [] + try: + txtalts.append(entry.content[0].value) + except: + pass + if 'summary' in entry: + txtalts.append(entry.summary) + + # Select the longest text + txt = max(txtalts, key=len) + if txt == '': + self._trace("Entry %s has no contents" % entry.link) + continue + + dat = None + if hasattr(entry, 'published_parsed'): + dat = datetime.datetime(*(entry.published_parsed[0:6])) + elif hasattr(entry, 'updated_parsed'): + dat = datetime.datetime(*(entry.updated_parsed[0:6])) + else: + self._trace("Failed to get date for entry %s (keys %s)" % (entry.link, list(entry.keys()))) + continue + + if dat > datetime.datetime.now(): + dat = datetime.datetime.now() + + if self.newest_entry_date: + if dat > self.newest_entry_date: + self.newest_entry_date = dat + else: + self.newest_entry_date = dat + + yield Post(feed=self.feed, + guid=entry.id, + link=entry.link, + txt=txt, + dat=dat, + title=entry.title, + ) + + + # Check if we got back a Last-Modified time + if hasattr(parser, 'modified_parsed') and parser['modified_parsed']: + # Last-Modified header retreived. If we did receive it, we will + # trust the content (assuming we can parse it) + d = datetime.datetime(*parser['modified_parsed'][:6]) + if (d-datetime.datetime.now()).days > 5: + # Except if it's ridiculously long in the future, we'll set it + # to right now instead, to deal with buggy blog software. We + # currently define rediculously long as 5 days + d = datetime.datetime.now() + + if self.update: + self.feed.lastget = d + self.feed.save() + else: + # We didn't get a Last-Modified time, so set it to the entry date + # for the latest entry in this feed. + if self.newest_entry_date and self.update: + self.feed.lastget = self.newest_entry_date + self.feed.save() + + def matches_filter(self, entry): + # For now, we only match against self.feed.authorfilter. In the future, + # there may be more filters. + if self.feed.authorfilter: + # Match against an author filter + + if 'author_detail' in entry: + return entry.author_detail.name == self.feed.authorfilter + elif 'author' in entry: + return entry.author == self.feed.authorfilter + else: + return False + + # No filters, always return true + return True diff --git a/hamnadmin/hamnadmin/util/html.py b/hamnadmin/hamnadmin/util/html.py index 8416b5e..8401ebf 100644 --- a/hamnadmin/hamnadmin/util/html.py +++ b/hamnadmin/hamnadmin/util/html.py @@ -3,101 +3,101 @@ import tidylib import urllib.parse def TruncateAndClean(txt): - # First apply Tidy - (txt, errors) = tidylib.tidy_document(txt, - options={ - 'drop_proprietary_attributes': 1, - 'alt_text': '', - 'hide_comments': 1, - 'output_xhtml': 1, - 'show_body_only': 1, - 'clean': 1, - 'char_encoding': 'utf8', - 'show-warnings': 0, - 'show-info': 0, - }) + # First apply Tidy + (txt, errors) = tidylib.tidy_document(txt, + options={ + 'drop_proprietary_attributes': 1, + 'alt_text': '', + 'hide_comments': 1, + 'output_xhtml': 1, + 'show_body_only': 1, + 'clean': 1, + 'char_encoding': 'utf8', + 'show-warnings': 0, + 'show-info': 0, + }) - if errors: - raise Exception("Tidy failed: %s" % errors) + if errors: + raise Exception("Tidy failed: %s" % errors) - # Then truncate as necessary - ht = HtmlTruncator(2048) - ht.feed(txt) - out = ht.GetText() + # Then truncate as necessary + ht = HtmlTruncator(2048) + ht.feed(txt) + out = ht.GetText() - # Remove initial <br /> tags - while out.startswith('<br'): - out = out[out.find('>')+1:] + # Remove initial <br /> tags + while out.startswith('<br'): + out = out[out.find('>')+1:] - return out + return out class HtmlTruncator(HTMLParser): - def __init__(self, maxlen): - HTMLParser.__init__(self) - self.len = 0 - self.maxlen = maxlen - self.fulltxt = '' - self.trunctxt = '' - self.tagstack = [] - self.skiprest = False - - def feed(self, txt): - txt = txt.lstrip() - self.fulltxt += txt - HTMLParser.feed(self, txt) + def __init__(self, maxlen): + HTMLParser.__init__(self) + self.len = 0 + self.maxlen = maxlen + self.fulltxt = '' + self.trunctxt = '' + self.tagstack = [] + self.skiprest = False + + def feed(self, txt): + txt = txt.lstrip() + self.fulltxt += txt + HTMLParser.feed(self, txt) - def handle_startendtag(self, tag, attrs): - if self.skiprest: return - self.trunctxt += self.get_starttag_text() - - def quoteurl(self, str): - p = str.split(":",2) - if len(p) < 2: - # Don't crash on invalid URLs - return "" - return p[0] + ":" + urllib.parse.quote(p[1]) + def handle_startendtag(self, tag, attrs): + if self.skiprest: return + self.trunctxt += self.get_starttag_text() + + def quoteurl(self, str): + p = str.split(":",2) + if len(p) < 2: + # Don't crash on invalid URLs + return "" + return p[0] + ":" + urllib.parse.quote(p[1]) - def cleanhref(self, attrs): - if attrs[0] == 'href': - return 'href', self.quoteurl(attrs[1]) - return attrs + def cleanhref(self, attrs): + if attrs[0] == 'href': + return 'href', self.quoteurl(attrs[1]) + return attrs - def handle_starttag(self, tag, attrs): - if self.skiprest: return - self.trunctxt += "<" + tag - self.trunctxt += (' '.join([(' %s="%s"' % (k,v)) for k,v in map(self.cleanhref, attrs)])) - self.trunctxt += ">" - self.tagstack.append(tag) + def handle_starttag(self, tag, attrs): + if self.skiprest: return + self.trunctxt += "<" + tag + self.trunctxt += (' '.join([(' %s="%s"' % (k,v)) for k,v in map(self.cleanhref, attrs)])) + self.trunctxt += ">" + self.tagstack.append(tag) - def handle_endtag(self, tag): - if self.skiprest: return - self.trunctxt += "</" + tag + ">" - self.tagstack.pop() + def handle_endtag(self, tag): + if self.skiprest: return + self.trunctxt += "</" + tag + ">" + self.tagstack.pop() - def handle_entityref(self, ref): - self.len += 1 - if self.skiprest: return - self.trunctxt += "&" + ref + ";" + def handle_entityref(self, ref): + self.len += 1 + if self.skiprest: return + self.trunctxt += "&" + ref + ";" - def handle_data(self, data): - self.len += len(data) - if self.skiprest: return - self.trunctxt += data - if self.len > self.maxlen: - # Passed max length, so truncate text as close to the limit as possible - self.trunctxt = self.trunctxt[0:len(self.trunctxt)-(self.len-self.maxlen)] + def handle_data(self, data): + self.len += len(data) + if self.skiprest: return + self.trunctxt += data + if self.len > self.maxlen: + # Passed max length, so truncate text as close to the limit as possible + self.trunctxt = self.trunctxt[0:len(self.trunctxt)-(self.len-self.maxlen)] - # Now append any tags that weren't properly closed - self.tagstack.reverse() - for tag in self.tagstack: - self.trunctxt += "</" + tag + ">" - self.skiprest = True + # Now append any tags that weren't properly closed + self.tagstack.reverse() + for tag in self.tagstack: + self.trunctxt += "</" + tag + ">" + self.skiprest = True - # Finally, append the continuation chars - self.trunctxt += "[...]" + # Finally, append the continuation chars + self.trunctxt += "[...]" - def GetText(self): - if self.len > self.maxlen: - return self.trunctxt - else: - return self.fulltxt + def GetText(self): + if self.len > self.maxlen: + return self.trunctxt + else: + return self.fulltxt diff --git a/hamnadmin/hamnadmin/util/shortlink.py b/hamnadmin/hamnadmin/util/shortlink.py index 72ea0c7..9e79a9b 100644 --- a/hamnadmin/hamnadmin/util/shortlink.py +++ b/hamnadmin/hamnadmin/util/shortlink.py @@ -1,7 +1,7 @@ # Simple map used to shorten id values to URLs urlvalmap = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', - 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', - 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', - 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', - 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', - 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '-', '_'] + 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', + 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', + 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', + 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', + 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '-', '_'] diff --git a/hamnadmin/hamnadmin/util/varnish.py b/hamnadmin/hamnadmin/util/varnish.py index 95bbaa2..869a210 100644 --- a/hamnadmin/hamnadmin/util/varnish.py +++ b/hamnadmin/hamnadmin/util/varnish.py @@ -3,17 +3,17 @@ from django.conf import settings import requests def purge_url(url): - if not settings.VARNISH_URL: - print("Not purging {0}".format(url)) - else: - try: - r = requests.get(settings.VARNISH_URL, headers={ - 'X-Purge': '^' + url, - }) - if r.status_code != 200: - raise Exception("Invalid response code %s" % r.status_code) - except Exception as e: - raise Exception("Failed to purge '{0}': {1}'".format(url, e)) + if not settings.VARNISH_URL: + print("Not purging {0}".format(url)) + else: + try: + r = requests.get(settings.VARNISH_URL, headers={ + 'X-Purge': '^' + url, + }) + if r.status_code != 200: + raise Exception("Invalid response code %s" % r.status_code) + except Exception as e: + raise Exception("Failed to purge '{0}': {1}'".format(url, e)) def purge_root_and_feeds(): - purge_url('/(|rss20.*)$') + purge_url('/(|rss20.*)$') diff --git a/hamnadmin/vendor/feedparser/feedparser.py b/hamnadmin/vendor/feedparser/feedparser.py index 794c1dc..07ed0c8 100644 --- a/hamnadmin/vendor/feedparser/feedparser.py +++ b/hamnadmin/vendor/feedparser/feedparser.py @@ -1954,7 +1954,7 @@ class _FeedParserMixin: def _start_psc_chapters(self, attrsD): if self.psc_chapters_flag is None: - # Transition from None -> True + # Transition from None -> True self.psc_chapters_flag = True attrsD['chapters'] = [] self._getContext()['psc_chapters'] = FeedParserDict(attrsD) diff --git a/listsync.py b/listsync.py index 94d0fcf..9341dc8 100755 --- a/listsync.py +++ b/listsync.py @@ -14,29 +14,29 @@ import requests if __name__=="__main__": - c = configparser.ConfigParser() - c.read('planet.ini') + c = configparser.ConfigParser() + c.read('planet.ini') - conn = psycopg2.connect(c.get('planet', 'db')) - curs = conn.cursor() - curs.execute(""" + conn = psycopg2.connect(c.get('planet', 'db')) + curs = conn.cursor() + curs.execute(""" SELECT DISTINCT email FROM auth_user INNER JOIN feeds ON auth_user.id=feeds.user_id WHERE feeds.approved AND NOT feeds.archived """) - syncstruct = [{'email': r[0]} for r in curs.fetchall()] - - r = requests.put('{0}/api/subscribers/{1}/'.format(c.get('list', 'server'), c.get('list', 'listname')), - headers={'X-api-key': c.get('list', 'apikey')}, - json=syncstruct, - ) - if r.status_code != 200: - print("Failed to talk to pglister api: %s" % r.status_code) - print(r.text) - sys.exit(1) - - j = r.json() - for a in j['added']: - print("Added subscriber %s" % a) - for a in j['deleted']: - print("Removed subscriber %s" % a) + syncstruct = [{'email': r[0]} for r in curs.fetchall()] + + r = requests.put('{0}/api/subscribers/{1}/'.format(c.get('list', 'server'), c.get('list', 'listname')), + headers={'X-api-key': c.get('list', 'apikey')}, + json=syncstruct, + ) + if r.status_code != 200: + print("Failed to talk to pglister api: %s" % r.status_code) + print(r.text) + sys.exit(1) + + j = r.json() + for a in j['added']: + print("Added subscriber %s" % a) + for a in j['deleted']: + print("Removed subscriber %s" % a) diff --git a/posttotwitter.py b/posttotwitter.py index 8154c56..18461a2 100755 --- a/posttotwitter.py +++ b/posttotwitter.py @@ -21,99 +21,99 @@ from twitterclient import TwitterClient _urlvalmap = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '-', '_'] class PostToTwitter(TwitterClient): - def __init__(self, cfg): - TwitterClient.__init__(self, cfg) - - psycopg2.extensions.register_type(psycopg2.extensions.UNICODE) - self.db = psycopg2.connect(c.get('planet','db')) - - - def do_post(self, msg): - """ - Actually make a post to twitter! - """ - r = self.tw.post('{0}statuses/update.json'.format(self.twitter_api), data={ - 'status': msg, - }) - if r.status_code != 200: - raise Exception("Could not post to twitter, status code {0}".format(r.status_code)) - - def Run(self): - c = self.db.cursor() - c.execute("""SELECT posts.id, posts.title, posts.link, posts.shortlink, feeds.name, feeds.twitteruser - FROM posts INNER JOIN feeds ON posts.feed=feeds.id - WHERE approved AND age(dat) < '7 days' AND NOT (twittered OR hidden) ORDER BY dat""") - for post in c.fetchall(): - if post[3] and len(post[3])>1: - short = post[3] - else: - # No short-link exists, so create one. We need the short-link - # to twitter, and we store it separately in the database - # in case it's needed. - try: - short = self.shortid(post[0]) - except Exception as e: - print("Failed to shorten URL %s: %s" % (post[2], e)) - continue - - c.execute("UPDATE posts SET shortlink=%(short)s WHERE id=%(id)s", { - 'short': short, - 'id': post[0], - }) - self.db.commit() - - # Set up the string to twitter - if post[5] and len(post[5])>1: - # Twitter username registered - msg = "%s (@%s): %s %s" % ( - post[4], - post[5], - self.trimpost(post[1],len(post[4])+len(post[5])+len(short)+7), - short, - ) - else: - msg = "%s: %s %s" % ( - post[4], - self.trimpost(post[1],len(post[4])+len(short)+3), - short, - ) - - # Now post it to twitter - try: - self.do_post(msg) - except Exception as e: - print("Error posting to twitter (post %s): %s" % (post[0], e)) - # We'll just try again with the next one - continue - - # Flag this item as posted - c.execute("UPDATE posts SET twittered='t' WHERE id=%(id)s", { 'id': post[0] }) - self.db.commit() - - print("Twittered: %s" % msg) - - - # Trim a post to the length required by twitter, so we don't fail to post - # if a title is really long. Assume other parts of the string to be - # posted are <otherlen> characters. - def trimpost(self, txt, otherlen): - if len(txt) + otherlen < 140: - return txt - return "%s..." % (txt[:(140-otherlen-3)]) - - - # Trim an URL using https://postgr.es - def shortid(self, id): - s = "" - while id > 0: - s = _urlvalmap[id % 64] + s - id /= 64 - return "https://postgr.es/p/%s" % s + def __init__(self, cfg): + TwitterClient.__init__(self, cfg) + + psycopg2.extensions.register_type(psycopg2.extensions.UNICODE) + self.db = psycopg2.connect(c.get('planet','db')) + + + def do_post(self, msg): + """ + Actually make a post to twitter! + """ + r = self.tw.post('{0}statuses/update.json'.format(self.twitter_api), data={ + 'status': msg, + }) + if r.status_code != 200: + raise Exception("Could not post to twitter, status code {0}".format(r.status_code)) + + def Run(self): + c = self.db.cursor() + c.execute("""SELECT posts.id, posts.title, posts.link, posts.shortlink, feeds.name, feeds.twitteruser + FROM posts INNER JOIN feeds ON posts.feed=feeds.id + WHERE approved AND age(dat) < '7 days' AND NOT (twittered OR hidden) ORDER BY dat""") + for post in c.fetchall(): + if post[3] and len(post[3])>1: + short = post[3] + else: + # No short-link exists, so create one. We need the short-link + # to twitter, and we store it separately in the database + # in case it's needed. + try: + short = self.shortid(post[0]) + except Exception as e: + print("Failed to shorten URL %s: %s" % (post[2], e)) + continue + + c.execute("UPDATE posts SET shortlink=%(short)s WHERE id=%(id)s", { + 'short': short, + 'id': post[0], + }) + self.db.commit() + + # Set up the string to twitter + if post[5] and len(post[5])>1: + # Twitter username registered + msg = "%s (@%s): %s %s" % ( + post[4], + post[5], + self.trimpost(post[1],len(post[4])+len(post[5])+len(short)+7), + short, + ) + else: + msg = "%s: %s %s" % ( + post[4], + self.trimpost(post[1],len(post[4])+len(short)+3), + short, + ) + + # Now post it to twitter + try: + self.do_post(msg) + except Exception as e: + print("Error posting to twitter (post %s): %s" % (post[0], e)) + # We'll just try again with the next one + continue + + # Flag this item as posted + c.execute("UPDATE posts SET twittered='t' WHERE id=%(id)s", { 'id': post[0] }) + self.db.commit() + + print("Twittered: %s" % msg) + + + # Trim a post to the length required by twitter, so we don't fail to post + # if a title is really long. Assume other parts of the string to be + # posted are <otherlen> characters. + def trimpost(self, txt, otherlen): + if len(txt) + otherlen < 140: + return txt + return "%s..." % (txt[:(140-otherlen-3)]) + + + # Trim an URL using https://postgr.es + def shortid(self, id): + s = "" + while id > 0: + s = _urlvalmap[id % 64] + s + id /= 64 + return "https://postgr.es/p/%s" % s if __name__=="__main__": - c = configparser.ConfigParser() - c.read('planet.ini') - PostToTwitter(c).Run() + c = configparser.ConfigParser() + c.read('planet.ini') + PostToTwitter(c).Run() diff --git a/redirector/redirector.py b/redirector/redirector.py index cdb2e71..4f07345 100755 --- a/redirector/redirector.py +++ b/redirector/redirector.py @@ -18,63 +18,63 @@ _urlvalmap = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', ' connstr = "" def iddecode(idstr): - idval = 0 - for c in idstr: - idval *= 64 - idval += _urlvalmap.index(c) - return idval + idval = 0 + for c in idstr: + idval *= 64 + idval += _urlvalmap.index(c) + return idval def application(environ, start_response): - try: - # If we have a querystring, get rid of it. This can (presumably) - # happen with some click-tracking systems. - if '?' in environ['REQUEST_URI']: - uri = environ['REQUEST_URI'].split('?')[0] - else: - uri = environ['REQUEST_URI'] - - # Start by getting the id from the request - id = iddecode(uri.split('/')[-1]) - - # Let's figure out where this URL should be - - # Since we cache heavily with varnish in front of this, we don't - # bother with any connection pooling. - conn = psycopg2.connect(connstr) - c = conn.cursor() - c.execute("SELECT link FROM posts WHERE id=%(id)s", { - 'id': id - }) - r = c.fetchall() - - conn.close() - - if len(r) != 1: - start_response('404 Not Found', [ - ('Content-type', 'text/plain'), - ]) - return [b"Link not found\n"] - - # We have a link, return a redirect to it - start_response('301 Moved Permanently', [ - ('Content-type', 'text/html'), - ('Location', r[0][0]), - ('X-Planet', str(id)) - ]) - return [ - b"<html>\n<head>\n<title>postgr.es</title>\n</head>\n<body>\n", - b"<a href=\"%s\">moved here</a>\n" % r[0][0].encode('utf8'), - b"</body>\n</html>\n" - ] - except Exception as ex: - start_response('500 Internal Server Error', [ - ('Content-type', 'text/plain') - ]) - - return [ - "An internal server error occured\n", - str(ex) - ] + try: + # If we have a querystring, get rid of it. This can (presumably) + # happen with some click-tracking systems. + if '?' in environ['REQUEST_URI']: + uri = environ['REQUEST_URI'].split('?')[0] + else: + uri = environ['REQUEST_URI'] + + # Start by getting the id from the request + id = iddecode(uri.split('/')[-1]) + + # Let's figure out where this URL should be + + # Since we cache heavily with varnish in front of this, we don't + # bother with any connection pooling. + conn = psycopg2.connect(connstr) + c = conn.cursor() + c.execute("SELECT link FROM posts WHERE id=%(id)s", { + 'id': id + }) + r = c.fetchall() + + conn.close() + + if len(r) != 1: + start_response('404 Not Found', [ + ('Content-type', 'text/plain'), + ]) + return [b"Link not found\n"] + + # We have a link, return a redirect to it + start_response('301 Moved Permanently', [ + ('Content-type', 'text/html'), + ('Location', r[0][0]), + ('X-Planet', str(id)) + ]) + return [ + b"<html>\n<head>\n<title>postgr.es</title>\n</head>\n<body>\n", + b"<a href=\"%s\">moved here</a>\n" % r[0][0].encode('utf8'), + b"</body>\n</html>\n" + ] + except Exception as ex: + start_response('500 Internal Server Error', [ + ('Content-type', 'text/plain') + ]) + + return [ + "An internal server error occured\n", + str(ex) + ] c = configparser.ConfigParser() diff --git a/setuptwitter.py b/setuptwitter.py index 9540949..562d1be 100755 --- a/setuptwitter.py +++ b/setuptwitter.py @@ -17,10 +17,10 @@ cfg = configparser.ConfigParser() cfg.read('planet.ini') if not cfg.has_option('twitter', 'consumer') or not cfg.has_option('twitter', 'consumersecret'): - print("Before you can run this, you need to register an application at") - print("developer.twitter.com and put the consumer and consumersecret values") - print("in the [twitter] section of planet.ini.") - sys.exit(1) + print("Before you can run this, you need to register an application at") + print("developer.twitter.com and put the consumer and consumersecret values") + print("in the [twitter] section of planet.ini.") + sys.exit(1) oauth = requests_oauthlib.OAuth1Session(cfg.get('twitter', 'consumer'), cfg.get('twitter', 'consumersecret')) fetch_response = oauth.fetch_request_token('https://api.twitter.com/oauth/request_token') @@ -30,10 +30,10 @@ print("Please go to {0} and log in".format(auth_url)) pin = input('Enter the PIN received here:') oauth2 = requests_oauthlib.OAuth1Session(cfg.get('twitter', 'consumer'), - cfg.get('twitter', 'consumersecret'), - fetch_response.get('oauth_token'), - fetch_response.get('oauth_token_secret'), - verifier=pin) + cfg.get('twitter', 'consumersecret'), + fetch_response.get('oauth_token'), + fetch_response.get('oauth_token_secret'), + verifier=pin) tokens = oauth2.fetch_access_token('https://api.twitter.com/oauth/access_token') diff --git a/synctwitter.py b/synctwitter.py index 697e0a1..f5472ec 100755 --- a/synctwitter.py +++ b/synctwitter.py @@ -14,42 +14,42 @@ import configparser from twitterclient import TwitterClient class SyncTwitter(TwitterClient): - def __init__(self, cfg): - TwitterClient.__init__(self, cfg) - - psycopg2.extensions.register_type(psycopg2.extensions.UNICODE) - self.db = psycopg2.connect(cfg.get('planet','db')) - - def Run(self): - # Get list of handles that should be on the list - curs = self.db.cursor() - curs.execute("SELECT DISTINCT lower(twitteruser) FROM feeds WHERE approved AND NOT (twitteruser IS NULL OR twitteruser='') ORDER BY lower(twitteruser)"); - expected = set([r[0].replace('@','') for r in curs.fetchall()]) - - # Get list of current screen names the list is following - current = set(self.list_subscribers()) - - # Start by deleting, then adding the new ones - for s in current.difference(expected): - # We don't care about the return code and just keep running if it - # fails, since we will try again later. - self.remove_subscriber(s) - for s in expected.difference(current): - # If we fail to add a subscriber, stop trying - if not self.add_subscriber(s): - # Most likely it's things like it doesn't exist or we don't have permissions - # to follow it. - print("Failed to add twitter subscriber {0}, removing from feed record".format(s)) - - # To be on the safe side, store the old twitter username. In case the twitter APIs - # go bonkers on us and we end up removing too much. - curs.execute("UPDATE feeds SET oldtwitteruser=twitteruser, twitteruser='' WHERE lower(twitteruser)=%(twitter)s", { - 'twitter': s, - }) - self.db.commit() + def __init__(self, cfg): + TwitterClient.__init__(self, cfg) + + psycopg2.extensions.register_type(psycopg2.extensions.UNICODE) + self.db = psycopg2.connect(cfg.get('planet','db')) + + def Run(self): + # Get list of handles that should be on the list + curs = self.db.cursor() + curs.execute("SELECT DISTINCT lower(twitteruser) FROM feeds WHERE approved AND NOT (twitteruser IS NULL OR twitteruser='') ORDER BY lower(twitteruser)"); + expected = set([r[0].replace('@','') for r in curs.fetchall()]) + + # Get list of current screen names the list is following + current = set(self.list_subscribers()) + + # Start by deleting, then adding the new ones + for s in current.difference(expected): + # We don't care about the return code and just keep running if it + # fails, since we will try again later. + self.remove_subscriber(s) + for s in expected.difference(current): + # If we fail to add a subscriber, stop trying + if not self.add_subscriber(s): + # Most likely it's things like it doesn't exist or we don't have permissions + # to follow it. + print("Failed to add twitter subscriber {0}, removing from feed record".format(s)) + + # To be on the safe side, store the old twitter username. In case the twitter APIs + # go bonkers on us and we end up removing too much. + curs.execute("UPDATE feeds SET oldtwitteruser=twitteruser, twitteruser='' WHERE lower(twitteruser)=%(twitter)s", { + 'twitter': s, + }) + self.db.commit() if __name__=="__main__": - c = configparser.ConfigParser() - c.read('planet.ini') - SyncTwitter(c).Run() + c = configparser.ConfigParser() + c.read('planet.ini') + SyncTwitter(c).Run() diff --git a/twitterclient.py b/twitterclient.py index f303c17..0b367c0 100644 --- a/twitterclient.py +++ b/twitterclient.py @@ -10,76 +10,76 @@ Copyright (C) 2009-2019 PostgreSQL Global Development Group import requests_oauthlib class TwitterClient(object): - """ - Base class representing a twitter client, implementing all those twitter - API calls that are in use. - Does not attempt to be a complete twitter client, just to fill the needs - for the planet software. - """ + """ + Base class representing a twitter client, implementing all those twitter + API calls that are in use. + Does not attempt to be a complete twitter client, just to fill the needs + for the planet software. + """ - def __init__(self, cfg): - """ - Initialize the instance. The parameter cfg is a ConfigParser object - that has loaded the planet.ini file. - """ - self.twittername = cfg.get('twitter', 'account') - self.twitterlist = cfg.get('twitter', 'listname') + def __init__(self, cfg): + """ + Initialize the instance. The parameter cfg is a ConfigParser object + that has loaded the planet.ini file. + """ + self.twittername = cfg.get('twitter', 'account') + self.twitterlist = cfg.get('twitter', 'listname') - self.tw = requests_oauthlib.OAuth1Session(cfg.get('twitter', 'consumer'), - cfg.get('twitter', 'consumersecret'), - cfg.get('twitter', 'token'), - cfg.get('twitter', 'secret')) + self.tw = requests_oauthlib.OAuth1Session(cfg.get('twitter', 'consumer'), + cfg.get('twitter', 'consumersecret'), + cfg.get('twitter', 'token'), + cfg.get('twitter', 'secret')) - self.twitter_api = 'https://api.twitter.com/1.1/' + self.twitter_api = 'https://api.twitter.com/1.1/' - def list_subscribers(self): - # Eek. It seems subscribers are paged even if we don't ask for it - # Thus, we need to loop with multiple requests - cursor=-1 - handles = [] - while cursor != 0: - response = self.tw.get('{0}lists/members.json'.format(self.twitter_api), params={ - 'owner_screen_name': self.twittername, - 'slug': self.twitterlist, - 'cursor': cursor, - }) - if response.status_code != 200: - print(response.json()) - raise Exception("Received status {0} when listing users".format(response.status_code)) - j = response.json() - handles.extend([x['screen_name'].lower() for x in j['users']]) - cursor = j['next_cursor'] + def list_subscribers(self): + # Eek. It seems subscribers are paged even if we don't ask for it + # Thus, we need to loop with multiple requests + cursor=-1 + handles = [] + while cursor != 0: + response = self.tw.get('{0}lists/members.json'.format(self.twitter_api), params={ + 'owner_screen_name': self.twittername, + 'slug': self.twitterlist, + 'cursor': cursor, + }) + if response.status_code != 200: + print(response.json()) + raise Exception("Received status {0} when listing users".format(response.status_code)) + j = response.json() + handles.extend([x['screen_name'].lower() for x in j['users']]) + cursor = j['next_cursor'] - return handles + return handles - def remove_subscriber(self, name): - print("Removing twitter user %s from list." % name) - r = self.tw.post('{0}lists/members/destroy.json'.format(self.twitter_api), data={ - 'owner_screen_name': self.twittername, - 'slug': self.twitterlist, - 'screen_name': name, - }) - if r.status_code != 200: - try: - err = r.json()['errors'][0]['message'] - except: - err = 'Response does not contain error messages with json' - print("Failed to remove subscriber {0}: {1}".format(name, err)) - return False - return True + def remove_subscriber(self, name): + print("Removing twitter user %s from list." % name) + r = self.tw.post('{0}lists/members/destroy.json'.format(self.twitter_api), data={ + 'owner_screen_name': self.twittername, + 'slug': self.twitterlist, + 'screen_name': name, + }) + if r.status_code != 200: + try: + err = r.json()['errors'][0]['message'] + except: + err = 'Response does not contain error messages with json' + print("Failed to remove subscriber {0}: {1}".format(name, err)) + return False + return True - def add_subscriber(self, name): - print("Adding twitter user %s to list." % name) - r = self.tw.post('{0}lists/members/create.json'.format(self.twitter_api), data={ - 'owner_screen_name': self.twittername, - 'slug': self.twitterlist, - 'screen_name': name, - }) - if r.status_code != 200: - try: - err = r.json()['errors'][0]['message'] - except: - err = 'Response does not contain error messages with json' - print("Failed to add subscriber {0}: {1}".format(name, err)) - return False - return True + def add_subscriber(self, name): + print("Adding twitter user %s to list." % name) + r = self.tw.post('{0}lists/members/create.json'.format(self.twitter_api), data={ + 'owner_screen_name': self.twittername, + 'slug': self.twitterlist, + 'screen_name': name, + }) + if r.status_code != 200: + try: + err = r.json()['errors'][0]['message'] + except: + err = 'Response does not contain error messages with json' + print("Failed to add subscriber {0}: {1}".format(name, err)) + return False + return True |
