diff options
author | Magnus Hagander | 2019-01-03 10:14:30 +0000 |
---|---|---|
committer | Magnus Hagander | 2019-01-04 11:23:39 +0000 |
commit | fa36dc94bbd0bc86bf63458d8f00da04785ac669 (patch) | |
tree | 3cc216c4fb6212907172f2c38bbf7ca67e038e5c /loader/lib/parser.py | |
parent | bb5775efe5f938461537e0c95c7c110875e4718b (diff) |
Trap internal AssertionError from python libraries
For some really broken messages, we end up in a cannot-happen codepath.
Trap this one and just consider that MIME part empty, and try again
later.
In passing, also change it so we continue loading after failures of parsing.
We continued in the mode where we just generated diffs, but not when
making updates. Now continue in both cases, but of course don't do the
actual update if the parsing failed.
Diffstat (limited to 'loader/lib/parser.py')
-rw-r--r-- | loader/lib/parser.py | 47 |
1 files changed, 42 insertions, 5 deletions
diff --git a/loader/lib/parser.py b/loader/lib/parser.py index 8ee25c5..b97c8b3 100644 --- a/loader/lib/parser.py +++ b/loader/lib/parser.py @@ -124,7 +124,14 @@ class ArchivesParser(object): return charset def get_payload_as_unicode(self, msg): - b = msg.get_payload(decode=True) + try: + b = msg.get_payload(decode=True) + except AssertionError: + # Badly encoded data can throw an exception here, where the python + # libraries fail to handle it and enters a cannot-happen path. + # In which case we just ignore it and hope for a better MIME part later. + b = None + if b: # Find out if there is a charset charset = None @@ -303,8 +310,15 @@ class ArchivesParser(object): return # For now, accept anything not text/plain if container.get_content_type() != 'text/plain': - self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True))) + try: + self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True))) + except AssertionError: + # Badly encoded data can throw an exception here, where the python + # libraries fail to handle it and enters a cannot-happen path. + # In which case we just ignore this attachment. + return return + # It's a text/plain, it might be worthwhile. # If it has a name, we consider it an attachments if not container.get_params(): @@ -312,19 +326,42 @@ class ArchivesParser(object): for k,v in container.get_params(): if k=='name' and v != '': # Yes, it has a name - self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True))) + try: + self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True))) + except AssertionError: + # Badly encoded data can throw an exception here, where the python + # libraries fail to handle it and enters a cannot-happen path. + # In which case we just ignore this attachment. + return + return + # If it's content-disposition=attachment, we also want to save it if 'Content-Disposition' in container and container['Content-Disposition'].startswith('attachment'): - self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True))) + try: + self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True))) + except AssertionError: + # Badly encoded data can throw an exception here, where the python + # libraries fail to handle it and enters a cannot-happen path. + # In which case we just ignore this attachment. + return + return + # If we have already found one text/plain part, make all # further text/plain parts attachments if self.attachments_found_first_plaintext: # However, this will also *always* catch the MIME part added # by majordomo with the footer. So if that one is present, # we need to explicitly exclude it again. - b = container.get_payload(decode=True) + try: + b = container.get_payload(decode=True) + except AssertionError: + # Badly encoded data can throw an exception here, where the python + # libraries fail to handle it and enters a cannot-happen path. + # In which case we just ignore this attachment. + return + if isinstance(b, str) and not self._re_footer.match(b): # We know there is no name for this one self.attachments.append((None, container.get_content_type(), b)) |