diff options
author | Magnus Hagander | 2018-11-20 14:17:41 +0000 |
---|---|---|
committer | Magnus Hagander | 2018-11-20 14:18:46 +0000 |
commit | bf32ad62796fbcc09ea3839907e825d27cf86b58 (patch) | |
tree | 25282a71fd85d4dbb244e9f86e8ab8b40aa7077a /loader/lib/parser.py | |
parent | b99c61419fc7cd7aa3a3dfce5d7f492136b9d9b2 (diff) |
One more round of header replacement fixes
Diffstat (limited to 'loader/lib/parser.py')
-rw-r--r-- | loader/lib/parser.py | 13 |
1 file changed, 9 insertions, 4 deletions
```diff
diff --git a/loader/lib/parser.py b/loader/lib/parser.py
index 22f91f2..d0e5302 100644
--- a/loader/lib/parser.py
+++ b/loader/lib/parser.py
@@ -164,7 +164,9 @@ class ArchivesParser(object):
 
         # Sometimes we end up with a trailing \0 when decoding long strings, so
         # replace it if it's there.
-        b = b.rstrip('\0')
+        # In fact, replace it everywhere, since it can also turn up in the middle
+        # of a text when it's a really broken decoding.
+        b = b.replace('\0', '')
 
         return b
 
@@ -435,7 +437,7 @@ class ArchivesParser(object):
         # do this *before* doing any MIME decoding, we should be safe against
         # anybody *actually* putting that sequence in the header (since we
         # won't match the encoded contents)
-        hdr = hdr.replace("\n\t","")
+        hdr = hdr.replace("\n\t"," ")
 
         # In at least some cases, at least gmail (and possibly other MUAs)
         # incorrectly put double quotes in the name/email field even when
@@ -456,7 +458,10 @@ class ArchivesParser(object):
 
     def decode_mime_header(self, hdr, email_workaround=False):
         try:
-            return self._decode_mime_header(hdr, email_workaround)
+            h = self._decode_mime_header(hdr, email_workaround)
+            if h:
+                return h.replace("\0", "")
+            return ''
         except LookupError, e:
             raise IgnorableException("Failed to decode header value '%s': %s" % (hdr, e))
         except ValueError, ve:
@@ -474,7 +479,7 @@ class ArchivesParser(object):
         try:
             return self.msg[fieldname]
         except:
-            return None
+            return ''
 
     def html_clean(self, html):
         # First we pass it through tidy
```