diff options
author | Jeremy Kerr <jk@ozlabs.org> | 2008-09-20 12:09:10 +1000 |
---|---|---|
committer | Jeremy Kerr <jk@ozlabs.org> | 2008-09-20 12:09:10 +1000 |
commit | 918fab011f24f22f9915674c104258e20a5fcf26 (patch) | |
tree | b44f8ebc803ade916ea0029d7b8e2c8e62a8230e /apps/patchwork/bin | |
parent | 5787cddc0bde4514cba96a360f89841e13d2e506 (diff) | |
download | patchwork-918fab011f24f22f9915674c104258e20a5fcf26.tar.bz2 patchwork-918fab011f24f22f9915674c104258e20a5fcf26.tar.xz |
[parser] Decode From: headers
We're getting a few utf-8 encoded From: addresses, so decode before
saving to the DB.
Also, add tests.
Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Diffstat (limited to 'apps/patchwork/bin')
-rwxr-xr-x | apps/patchwork/bin/parsemail.py | 17 |
1 files changed, 14 insertions, 3 deletions
diff --git a/apps/patchwork/bin/parsemail.py b/apps/patchwork/bin/parsemail.py
index b0f1497..07554bc 100755
--- a/apps/patchwork/bin/parsemail.py
+++ b/apps/patchwork/bin/parsemail.py
@@ -26,11 +26,11 @@ import time
 import operator
 from email import message_from_file
 try:
-    from email.header import Header
+    from email.header import Header, decode_header
     from email.utils import parsedate_tz, mktime_tz
 except ImportError:
     # Python 2.4 compatibility
-    from email.Header import Header
+    from email.Header import Header, decode_header
     from email.Utils import parsedate_tz, mktime_tz
 
 from patchwork.parser import parse_patch
@@ -38,6 +38,17 @@ from patchwork.models import Patch, Project, Person, Comment
 
 list_id_headers = ['List-ID', 'X-Mailing-List']
 
+def clean_header(header):
+    """ Decode (possibly non-ascii) headers """
+
+    def decode(str, fragment):
+        (frag_str, frag_encoding) = fragment
+        if frag_encoding:
+            return str + frag_str.decode(frag_encoding)
+        return str + frag_str.decode()
+
+    return reduce(decode, decode_header(header), u'').strip()
+
 def find_project(mail):
     project = None
     listid_re = re.compile('.*<([^>]+)>.*', re.S)
@@ -60,7 +71,7 @@ def find_project(mail):
 
 
 def find_author(mail):
-    from_header = mail.get('From').strip()
+    from_header = clean_header(mail.get('From'))
     (name, email) = (None, None)
 
     # tuple of (regex, fn)