summary | refs | log | tree | commit | diff | stats
path: root/apps/patchwork/bin
diff options
context:
space:
mode:
author: Jeremy Kerr <jk@ozlabs.org>, 2008-09-20 12:09:10 +1000
committer: Jeremy Kerr <jk@ozlabs.org>, 2008-09-20 12:09:10 +1000
commit: 918fab011f24f22f9915674c104258e20a5fcf26 (patch)
tree: b44f8ebc803ade916ea0029d7b8e2c8e62a8230e /apps/patchwork/bin
parent: 5787cddc0bde4514cba96a360f89841e13d2e506 (diff)
download: patchwork-918fab011f24f22f9915674c104258e20a5fcf26.tar.bz2
download: patchwork-918fab011f24f22f9915674c104258e20a5fcf26.tar.xz
[parser] Decode From: headers
We're getting a few utf-8 encoded From: addresses, so decode before
saving to the DB. Also, add tests.

Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Diffstat (limited to 'apps/patchwork/bin')
-rwxr-xr-x  apps/patchwork/bin/parsemail.py  17
1 files changed, 14 insertions, 3 deletions
diff --git a/apps/patchwork/bin/parsemail.py b/apps/patchwork/bin/parsemail.py
index b0f1497..07554bc 100755
--- a/apps/patchwork/bin/parsemail.py
+++ b/apps/patchwork/bin/parsemail.py
@@ -26,11 +26,11 @@ import time
import operator
from email import message_from_file
try:
- from email.header import Header
+ from email.header import Header, decode_header
from email.utils import parsedate_tz, mktime_tz
except ImportError:
# Python 2.4 compatibility
- from email.Header import Header
+ from email.Header import Header, decode_header
from email.Utils import parsedate_tz, mktime_tz
from patchwork.parser import parse_patch
@@ -38,6 +38,17 @@ from patchwork.models import Patch, Project, Person, Comment
list_id_headers = ['List-ID', 'X-Mailing-List']
+def clean_header(header):
+ """ Decode (possibly non-ascii) headers """
+
+ def decode(str, fragment):
+ (frag_str, frag_encoding) = fragment
+ if frag_encoding:
+ return str + frag_str.decode(frag_encoding)
+ return str + frag_str.decode()
+
+ return reduce(decode, decode_header(header), u'').strip()
+
def find_project(mail):
project = None
listid_re = re.compile('.*<([^>]+)>.*', re.S)
@@ -60,7 +71,7 @@ def find_project(mail):
def find_author(mail):
- from_header = mail.get('From').strip()
+ from_header = clean_header(mail.get('From'))
(name, email) = (None, None)
# tuple of (regex, fn)