summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Jeremy Kerr <jk@ozlabs.org> 2008-09-20 12:09:10 +1000
committer: Jeremy Kerr <jk@ozlabs.org> 2008-09-20 12:09:10 +1000
commit: 918fab011f24f22f9915674c104258e20a5fcf26 (patch)
tree: b44f8ebc803ade916ea0029d7b8e2c8e62a8230e
parent: 5787cddc0bde4514cba96a360f89841e13d2e506 (diff)
download: patchwork-918fab011f24f22f9915674c104258e20a5fcf26.tar.bz2
download: patchwork-918fab011f24f22f9915674c104258e20a5fcf26.tar.xz
[parser] Decode From: headers
We're getting a few utf-8 encoded From: addresses, so decode before saving to the DB. Also, add tests.

Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
-rwxr-xr-x apps/patchwork/bin/parsemail.py 17
-rw-r--r-- apps/patchwork/tests/patchparser.py 47
2 files changed, 59 insertions, 5 deletions
diff --git a/apps/patchwork/bin/parsemail.py b/apps/patchwork/bin/parsemail.py
index b0f1497..07554bc 100755
--- a/apps/patchwork/bin/parsemail.py
+++ b/apps/patchwork/bin/parsemail.py
@@ -26,11 +26,11 @@ import time
import operator
from email import message_from_file
try:
- from email.header import Header
+ from email.header import Header, decode_header
from email.utils import parsedate_tz, mktime_tz
except ImportError:
# Python 2.4 compatibility
- from email.Header import Header
+ from email.Header import Header, decode_header
from email.Utils import parsedate_tz, mktime_tz
from patchwork.parser import parse_patch
@@ -38,6 +38,17 @@ from patchwork.models import Patch, Project, Person, Comment
list_id_headers = ['List-ID', 'X-Mailing-List']
+def clean_header(header):
+ """ Decode (possibly non-ascii) headers """
+
+ def decode(str, fragment):
+ (frag_str, frag_encoding) = fragment
+ if frag_encoding:
+ return str + frag_str.decode(frag_encoding)
+ return str + frag_str.decode()
+
+ return reduce(decode, decode_header(header), u'').strip()
+
def find_project(mail):
project = None
listid_re = re.compile('.*<([^>]+)>.*', re.S)
@@ -60,7 +71,7 @@ def find_project(mail):
def find_author(mail):
- from_header = mail.get('From').strip()
+ from_header = clean_header(mail.get('From'))
(name, email) = (None, None)
# tuple of (regex, fn)
diff --git a/apps/patchwork/tests/patchparser.py b/apps/patchwork/tests/patchparser.py
index 2e207bf..6fe7968 100644
--- a/apps/patchwork/tests/patchparser.py
+++ b/apps/patchwork/tests/patchparser.py
@@ -21,7 +21,8 @@ import unittest
import os
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
-from patchwork.models import Project
+from email import message_from_string
+from patchwork.models import Project, Person
test_mail_dir = 'patchwork/tests/mail'
test_patch_dir = 'patchwork/tests/patches'
@@ -55,7 +56,7 @@ class PatchTest(unittest.TestCase):
return file(os.path.join(test_patch_dir, filename)).read()
-from patchwork.bin.parsemail import find_content
+from patchwork.bin.parsemail import find_content, find_author
class InlinePatchTest(PatchTest):
patch_filename = '0001-add-line.patch'
@@ -126,3 +127,45 @@ class UpdateSigCommentTest(SignatureCommentTest):
""" Test for '---\nUpdate: v2' style comments to patches, with a sig """
patch_filename = '0001-add-line.patch'
test_comment = 'Test comment\nmore comment\n---\nUpdate: test update'
+
+class SenderEncodingTest(unittest.TestCase):
+ sender_name = u'example user'
+ sender_email = 'user@example.com'
+ from_header = 'example user <user@example.com>'
+
+ def setUp(self):
+ mail = 'From: %s\n' % self.from_header + \
+ 'Subject: test\n\n' + \
+ 'test'
+ self.email = message_from_string(mail)
+ (self.person, new) = find_author(self.email)
+ self.person.save()
+
+ def tearDown(self):
+ self.person.delete()
+
+ def testName(self):
+ self.assertEquals(self.person.name, self.sender_name)
+
+ def testEmail(self):
+ self.assertEquals(self.person.email, self.sender_email)
+
+ def testDBQueryName(self):
+ db_person = Person.objects.get(name = self.sender_name)
+ self.assertEquals(self.person, db_person)
+
+ def testDBQueryEmail(self):
+ db_person = Person.objects.get(email = self.sender_email)
+ self.assertEquals(self.person, db_person)
+
+
+class SenderUTF8QPEncodingTest(SenderEncodingTest):
+ sender_name = u'\xe9xample user'
+ from_header = '=?utf-8?q?=C3=A9xample=20user?= <user@example.com>'
+
+class SenderUTF8QPSplitEncodingTest(SenderEncodingTest):
+ sender_name = u'\xe9xample user'
+ from_header = '=?utf-8?q?=C3=A9xample=20?= user <user@example.com>'
+
+class SenderUTF8B64EncodingTest(SenderUTF8QPEncodingTest):
+ from_header = '=?utf-8?B?w6l4YW1wbGUgdXNlcg==?= <user@example.com>'