diff options
author | Siddhesh Poyarekar <siddhesh@redhat.com> | 2014-07-14 10:21:32 +0800 |
---|---|---|
committer | Jeremy Kerr <jk@ozlabs.org> | 2014-07-24 17:30:45 +0800 |
commit | 880fc52d2d4ccdcbf4a7b76f1b4ba6b9e7482dff (patch) | |
tree | 993827678013aab90cccf4b032e956181dcef5aa | |
parent | f49c72da5545f2da49dd83d9ec54fe232cf60360 (diff) | |
download | patchwork-880fc52d2d4ccdcbf4a7b76f1b4ba6b9e7482dff.tar.bz2 patchwork-880fc52d2d4ccdcbf4a7b76f1b4ba6b9e7482dff.tar.xz |
parsemail: Fall back to common charsets when charset is None or x-unknown
We recently encountered a case in our glibc patchwork instance on
sourceware, where a patch was dropped because it had an x-unknown
charset.
This change falls back to a set of common encodings (instead of just
utf-8) when the charset is not specified or is not a recognised codec (e.g. x-unknown).
Minor changes and testcase by Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Siddhesh Poyarekar <siddhesh@redhat.com>
Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
-rwxr-xr-x | apps/patchwork/bin/parsemail.py | 40 | ||||
-rw-r--r-- | apps/patchwork/tests/mail/0010-invalid-charset.mbox | 90 | ||||
-rw-r--r-- | apps/patchwork/tests/test_patchparser.py | 11 |
3 files changed, 135 insertions, 6 deletions
diff --git a/apps/patchwork/bin/parsemail.py b/apps/patchwork/bin/parsemail.py index b6eb97a..2a4866f 100755 --- a/apps/patchwork/bin/parsemail.py +++ b/apps/patchwork/bin/parsemail.py @@ -24,6 +24,7 @@ import re import datetime import time import operator +import codecs from email import message_from_file try: from email.header import Header, decode_header @@ -147,6 +148,13 @@ def find_pull_request(content): return match.group(1) return None +def try_decode(payload, charset): + try: + payload = unicode(payload, charset) + except UnicodeDecodeError: + return None + return payload + def find_content(project, mail): patchbuf = None commentbuf = '' @@ -157,15 +165,35 @@ def find_content(project, mail): continue payload = part.get_payload(decode=True) - charset = part.get_content_charset() subtype = part.get_content_subtype() - # if we don't have a charset, assume utf-8 - if charset is None: - charset = 'utf-8' - if not isinstance(payload, unicode): - payload = unicode(payload, charset) + charset = part.get_content_charset() + + # Check that we have a charset that we understand. Otherwise, + # ignore it and fallback to our standard set. + if charset is not None: + try: + codec = codecs.lookup(charset) + except LookupError: + charset = None + + # If there is no charset or if it is unknown, then try some common + # charsets before we fail. + if charset is None: + try_charsets = ['utf-8', 'windows-1252', 'iso-8859-1'] + else: + try_charsets = [charset] + + for cset in try_charsets: + decoded_payload = try_decode(payload, cset) + if decoded_payload is not None: + break + payload = decoded_payload + + # Could not find a valid decoded payload. Fail. 
+ if payload is None: + return (None, None) if subtype in ['x-patch', 'x-diff']: patchbuf = payload diff --git a/apps/patchwork/tests/mail/0010-invalid-charset.mbox b/apps/patchwork/tests/mail/0010-invalid-charset.mbox new file mode 100644 index 0000000..10b369d --- /dev/null +++ b/apps/patchwork/tests/mail/0010-invalid-charset.mbox @@ -0,0 +1,90 @@ +From libc-alpha-return-50517-siddhesh=redhat.com@sourceware.org Thu Jun 5 10:36:33 2014 +Received: (qmail 11948 invoked by alias); 4 Jun 2014 17:51:01 -0000 +Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm +List-Id: <libc-alpha.sourceware.org> +Sender: libc-alpha-owner@sourceware.org +Date: Wed, 4 Jun 2014 17:50:46 +0000 +From: "Joseph S. Myers" <joseph@codesourcery.com> +To: <libc-alpha@sourceware.org> +Subject: Fix pow overflow in non-default rounding modes (bug 16315) +Message-ID: <Pine.LNX.4.64.1406041749420.3719@digraph.polyomino.org.uk> +MIME-Version: 1.0 +Content-Type: multipart/mixed; + boundary="-1152306461-1522705971-1401904246=:3719" +Content-Length: 24171 + +---1152306461-1522705971-1401904246=:3719 +Content-Type: text/plain; charset="none" +Content-Transfer-Encoding: QUOTED-PRINTABLE + +This patch, relative to a tree with +<https://sourceware.org/ml/libc-alpha/2014-06/msg00076.html> applied, +fixes bug 16315, bad pow handling of overflow/underflow in non-default +rounding modes. Tests of pow are duly converted to ALL_RM_TEST to run +all tests in all rounding modes. + +There are two main issues here. First, various implementations +compute a negative result by negating a positive result, but this +yields inappropriate overflow / underflow values for directed +rounding, so either overflow / underflow results need recomputing in +the correct sign, or the relevant overflowing / underflowing operation +needs to be made to have a result of the correct sign. 
Second, the +dbl-64 implementation sets FE_TONEAREST internally; in the overflow / +underflow case, the result needs recomputing in the original rounding +mode. + +Tested x86_64 and x86 and ulps updated accordingly. + +(auto-libm-test-out diffs omitted below.) + +2014-06-04 Joseph Myers <joseph@codesourcery.com> + +=09[BZ #16315] +=09* sysdeps/i386/fpu/e_pow.S (__ieee754_pow): Ensure possibly +=09overflowing or underflowing operations take place with sign of +=09result. +=09* sysdeps/i386/fpu/e_powf.S (__ieee754_powf): Likewise. +=09* sysdeps/i386/fpu/e_powl.S (__ieee754_powl): Likewise. +=09* sysdeps/ieee754/dbl-64/e_pow.c: Include <math.h>. +=09(__ieee754_pow): Recompute overflowing and underflowing results in +=09original rounding mode. +=09* sysdeps/x86/fpu/powl_helper.c: Include <stdbool.h>. +=09(__powl_helper): Allow negative argument X and scale negated value +=09as needed. Avoid passing value outside [-1, 1] to f2xm1. +=09* sysdeps/x86_64/fpu/e_powl.S (__ieee754_powl): Ensure possibly +=09overflowing or underflowing operations take place with sign of +=09result. +=09* sysdeps/x86_64/fpu/multiarch/e_pow.c [HAVE_FMA4_SUPPORT]: +=09Include <math.h>. +=09* math/auto-libm-test-in: Add more tests of pow. +=09* math/auto-libm-test-out: Regenerated. +=09* math/libm-test.inc (pow_test): Use ALL_RM_TEST. +=09(pow_tonearest_test_data): Remove. +=09(pow_test_tonearest): Likewise. +=09(pow_towardzero_test_data): Likewise. +=09(pow_test_towardzero): Likewise. +=09(pow_downward_test_data): Likewise. +=09(pow_test_downward): Likewise. +=09(pow_upward_test_data): Likewise. +=09(pow_test_upward): Likewise. +=09(main): Don't call removed functions. +=09* sysdeps/i386/fpu/libm-test-ulps: Update. +=09* sysdeps/x86_64/fpu/libm-test-ulps: Likewise. 
+ +diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow.c b/sysdeps/x86_64/fpu/mult= +iarch/e_pow.c +index a740b6c..433cce0 100644 +--- a/sysdeps/x86_64/fpu/multiarch/e_pow.c ++++ b/sysdeps/x86_64/fpu/multiarch/e_pow.c +@@ -1,5 +1,6 @@ + #ifdef HAVE_FMA4_SUPPORT + # include <init-arch.h> ++# include <math.h> + # include <math_private.h> +=20 + extern double __ieee754_pow_sse2 (double, double); + +--=20 +Joseph S. Myers +joseph@codesourcery.com +---1152306461-1522705971-1401904246=:3719-- diff --git a/apps/patchwork/tests/test_patchparser.py b/apps/patchwork/tests/test_patchparser.py index 0496a69..d9a24c1 100644 --- a/apps/patchwork/tests/test_patchparser.py +++ b/apps/patchwork/tests/test_patchparser.py @@ -422,6 +422,17 @@ class CVSFormatPatchTest(MBoxPatchTest): self.assertTrue(comment is not None) self.assertTrue(patch.content.startswith('Index')) +class CharsetFallbackPatchTest(MBoxPatchTest): + """ Test mail with and invalid charset name, and check that we can parse + with one of the fallback encodings""" + + mail_file = '0010-invalid-charset.mbox' + + def testPatch(self): + (patch, comment) = find_content(self.project, self.mail) + self.assertTrue(patch is not None) + self.assertTrue(comment is not None) + class DelegateRequestTest(TestCase): patch_filename = '0001-add-line.patch' msgid = '<1@example.com>' |