From: Jordan Moldow Date: Wed, 23 Mar 2016 01:42:37 -0700 Subject: [PATCH] Prevent crashing from UnicodeDecodeError On Python 2, `sys.stdout` and `print` can normally handle any combination of `str` and `unicode` objects. However, `StringIO.StringIO` can only safely handle one or the other. If the program writes both a `unicode` string, and a non-ASCII `str` string, then the `getvalue()` method will fail with `UnicodeDecodeError` [1]. In nose, that causes the script to suddenly abort, with the cryptic `UnicodeDecodeError`. This fix catches `UnicodeError` when trying to get the captured output, and will replace the captured output with a warning message. Fixes #816 [1] Patch-Source: https://github.com/nose-devs/nose/pull/988 diff --git a/nose/plugins/capture.py b/nose/plugins/capture.py index c81f21ec..e7bbdd55 100644 --- a/nose/plugins/capture.py +++ b/nose/plugins/capture.py @@ -12,6 +12,7 @@ import logging import os import sys +import traceback from nose.plugins.base import Plugin from nose.pyversion import exc_to_unicode, force_unicode from nose.util import ln @@ -71,26 +72,56 @@ def beforeTest(self, test): def formatError(self, test, err): """Add captured output to error report. """ - test.capturedOutput = output = self.buffer + test.capturedOutput = output = '' + output_exc_info = None + try: + test.capturedOutput = output = self.buffer + except UnicodeError: + # python2's StringIO.StringIO [1] class has this warning: + # + # The StringIO object can accept either Unicode or 8-bit strings, + # but mixing the two may take some care. If both are used, 8-bit + # strings that cannot be interpreted as 7-bit ASCII (that use the + # 8th bit) will cause a UnicodeError to be raised when getvalue() + # is called. + # + # This exception handler is a protection against issue #816 [2]. + # Capturing the exception info allows us to display it back to the + # user. + # + # [1] + # [2] + output_exc_info = sys.exc_info() self._buf = None - if not output: + if (not output) and (not output_exc_info): # Don't return None as that will prevent other # formatters from formatting and remove earlier formatters # formats, instead return the err we got return err ec, ev, tb = err - return (ec, self.addCaptureToErr(ev, output), tb) + return (ec, self.addCaptureToErr(ev, output, output_exc_info=output_exc_info), tb) def formatFailure(self, test, err): """Add captured output to failure report. """ return self.formatError(test, err) - def addCaptureToErr(self, ev, output): + def addCaptureToErr(self, ev, output, output_exc_info=None): + # If given, output_exc_info should be a 3-tuple from sys.exc_info(), + # from an exception raised while trying to get the captured output. ev = exc_to_unicode(ev) output = force_unicode(output) - return u'\n'.join([ev, ln(u'>> begin captured stdout <<'), - output, ln(u'>> end captured stdout <<')]) + error_text = [ev, ln(u'>> begin captured stdout <<'), + output, ln(u'>> end captured stdout <<')] + if output_exc_info: + error_text.extend([u'OUTPUT ERROR: Could not get captured output.', + # + # + u"The test might've printed both 'unicode' strings and non-ASCII 8-bit 'str' strings.", + ln(u'>> begin captured stdout exception traceback <<'), + u''.join(traceback.format_exception(*output_exc_info)), + ln(u'>> end captured stdout exception traceback <<')]) + return u'\n'.join(error_text) def start(self): self.stdout.append(sys.stdout) diff --git a/unit_tests/test_capture_plugin.py b/unit_tests/test_capture_plugin.py index edab7de0..90125d3e 100644 --- a/unit_tests/test_capture_plugin.py +++ b/unit_tests/test_capture_plugin.py @@ -4,6 +4,12 @@ from optparse import OptionParser from nose.config import Config from nose.plugins.capture import Capture +from nose.pyversion import force_unicode + +if sys.version_info[0] == 2: + py2 = True +else: + py2 = False class TestCapturePlugin(unittest.TestCase): @@ -62,6 +68,35 @@ def test_captures_nonascii_stdout(self): c.end() self.assertEqual(c.buffer, "test 日本\n") + def test_does_not_crash_with_mixed_unicode_and_nonascii_str(self): + class Dummy: + pass + d = Dummy() + c = Capture() + c.start() + printed_nonascii_str = force_unicode("test 日本").encode('utf-8') + printed_unicode = force_unicode("Hello") + print printed_nonascii_str + print printed_unicode + try: + raise Exception("boom") + except: + err = sys.exc_info() + formatted = c.formatError(d, err) + _, fev, _ = formatted + + if py2: + for string in [force_unicode(printed_nonascii_str, encoding='utf-8'), printed_unicode]: + assert string not in fev, "Output unexpectedly found in error message" + assert d.capturedOutput == '', "capturedOutput unexpectedly non-empty" + assert "OUTPUT ERROR" in fev + assert "captured stdout exception traceback" in fev + assert "UnicodeDecodeError" in fev + else: + for string in [repr(printed_nonascii_str), printed_unicode]: + assert string in fev, "Output not found in error message" + assert string in d.capturedOutput, "Output not attached to test" + def test_format_error(self): class Dummy: pass