aboutsummaryrefslogtreecommitdiffstats
path: root/main/py-nose/fix-crashing-from-UnicodeDecodeError.patch
blob: 9154ab086268469f315f85d79a0b125c226b927f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
From: Jordan Moldow <jmoldow@box.com>
Date: Wed, 23 Mar 2016 01:42:37 -0700
Subject: [PATCH] Prevent crashing from UnicodeDecodeError

On Python 2, `sys.stdout` and `print` can normally handle any
combination of `str` and `unicode` objects. However,
`StringIO.StringIO` can only safely handle one or the other. If
the program writes both a `unicode` string, and a non-ASCII
`str` string, then the `getvalue()` method will fail with
`UnicodeDecodeError` [1].

In nose, that causes the script to suddenly abort, with the
cryptic `UnicodeDecodeError`.

This fix catches `UnicodeError` when trying to get the captured
output, and will replace the captured output with a warning
message.

Fixes #816

[1] <https://github.com/python/cpython/blob/2.7/Lib/StringIO.py#L258>

Patch-Source: https://github.com/nose-devs/nose/pull/988

diff --git a/nose/plugins/capture.py b/nose/plugins/capture.py
index c81f21ec..e7bbdd55 100644
--- a/nose/plugins/capture.py
+++ b/nose/plugins/capture.py
@@ -12,6 +12,7 @@
 import logging
 import os
 import sys
+import traceback
 from nose.plugins.base import Plugin
 from nose.pyversion import exc_to_unicode, force_unicode
 from nose.util import ln
@@ -71,26 +72,56 @@ def beforeTest(self, test):
     def formatError(self, test, err):
         """Add captured output to error report.
         """
-        test.capturedOutput = output = self.buffer
+        test.capturedOutput = output = ''
+        output_exc_info = None
+        try:
+            test.capturedOutput = output = self.buffer
+        except UnicodeError:
+            # python2's StringIO.StringIO [1] class has this warning:
+            #
+            #     The StringIO object can accept either Unicode or 8-bit strings,
+            #     but mixing the two may take some care. If both are used, 8-bit
+            #     strings that cannot be interpreted as 7-bit ASCII (that use the
+            #     8th bit) will cause a UnicodeError to be raised when getvalue()
+            #     is called.
+            #
+            # This exception handler is a protection against issue #816 [2].
+            # Capturing the exception info allows us to display it back to the
+            # user.
+            #
+            # [1] <https://github.com/python/cpython/blob/2.7/Lib/StringIO.py#L258>
+            # [2] <https://github.com/nose-devs/nose/issues/816>
+            output_exc_info = sys.exc_info()
         self._buf = None
-        if not output:
+        if (not output) and (not output_exc_info):
             # Don't return None as that will prevent other
             # formatters from formatting and remove earlier formatters
             # formats, instead return the err we got
             return err
         ec, ev, tb = err
-        return (ec, self.addCaptureToErr(ev, output), tb)
+        return (ec, self.addCaptureToErr(ev, output, output_exc_info=output_exc_info), tb)
 
     def formatFailure(self, test, err):
         """Add captured output to failure report.
         """
         return self.formatError(test, err)
 
-    def addCaptureToErr(self, ev, output):
+    def addCaptureToErr(self, ev, output, output_exc_info=None):
+        # If given, output_exc_info should be a 3-tuple from sys.exc_info(),
+        # from an exception raised while trying to get the captured output.
         ev = exc_to_unicode(ev)
         output = force_unicode(output)
-        return u'\n'.join([ev, ln(u'>> begin captured stdout <<'),
-                           output, ln(u'>> end captured stdout <<')])
+        error_text = [ev, ln(u'>> begin captured stdout <<'),
+                      output, ln(u'>> end captured stdout <<')]
+        if output_exc_info:
+            error_text.extend([u'OUTPUT ERROR: Could not get captured output.',
+                               # <https://github.com/python/cpython/blob/2.7/Lib/StringIO.py#L258>
+                               # <https://github.com/nose-devs/nose/issues/816>
+                               u"The test might've printed both 'unicode' strings and non-ASCII 8-bit 'str' strings.",
+                               ln(u'>> begin captured stdout exception traceback <<'),
+                               u''.join(traceback.format_exception(*output_exc_info)),
+                               ln(u'>> end captured stdout exception traceback <<')])
+        return u'\n'.join(error_text)
 
     def start(self):
         self.stdout.append(sys.stdout)
diff --git a/unit_tests/test_capture_plugin.py b/unit_tests/test_capture_plugin.py
index edab7de0..90125d3e 100644
--- a/unit_tests/test_capture_plugin.py
+++ b/unit_tests/test_capture_plugin.py
@@ -4,6 +4,12 @@
 from optparse import OptionParser
 from nose.config import Config
 from nose.plugins.capture import Capture
+from nose.pyversion import force_unicode
+
+if sys.version_info[0] == 2:
+    py2 = True
+else:
+    py2 = False
 
 class TestCapturePlugin(unittest.TestCase):
 
@@ -62,6 +68,35 @@ def test_captures_nonascii_stdout(self):
         c.end()
         self.assertEqual(c.buffer, "test 日本\n")
 
+    def test_does_not_crash_with_mixed_unicode_and_nonascii_str(self):
+        class Dummy:
+            pass
+        d = Dummy()
+        c = Capture()
+        c.start()
+        printed_nonascii_str = force_unicode("test 日本").encode('utf-8')
+        printed_unicode = force_unicode("Hello")
+        print printed_nonascii_str
+        print printed_unicode
+        try:
+            raise Exception("boom")
+        except:
+            err = sys.exc_info()
+        formatted = c.formatError(d, err)
+        _, fev, _ = formatted
+
+        if py2:
+            for string in [force_unicode(printed_nonascii_str, encoding='utf-8'), printed_unicode]:
+                assert string not in fev, "Output unexpectedly found in error message"
+            assert d.capturedOutput == '', "capturedOutput unexpectedly non-empty"
+            assert "OUTPUT ERROR" in fev
+            assert "captured stdout exception traceback" in fev
+            assert "UnicodeDecodeError" in fev
+        else:
+            for string in [repr(printed_nonascii_str), printed_unicode]:
+                assert string in fev, "Output not found in error message"
+                assert string in d.capturedOutput, "Output not attached to test"
+
     def test_format_error(self):
         class Dummy:
             pass