fdo#70796 fix quoted printable encoding bug in internal Python

Change-Id: I4e5563c47df83c50df75ccf330fbd38ec6da9170
author: Andras Timar <andras.timar@collabora.com> 2014-01-15 09:44:40 +0100
committer: Andras Timar <andras.timar@collabora.com> 2014-01-15 09:45:13 +0100
commit: 65be22476b586637ecccc6b96fab187e5933cd00 (patch)
tree: a2e947ec92755f2aaba74691d3de39396163df0c /external
parent: 0b3513f34d1b930c17f42ac2cadad7fa43636848 (diff)
2 files changed, 202 insertions, 0 deletions
diff --git a/external/python3/UnpackedTarball_python3.mk b/external/python3/UnpackedTarball_python3.mk
index 200a9e1372bb..1de6b111a06f 100644
--- a/external/python3/UnpackedTarball_python3.mk
+++ b/external/python3/UnpackedTarball_python3.mk
@@ -32,6 +32,7 @@ $(eval $(call gb_UnpackedTarball_add_patches,python3,\
 	external/python3/python-3.3.0-gcc-4.8.patch.1 \
 	external/python3/python-3.3.0-pythreadstate.patch.1 \
 	external/python3/python-3.3.0-clang.patch.1 \
+	external/python3/python-3.3.3-quoted-printable.patch.1 \
 ))
 
 ifneq ($(filter DRAGONFLY FREEBSD LINUX NETBSD OPENBSD SOLARIS,$(OS)),)
diff --git a/external/python3/python-3.3.3-quoted-printable.patch.1 b/external/python3/python-3.3.3-quoted-printable.patch.1
new file mode 100644
index 000000000000..30c065dd26fd
--- /dev/null
+++ b/external/python3/python-3.3.3-quoted-printable.patch.1
@@ -0,0 +1,201 @@
+
+# HG changeset patch
+# User R David Murray <rdmurray@bitdance.com>
+# Date 1389637161 18000
+# Node ID 4c5b1932354bc4707ef182cf0fa61b2e8ccfaa5e
+# Parent  0ce2396a134bebca11b17337734d5e9966e2a95f
+#20206, #5803: more efficient algorithm that doesn't truncate output.
+
+This fixes an edge case (20206) where if the input ended in a character
+needing encoding but there was no newline on the string, the last byte
+of the encoded character would be dropped.  The fix is to use a more
+efficient algorithm, provided by Serhiy Storchaka (5803), that does not
+have the bug.
+
+diff --git a/Lib/email/quoprimime.py b/Lib/email/quoprimime.py
+--- a/Lib/email/quoprimime.py
++++ b/Lib/email/quoprimime.py
+@@ -53,8 +53,9 @@ EMPTYSTRING = ''
+ # space-wise.  Remember that headers and bodies have different sets of safe
+ # characters.  Initialize both maps with the full expansion, and then override
+ # the safe bytes with the more compact form.
+-_QUOPRI_HEADER_MAP = dict((c, '=%02X' % c) for c in range(256))
+-_QUOPRI_BODY_MAP = _QUOPRI_HEADER_MAP.copy()
++_QUOPRI_MAP = ['=%02X' % c for c in range(256)]
++_QUOPRI_HEADER_MAP = _QUOPRI_MAP[:]
++_QUOPRI_BODY_MAP = _QUOPRI_MAP[:]
+
+ # Safe header bytes which need no encoding.
+ for c in b'-!*+/' + ascii_letters.encode('ascii') + digits.encode('ascii'):
+@@ -121,8 +122,7 @@ def unquote(s):
+
+
+ def quote(c):
+-    return '=%02X' % ord(c)
+-
++    return _QUOPRI_MAP[ord(c)]
+
+
+ def header_encode(header_bytes, charset='iso-8859-1'):
+@@ -140,67 +140,15 @@ def header_encode(header_bytes, charset=
+     if not header_bytes:
+         return ''
+     # Iterate over every byte, encoding if necessary.
+-    encoded = []
+-    for octet in header_bytes:
+-        encoded.append(_QUOPRI_HEADER_MAP[octet])
++    encoded = header_bytes.decode('latin1').translate(_QUOPRI_HEADER_MAP)
+     # Now add the RFC chrome to each encoded chunk and glue the chunks
+     # together.
+-    return '=?%s?q?%s?=' % (charset, EMPTYSTRING.join(encoded))
++    return '=?%s?q?%s?=' % (charset, encoded)
+
+
+-class _body_accumulator(io.StringIO):
+-
+-    def __init__(self, maxlinelen, eol, *args, **kw):
+-        super().__init__(*args, **kw)
+-        self.eol = eol
+-        self.maxlinelen = self.room = maxlinelen
+-
+-    def write_str(self, s):
+-        """Add string s to the accumulated body."""
+-        self.write(s)
+-        self.room -= len(s)
+-
+-    def newline(self):
+-        """Write eol, then start new line."""
+-        self.write_str(self.eol)
+-        self.room = self.maxlinelen
+-
+-    def write_soft_break(self):
+-        """Write a soft break, then start a new line."""
+-        self.write_str('=')
+-        self.newline()
+-
+-    def write_wrapped(self, s, extra_room=0):
+-        """Add a soft line break if needed, then write s."""
+-        if self.room < len(s) + extra_room:
+-            self.write_soft_break()
+-        self.write_str(s)
+-
+-    def write_char(self, c, is_last_char):
+-        if not is_last_char:
+-            # Another character follows on this line, so we must leave
+-            # extra room, either for it or a soft break, and whitespace
+-            # need not be quoted.
+-            self.write_wrapped(c, extra_room=1)
+-        elif c not in ' \t':
+-            # For this and remaining cases, no more characters follow,
+-            # so there is no need to reserve extra room (since a hard
+-            # break will immediately follow).
+-            self.write_wrapped(c)
+-        elif self.room >= 3:
+-            # It's a whitespace character at end-of-line, and we have room
+-            # for the three-character quoted encoding.
+-            self.write(quote(c))
+-        elif self.room == 2:
+-            # There's room for the whitespace character and a soft break.
+-            self.write(c)
+-            self.write_soft_break()
+-        else:
+-            # There's room only for a soft break.  The quoted whitespace
+-            # will be the only content on the subsequent line.
+-            self.write_soft_break()
+-            self.write(quote(c))
+-
++_QUOPRI_BODY_ENCODE_MAP = _QUOPRI_BODY_MAP[:]
++for c in b'\r\n':
++    _QUOPRI_BODY_ENCODE_MAP[c] = chr(c)
+
+ def body_encode(body, maxlinelen=76, eol=NL):
+     """Encode with quoted-printable, wrapping at maxlinelen characters.
+@@ -226,26 +174,56 @@ def body_encode(body, maxlinelen=76, eol
+     if not body:
+         return body
+
+-    # The last line may or may not end in eol, but all other lines do.
+-    last_has_eol = (body[-1] in '\r\n')
++    # quote speacial characters
++    body = body.translate(_QUOPRI_BODY_ENCODE_MAP)
+
+-    # This accumulator will make it easier to build the encoded body.
+-    encoded_body = _body_accumulator(maxlinelen, eol)
++    soft_break = '=' + eol
++    # leave space for the '=' at the end of a line
++    maxlinelen1 = maxlinelen - 1
+
+-    lines = body.splitlines()
+-    last_line_no = len(lines) - 1
+-    for line_no, line in enumerate(lines):
+-        last_char_index = len(line) - 1
+-        for i, c in enumerate(line):
+-            if body_check(ord(c)):
+-                c = quote(c)
+-            encoded_body.write_char(c, i==last_char_index)
+-        # Add an eol if input line had eol.  All input lines have eol except
+-        # possibly the last one.
+-        if line_no < last_line_no or last_has_eol:
+-            encoded_body.newline()
++    encoded_body = []
++    append = encoded_body.append
+
+-    return encoded_body.getvalue()
++    for line in body.splitlines():
++        # break up the line into pieces no longer than maxlinelen - 1
++        start = 0
++        laststart = len(line) - 1 - maxlinelen
++        while start <= laststart:
++            stop = start + maxlinelen1
++            # make sure we don't break up an escape sequence
++            if line[stop - 2] == '=':
++                append(line[start:stop - 1])
++                start = stop - 2
++            elif line[stop - 1] == '=':
++                append(line[start:stop])
++                start = stop - 1
++            else:
++                append(line[start:stop] + '=')
++                start = stop
++
++        # handle rest of line, special case if line ends in whitespace
++        if line and line[-1] in ' \t':
++            room = start - laststart
++            if room >= 3:
++                # It's a whitespace character at end-of-line, and we have room
++                # for the three-character quoted encoding.
++                q = quote(line[-1])
++            elif room == 2:
++                # There's room for the whitespace character and a soft break.
++                q = line[-1] + soft_break
++            else:
++                # There's room only for a soft break.  The quoted whitespace
++                # will be the only content on the subsequent line.
++                q = soft_break + quote(line[-1])
++            append(line[start:-1] + q)
++        else:
++            append(line[start:])
++
++    # add back final newline if present
++    if body[-1] in CRLF:
++        append('')
++
++    return eol.join(encoded_body)
+
+
+
+diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
+--- a/Lib/test/test_email/test_email.py
++++ b/Lib/test/test_email/test_email.py
+@@ -4216,6 +4216,11 @@ class TestQuopri(unittest.TestCase):
+     def test_encode_one_line_eol(self):
+         self._test_encode('hello\n', 'hello\r\n', eol='\r\n')
+
++    def test_encode_one_line_eol_after_non_ascii(self):
++        # issue 20206; see changeset 0cf700464177 for why the encode/decode.
++        self._test_encode('hello\u03c5\n'.encode('utf-8').decode('latin1'),
++                          'hello=CF=85\r\n', eol='\r\n')
++
+     def test_encode_one_space(self):
+         self._test_encode(' ', '=20')
+
author	Andras Timar <andras.timar@collabora.com>	2014-01-15 09:44:40 +0100
committer	Andras Timar <andras.timar@collabora.com>	2014-01-15 09:45:13 +0100
commit	65be22476b586637ecccc6b96fab187e5933cd00 (patch)
tree	a2e947ec92755f2aaba74691d3de39396163df0c /external
parent	0b3513f34d1b930c17f42ac2cadad7fa43636848 (diff)