diff options
author | Andras Timar <andras.timar@collabora.com> | 2014-01-15 09:44:40 +0100 |
---|---|---|
committer | Andras Timar <andras.timar@collabora.com> | 2014-01-15 09:45:13 +0100 |
commit | 65be22476b586637ecccc6b96fab187e5933cd00 (patch) | |
tree | a2e947ec92755f2aaba74691d3de39396163df0c /external | |
parent | 0b3513f34d1b930c17f42ac2cadad7fa43636848 (diff) |
fdo#70796 fix quoted printable encoding bug in internal Python
Change-Id: I4e5563c47df83c50df75ccf330fbd38ec6da9170
Diffstat (limited to 'external')
-rw-r--r-- | external/python3/UnpackedTarball_python3.mk | 1 | ||||
-rw-r--r-- | external/python3/python-3.3.3-quoted-printable.patch.1 | 201 |
2 files changed, 202 insertions, 0 deletions
diff --git a/external/python3/UnpackedTarball_python3.mk b/external/python3/UnpackedTarball_python3.mk index 200a9e1372bb..1de6b111a06f 100644 --- a/external/python3/UnpackedTarball_python3.mk +++ b/external/python3/UnpackedTarball_python3.mk @@ -32,6 +32,7 @@ $(eval $(call gb_UnpackedTarball_add_patches,python3,\ external/python3/python-3.3.0-gcc-4.8.patch.1 \ external/python3/python-3.3.0-pythreadstate.patch.1 \ external/python3/python-3.3.0-clang.patch.1 \ + external/python3/python-3.3.3-quoted-printable.patch.1 \ )) ifneq ($(filter DRAGONFLY FREEBSD LINUX NETBSD OPENBSD SOLARIS,$(OS)),) diff --git a/external/python3/python-3.3.3-quoted-printable.patch.1 b/external/python3/python-3.3.3-quoted-printable.patch.1 new file mode 100644 index 000000000000..30c065dd26fd --- /dev/null +++ b/external/python3/python-3.3.3-quoted-printable.patch.1 @@ -0,0 +1,201 @@ + +# HG changeset patch +# User R David Murray <rdmurray@bitdance.com> +# Date 1389637161 18000 +# Node ID 4c5b1932354bc4707ef182cf0fa61b2e8ccfaa5e +# Parent 0ce2396a134bebca11b17337734d5e9966e2a95f +#20206, #5803: more efficient algorithm that doesn't truncate output. + +This fixes an edge case (20206) where if the input ended in a character +needing encoding but there was no newline on the string, the last byte +of the encoded character would be dropped. The fix is to use a more +efficient algorithm, provided by Serhiy Storchaka (5803), that does not +have the bug. + +diff --git a/Lib/email/quoprimime.py b/Lib/email/quoprimime.py +--- a/Lib/email/quoprimime.py ++++ b/Lib/email/quoprimime.py +@@ -53,8 +53,9 @@ EMPTYSTRING = '' + # space-wise. Remember that headers and bodies have different sets of safe + # characters. Initialize both maps with the full expansion, and then override + # the safe bytes with the more compact form. +-_QUOPRI_HEADER_MAP = dict((c, '=%02X' % c) for c in range(256)) +-_QUOPRI_BODY_MAP = _QUOPRI_HEADER_MAP.copy() ++_QUOPRI_MAP = ['=%02X' % c for c in range(256)] ++_QUOPRI_HEADER_MAP = _QUOPRI_MAP[:] ++_QUOPRI_BODY_MAP = _QUOPRI_MAP[:] + + # Safe header bytes which need no encoding. + for c in b'-!*+/' + ascii_letters.encode('ascii') + digits.encode('ascii'): +@@ -121,8 +122,7 @@ def unquote(s): + + + def quote(c): +- return '=%02X' % ord(c) +- ++ return _QUOPRI_MAP[ord(c)] + + + def header_encode(header_bytes, charset='iso-8859-1'): +@@ -140,67 +140,15 @@ def header_encode(header_bytes, charset= + if not header_bytes: + return '' + # Iterate over every byte, encoding if necessary. +- encoded = [] +- for octet in header_bytes: +- encoded.append(_QUOPRI_HEADER_MAP[octet]) ++ encoded = header_bytes.decode('latin1').translate(_QUOPRI_HEADER_MAP) + # Now add the RFC chrome to each encoded chunk and glue the chunks + # together. +- return '=?%s?q?%s?=' % (charset, EMPTYSTRING.join(encoded)) ++ return '=?%s?q?%s?=' % (charset, encoded) + + +-class _body_accumulator(io.StringIO): +- +- def __init__(self, maxlinelen, eol, *args, **kw): +- super().__init__(*args, **kw) +- self.eol = eol +- self.maxlinelen = self.room = maxlinelen +- +- def write_str(self, s): +- """Add string s to the accumulated body.""" +- self.write(s) +- self.room -= len(s) +- +- def newline(self): +- """Write eol, then start new line.""" +- self.write_str(self.eol) +- self.room = self.maxlinelen +- +- def write_soft_break(self): +- """Write a soft break, then start a new line.""" +- self.write_str('=') +- self.newline() +- +- def write_wrapped(self, s, extra_room=0): +- """Add a soft line break if needed, then write s.""" +- if self.room < len(s) + extra_room: +- self.write_soft_break() +- self.write_str(s) +- +- def write_char(self, c, is_last_char): +- if not is_last_char: +- # Another character follows on this line, so we must leave +- # extra room, either for it or a soft break, and whitespace +- # need not be quoted. +- self.write_wrapped(c, extra_room=1) +- elif c not in ' \t': +- # For this and remaining cases, no more characters follow, +- # so there is no need to reserve extra room (since a hard +- # break will immediately follow). +- self.write_wrapped(c) +- elif self.room >= 3: +- # It's a whitespace character at end-of-line, and we have room +- # for the three-character quoted encoding. +- self.write(quote(c)) +- elif self.room == 2: +- # There's room for the whitespace character and a soft break. +- self.write(c) +- self.write_soft_break() +- else: +- # There's room only for a soft break. The quoted whitespace +- # will be the only content on the subsequent line. +- self.write_soft_break() +- self.write(quote(c)) +- ++_QUOPRI_BODY_ENCODE_MAP = _QUOPRI_BODY_MAP[:] ++for c in b'\r\n': ++ _QUOPRI_BODY_ENCODE_MAP[c] = chr(c) + + def body_encode(body, maxlinelen=76, eol=NL): + """Encode with quoted-printable, wrapping at maxlinelen characters. +@@ -226,26 +174,56 @@ def body_encode(body, maxlinelen=76, eol + if not body: + return body + +- # The last line may or may not end in eol, but all other lines do. +- last_has_eol = (body[-1] in '\r\n') ++ # quote speacial characters ++ body = body.translate(_QUOPRI_BODY_ENCODE_MAP) + +- # This accumulator will make it easier to build the encoded body. +- encoded_body = _body_accumulator(maxlinelen, eol) ++ soft_break = '=' + eol ++ # leave space for the '=' at the end of a line ++ maxlinelen1 = maxlinelen - 1 + +- lines = body.splitlines() +- last_line_no = len(lines) - 1 +- for line_no, line in enumerate(lines): +- last_char_index = len(line) - 1 +- for i, c in enumerate(line): +- if body_check(ord(c)): +- c = quote(c) +- encoded_body.write_char(c, i==last_char_index) +- # Add an eol if input line had eol. All input lines have eol except +- # possibly the last one. +- if line_no < last_line_no or last_has_eol: +- encoded_body.newline() ++ encoded_body = [] ++ append = encoded_body.append + +- return encoded_body.getvalue() ++ for line in body.splitlines(): ++ # break up the line into pieces no longer than maxlinelen - 1 ++ start = 0 ++ laststart = len(line) - 1 - maxlinelen ++ while start <= laststart: ++ stop = start + maxlinelen1 ++ # make sure we don't break up an escape sequence ++ if line[stop - 2] == '=': ++ append(line[start:stop - 1]) ++ start = stop - 2 ++ elif line[stop - 1] == '=': ++ append(line[start:stop]) ++ start = stop - 1 ++ else: ++ append(line[start:stop] + '=') ++ start = stop ++ ++ # handle rest of line, special case if line ends in whitespace ++ if line and line[-1] in ' \t': ++ room = start - laststart ++ if room >= 3: ++ # It's a whitespace character at end-of-line, and we have room ++ # for the three-character quoted encoding. ++ q = quote(line[-1]) ++ elif room == 2: ++ # There's room for the whitespace character and a soft break. ++ q = line[-1] + soft_break ++ else: ++ # There's room only for a soft break. The quoted whitespace ++ # will be the only content on the subsequent line. ++ q = soft_break + quote(line[-1]) ++ append(line[start:-1] + q) ++ else: ++ append(line[start:]) ++ ++ # add back final newline if present ++ if body[-1] in CRLF: ++ append('') ++ ++ return eol.join(encoded_body) + + + +diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py +--- a/Lib/test/test_email/test_email.py ++++ b/Lib/test/test_email/test_email.py +@@ -4216,6 +4216,11 @@ class TestQuopri(unittest.TestCase): + def test_encode_one_line_eol(self): + self._test_encode('hello\n', 'hello\r\n', eol='\r\n') + ++ def test_encode_one_line_eol_after_non_ascii(self): ++ # issue 20206; see changeset 0cf700464177 for why the encode/decode. ++ self._test_encode('hello\u03c5\n'.encode('utf-8').decode('latin1'), ++ 'hello=CF=85\r\n', eol='\r\n') ++ + def test_encode_one_space(self): + self._test_encode(' ', '=20') + |