diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst index 5e08d56fd66186..32da8294c5a58a 100644 --- a/Doc/library/base64.rst +++ b/Doc/library/base64.rst @@ -73,8 +73,8 @@ POST request. Added the *padded* and *wrapcol* parameters. -.. function:: b64decode(s, altchars=None, validate=False, *, padded=True) - b64decode(s, altchars=None, validate=True, *, ignorechars, padded=True) +.. function:: b64decode(s, altchars=None, validate=False, *, padded=True, canonical=False) + b64decode(s, altchars=None, validate=True, *, ignorechars, padded=True, canonical=False) Decode the Base64 encoded :term:`bytes-like object` or ASCII string *s* and return the decoded :class:`bytes`. @@ -112,10 +112,13 @@ POST request. If *validate* is true, these non-alphabet characters in the input result in a :exc:`binascii.Error`. + If *canonical* is true, non-zero padding bits are rejected. + See :func:`binascii.a2b_base64` for details. + For more information about the strict base64 check, see :func:`binascii.a2b_base64` .. versionchanged:: 3.15 - Added the *ignorechars* and *padded* parameters. + Added the *canonical*, *ignorechars*, and *padded* parameters. .. deprecated:: 3.15 Accepting the ``+`` and ``/`` characters with an alternative alphabet @@ -179,7 +182,7 @@ POST request. Added the *padded* and *wrapcol* parameters. -.. function:: b32decode(s, casefold=False, map01=None, *, padded=True, ignorechars=b'') +.. function:: b32decode(s, casefold=False, map01=None, *, padded=True, ignorechars=b'', canonical=False) Decode the Base32 encoded :term:`bytes-like object` or ASCII string *s* and return the decoded :class:`bytes`. @@ -205,12 +208,15 @@ POST request. *ignorechars* should be a :term:`bytes-like object` containing characters to ignore from the input. + If *canonical* is true, non-zero padding bits are rejected. + See :func:`binascii.a2b_base32` for details. + A :exc:`binascii.Error` is raised if *s* is incorrectly padded or if there are non-alphabet characters present in the input. .. versionchanged:: 3.15 - Added the *ignorechars* and *padded* parameters. + Added the *canonical*, *ignorechars*, and *padded* parameters. .. function:: b32hexencode(s, *, padded=True, wrapcol=0) @@ -224,7 +230,7 @@ POST request. Added the *padded* and *wrapcol* parameters. -.. function:: b32hexdecode(s, casefold=False, *, padded=True, ignorechars=b'') +.. function:: b32hexdecode(s, casefold=False, *, padded=True, ignorechars=b'', canonical=False) Similar to :func:`b32decode` but uses the Extended Hex Alphabet, as defined in :rfc:`4648`. @@ -237,7 +243,7 @@ POST request. .. versionadded:: 3.10 .. versionchanged:: 3.15 - Added the *ignorechars* and *padded* parameters. + Added the *canonical*, *ignorechars*, and *padded* parameters. .. function:: b16encode(s, *, wrapcol=0) @@ -317,7 +323,7 @@ Refer to the documentation of the individual functions for more information. .. versionadded:: 3.4 -.. function:: a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v') +.. function:: a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v', canonical=False) Decode the Ascii85 encoded :term:`bytes-like object` or ASCII string *b* and return the decoded :class:`bytes`. @@ -334,8 +340,16 @@ Refer to the documentation of the individual functions for more information. This should only contain whitespace characters, and by default contains all whitespace characters in ASCII. + If *canonical* is true, non-canonical encodings are rejected. + See :func:`binascii.a2b_ascii85` for details. + .. versionadded:: 3.4 + .. versionchanged:: next + Added the *canonical* parameter. + Single-character final groups are now always rejected as encoding + violations. + .. function:: b85encode(b, pad=False, *, wrapcol=0) @@ -355,7 +369,7 @@ Refer to the documentation of the individual functions for more information. Added the *wrapcol* parameter. -.. function:: b85decode(b, *, ignorechars=b'') +.. function:: b85decode(b, *, ignorechars=b'', canonical=False) Decode the base85-encoded :term:`bytes-like object` or ASCII string *b* and return the decoded :class:`bytes`. Padding is implicitly removed, if @@ -364,10 +378,15 @@ Refer to the documentation of the individual functions for more information. *ignorechars* should be a :term:`bytes-like object` containing characters to ignore from the input. + If *canonical* is true, non-canonical encodings are rejected. + See :func:`binascii.a2b_base85` for details. + .. versionadded:: 3.4 .. versionchanged:: 3.15 - Added the *ignorechars* parameter. + Added the *canonical* and *ignorechars* parameters. + Single-character final groups are now always rejected as encoding + violations. .. function:: z85encode(s, pad=False, *, wrapcol=0) @@ -392,7 +411,7 @@ Refer to the documentation of the individual functions for more information. Added the *wrapcol* parameter. -.. function:: z85decode(s, *, ignorechars=b'') +.. function:: z85decode(s, *, ignorechars=b'', canonical=False) Decode the Z85-encoded :term:`bytes-like object` or ASCII string *s* and return the decoded :class:`bytes`. See `Z85 specification @@ -401,10 +420,15 @@ Refer to the documentation of the individual functions for more information. *ignorechars* should be a :term:`bytes-like object` containing characters to ignore from the input. + If *canonical* is true, non-canonical encodings are rejected. + See :func:`binascii.a2b_base85` for details. + .. versionadded:: 3.13 .. versionchanged:: 3.15 - Added the *ignorechars* parameter. + Added the *canonical* and *ignorechars* parameters. + Single-character final groups are now always rejected as encoding + violations. .. _base64-legacy: diff --git a/Doc/library/binascii.rst b/Doc/library/binascii.rst index 08a82cc4b5f6d5..8b4ba6ae9fb254 100644 --- a/Doc/library/binascii.rst +++ b/Doc/library/binascii.rst @@ -48,8 +48,8 @@ The :mod:`!binascii` module defines the following functions: Added the *backtick* parameter. -.. function:: a2b_base64(string, /, *, padded=True, alphabet=BASE64_ALPHABET, strict_mode=False) - a2b_base64(string, /, *, ignorechars, padded=True, alphabet=BASE64_ALPHABET, strict_mode=True) +.. function:: a2b_base64(string, /, *, padded=True, alphabet=BASE64_ALPHABET, strict_mode=False, canonical=False) + a2b_base64(string, /, *, ignorechars, padded=True, alphabet=BASE64_ALPHABET, strict_mode=True, canonical=False) Convert a block of base64 data back to binary and return the binary data. More than one line may be passed at a time. @@ -83,11 +83,15 @@ The :mod:`!binascii` module defines the following functions: * Contains no excess data after padding (including excess padding, newlines, etc.). * Does not start with a padding. + If *canonical* is true, non-zero padding bits in the last group are rejected + with :exc:`binascii.Error`, enforcing canonical encoding as defined in + :rfc:`4648` section 3.5. This check is independent of *strict_mode*. + .. versionchanged:: 3.11 Added the *strict_mode* parameter. .. versionchanged:: 3.15 - Added the *alphabet*, *ignorechars* and *padded* parameters. + Added the *alphabet*, *canonical*, *ignorechars*, and *padded* parameters. .. function:: b2a_base64(data, *, padded=True, alphabet=BASE64_ALPHABET, wrapcol=0, newline=True) @@ -113,7 +117,7 @@ The :mod:`!binascii` module defines the following functions: Added the *alphabet*, *padded* and *wrapcol* parameters. -.. function:: a2b_ascii85(string, /, *, foldspaces=False, adobe=False, ignorechars=b'') +.. function:: a2b_ascii85(string, /, *, foldspaces=False, adobe=False, ignorechars=b'', canonical=False) Convert Ascii85 data back to binary and return the binary data. @@ -122,7 +126,8 @@ The :mod:`!binascii` module defines the following functions: characters). Each group encodes 32 bits of binary data in the range from ``0`` to ``2 ** 32 - 1``, inclusive. The special character ``z`` is accepted as a short form of the group ``!!!!!``, which encodes four - consecutive null bytes. + consecutive null bytes. A single-character final group is always rejected + as an encoding violation. *foldspaces* is a flag that specifies whether the 'y' short sequence should be accepted as shorthand for 4 consecutive spaces (ASCII 0x20). @@ -135,6 +140,12 @@ The :mod:`!binascii` module defines the following functions: to ignore from the input. This should only contain whitespace characters. + If *canonical* is true, non-canonical encodings are rejected with + :exc:`binascii.Error`. Here "canonical" means the encoding that + :func:`b2a_ascii85` would produce: the ``z`` abbreviation must be used + for all-zero groups (rather than ``!!!!!``), and partial final groups + must use the same padding digits as the encoder. + Invalid Ascii85 data will raise :exc:`binascii.Error`. .. versionadded:: 3.15 @@ -163,7 +174,7 @@ The :mod:`!binascii` module defines the following functions: .. versionadded:: 3.15 -.. function:: a2b_base85(string, /, *, alphabet=BASE85_ALPHABET, ignorechars=b'') +.. function:: a2b_base85(string, /, *, alphabet=BASE85_ALPHABET, ignorechars=b'', canonical=False) Convert Base85 data back to binary and return the binary data. More than one line may be passed at a time. @@ -171,7 +182,8 @@ The :mod:`!binascii` module defines the following functions: Valid Base85 data contains characters from the Base85 alphabet in groups of five (except for the final group, which may have from two to five characters). Each group encodes 32 bits of binary data in the range from - ``0`` to ``2 ** 32 - 1``, inclusive. + ``0`` to ``2 ** 32 - 1``, inclusive. A single-character final group is + always rejected as an encoding violation. Optional *alphabet* must be a :class:`bytes` object of length 85 which specifies an alternative alphabet. @@ -179,6 +191,11 @@ The :mod:`!binascii` module defines the following functions: *ignorechars* should be a :term:`bytes-like object` containing characters to ignore from the input. + If *canonical* is true, non-canonical encodings are rejected with + :exc:`binascii.Error`. Here "canonical" means the encoding that + :func:`b2a_base85` would produce: partial final groups must use the + same padding digits as the encoder. + Invalid Base85 data will raise :exc:`binascii.Error`. .. versionadded:: 3.15 @@ -202,7 +219,7 @@ The :mod:`!binascii` module defines the following functions: .. versionadded:: 3.15 -.. function:: a2b_base32(string, /, *, padded=True, alphabet=BASE32_ALPHABET, ignorechars=b'') +.. function:: a2b_base32(string, /, *, padded=True, alphabet=BASE32_ALPHABET, ignorechars=b'', canonical=False) Convert base32 data back to binary and return the binary data. @@ -231,6 +248,10 @@ The :mod:`!binascii` module defines the following functions: presented before the end of the encoded data and the excess pad characters will be ignored. + If *canonical* is true, non-zero padding bits in the last group are rejected + with :exc:`binascii.Error`, enforcing canonical encoding as defined in + :rfc:`4648` section 3.5. + Invalid base32 data will raise :exc:`binascii.Error`. .. versionadded:: 3.15 diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 8f792800fa64d9..2a1d2fdf43eed1 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -729,6 +729,15 @@ base64 :func:`~base64.z85decode`. (Contributed by Serhiy Storchaka in :gh:`144001` and :gh:`146431`.) +* Added the *canonical* parameter in + :func:`~base64.b32decode`, :func:`~base64.b32hexdecode`, + :func:`~base64.b64decode`, :func:`~base64.urlsafe_b64decode`, + :func:`~base64.a85decode`, :func:`~base64.b85decode`, and + :func:`~base64.z85decode`, + to reject encodings with non-zero padding bits or other non-canonical + forms. + (Contributed by Gregory P. Smith in :gh:`146311`.) + binascii -------- @@ -762,6 +771,10 @@ binascii :func:`~binascii.unhexlify`, and :func:`~binascii.a2b_base64`. (Contributed by Serhiy Storchaka in :gh:`144001` and :gh:`146431`.) +* Added the *canonical* parameter in :func:`~binascii.a2b_base64`, + to reject encodings with non-zero padding bits. + (Contributed by Gregory P. Smith in :gh:`146311`.) + calendar -------- diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index beae65213a27b6..4fd42185d8a4a1 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -1636,6 +1636,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(callable)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(callback)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cancel)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(canonical)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(capath)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(capitals)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(category)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index bb1c6dbaf03906..f2d43c22069b92 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -359,6 +359,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(callable) STRUCT_FOR_ID(callback) STRUCT_FOR_ID(cancel) + STRUCT_FOR_ID(canonical) STRUCT_FOR_ID(capath) STRUCT_FOR_ID(capitals) STRUCT_FOR_ID(category) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 64b029797ab9b3..6ee64a461d8568 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -1634,6 +1634,7 @@ extern "C" { INIT_ID(callable), \ INIT_ID(callback), \ INIT_ID(cancel), \ + INIT_ID(canonical), \ INIT_ID(capath), \ INIT_ID(capitals), \ INIT_ID(category), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index 461ee36dcebb6d..bcb117e1091674 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -1216,6 +1216,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(canonical); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(capath); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Lib/base64.py b/Lib/base64.py index 7f39c68070b5da..4b810e08569e5b 100644 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -68,7 +68,7 @@ def b64encode(s, altchars=None, *, padded=True, wrapcol=0): def b64decode(s, altchars=None, validate=_NOT_SPECIFIED, - *, padded=True, ignorechars=_NOT_SPECIFIED): + *, padded=True, ignorechars=_NOT_SPECIFIED, canonical=False): """Decode the Base64 encoded bytes-like object or ASCII string s. Optional altchars must be a bytes-like object or ASCII string of length 2 @@ -110,11 +110,13 @@ def b64decode(s, altchars=None, validate=_NOT_SPECIFIED, alphabet = binascii.BASE64_ALPHABET[:-2] + altchars return binascii.a2b_base64(s, strict_mode=validate, alphabet=alphabet, - padded=padded, ignorechars=ignorechars) + padded=padded, ignorechars=ignorechars, + canonical=canonical) if ignorechars is _NOT_SPECIFIED: ignorechars = b'' result = binascii.a2b_base64(s, strict_mode=validate, - padded=padded, ignorechars=ignorechars) + padded=padded, ignorechars=ignorechars, + canonical=canonical) if badchar is not None: import warnings if validate: @@ -230,7 +232,8 @@ def b32encode(s, *, padded=True, wrapcol=0): return binascii.b2a_base32(s, padded=padded, wrapcol=wrapcol) b32encode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32') -def b32decode(s, casefold=False, map01=None, *, padded=True, ignorechars=b''): +def b32decode(s, casefold=False, map01=None, *, padded=True, ignorechars=b'', + canonical=False): s = _bytes_from_decode_data(s) # Handle section 2.4 zero and one mapping. The flag map01 will be either # False, or the character to map the digit 1 (one) to. It should be @@ -240,7 +243,8 @@ def b32decode(s, casefold=False, map01=None, *, padded=True, ignorechars=b''): s = s.translate(bytes.maketrans(b'01', b'O' + map01)) if casefold: s = s.upper() - return binascii.a2b_base32(s, padded=padded, ignorechars=ignorechars) + return binascii.a2b_base32(s, padded=padded, ignorechars=ignorechars, + canonical=canonical) b32decode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32', extra_args=_B32_DECODE_MAP01_DOCSTRING) @@ -249,13 +253,15 @@ def b32hexencode(s, *, padded=True, wrapcol=0): alphabet=binascii.BASE32HEX_ALPHABET) b32hexencode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32hex') -def b32hexdecode(s, casefold=False, *, padded=True, ignorechars=b''): +def b32hexdecode(s, casefold=False, *, padded=True, ignorechars=b'', + canonical=False): s = _bytes_from_decode_data(s) # base32hex does not have the 01 mapping if casefold: s = s.upper() return binascii.a2b_base32(s, alphabet=binascii.BASE32HEX_ALPHABET, - padded=padded, ignorechars=ignorechars) + padded=padded, ignorechars=ignorechars, + canonical=canonical) b32hexdecode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32hex', extra_args='') @@ -323,7 +329,8 @@ def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False): return binascii.b2a_ascii85(b, foldspaces=foldspaces, adobe=adobe, wrapcol=wrapcol, pad=pad) -def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'): +def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v', + canonical=False): """Decode the Ascii85 encoded bytes-like object or ASCII string b. foldspaces is a flag that specifies whether the 'y' short sequence should be @@ -337,10 +344,13 @@ def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'): input. This should only contain whitespace characters, and by default contains all whitespace characters in ASCII. + If canonical is true, non-canonical encodings are rejected. + The result is returned as a bytes object. """ return binascii.a2b_ascii85(b, foldspaces=foldspaces, - adobe=adobe, ignorechars=ignorechars) + adobe=adobe, ignorechars=ignorechars, + canonical=canonical) def b85encode(b, pad=False, *, wrapcol=0): """Encode bytes-like object b in base85 format and return a bytes object. @@ -353,12 +363,15 @@ def b85encode(b, pad=False, *, wrapcol=0): """ return binascii.b2a_base85(b, wrapcol=wrapcol, pad=pad) -def b85decode(b, *, ignorechars=b''): +def b85decode(b, *, ignorechars=b'', canonical=False): """Decode the base85-encoded bytes-like object or ASCII string b + If canonical is true, non-canonical encodings are rejected. + The result is returned as a bytes object. """ - return binascii.a2b_base85(b, ignorechars=ignorechars) + return binascii.a2b_base85(b, ignorechars=ignorechars, + canonical=canonical) def z85encode(s, pad=False, *, wrapcol=0): """Encode bytes-like object b in z85 format and return a bytes object. @@ -372,12 +385,15 @@ def z85encode(s, pad=False, *, wrapcol=0): return binascii.b2a_base85(s, wrapcol=wrapcol, pad=pad, alphabet=binascii.Z85_ALPHABET) -def z85decode(s, *, ignorechars=b''): +def z85decode(s, *, ignorechars=b'', canonical=False): """Decode the z85-encoded bytes-like object or ASCII string b + If canonical is true, non-canonical encodings are rejected. + The result is returned as a bytes object. """ - return binascii.a2b_base85(s, alphabet=binascii.Z85_ALPHABET, ignorechars=ignorechars) + return binascii.a2b_base85(s, alphabet=binascii.Z85_ALPHABET, + ignorechars=ignorechars, canonical=canonical) # Legacy interface. This code could be cleaned up since I don't believe # binascii has any line length limitations. It just doesn't seem worth it diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index 81cdacb96241e2..6991e2ef6815e3 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -383,6 +383,49 @@ def assertInvalidLength(data, strict_mode=True): assertInvalidLength(b'A\tB\nC ??DE', # only 5 valid characters strict_mode=False) + def test_base64_canonical(self): + # https://datatracker.ietf.org/doc/html/rfc4648.html#section-3.5 + # Decoders MAY reject encoded data if the pad bits are not zero. + + # Without canonical=True, non-zero padding bits are accepted + self.assertEqual(binascii.a2b_base64(self.type2test(b'AB==')), b'\x00') + self.assertEqual(binascii.a2b_base64(self.type2test(b'AB=='), + strict_mode=True), b'\x00') + + # 2 data chars + "==": last char has 4 padding bits + # 'A' = 0, 'B' = 1 -> leftover 0001 (non-zero) + with self.assertRaises(binascii.Error): + binascii.a2b_base64(self.type2test(b'AB=='), canonical=True) + # 'A' = 0, 'P' = 15 -> leftover 1111 (non-zero) + with self.assertRaises(binascii.Error): + binascii.a2b_base64(self.type2test(b'AP=='), canonical=True) + + # 3 data chars + "=": last char has 2 padding bits + # 'A' = 0, 'A' = 0, 'B' = 1 -> leftover 01 (non-zero) + with self.assertRaises(binascii.Error): + binascii.a2b_base64(self.type2test(b'AAB='), canonical=True) + # 'A' = 0, 'A' = 0, 'D' = 3 -> leftover 11 (non-zero) + with self.assertRaises(binascii.Error): + binascii.a2b_base64(self.type2test(b'AAD='), canonical=True) + + # Verify that zero padding bits are accepted + binascii.a2b_base64(self.type2test(b'AA=='), canonical=True) + binascii.a2b_base64(self.type2test(b'AAA='), canonical=True) + + # Full quads with no padding have no leftover bits -- always valid + binascii.a2b_base64(self.type2test(b'AAAA'), canonical=True) + + @hypothesis.given(payload=hypothesis.strategies.binary()) + @hypothesis.example(b'') + @hypothesis.example(b'\x00') + @hypothesis.example(b'\xff\xff') + @hypothesis.example(b'abc') + def test_base64_canonical_roundtrip(self, payload): + # The encoder must always produce canonical output. + encoded = binascii.b2a_base64(payload, newline=False) + decoded = binascii.a2b_base64(encoded, canonical=True) + self.assertEqual(decoded, payload) + def test_base64_alphabet(self): alphabet = (b'!"#$%&\'()*+,-012345689@' b'ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr') @@ -443,20 +486,22 @@ def test_ascii85_valid(self): res += b self.assertEqual(res, rawdata) - # Test decoding inputs with length 1 mod 5 - params = [ - (b"a", False, False, b"", b""), - (b"xbw", False, False, b"wx", b""), - (b"<~c~>", False, True, b"", b""), - (b"{d ~>", False, True, b" {", b""), - (b"ye", True, False, b"", b" "), - (b"z\x01y\x00f", True, False, b"\x00\x01", b"\x00\x00\x00\x00 "), - (b"<~FCfN8yg~>", True, True, b"", b"test "), - (b"FE;\x03#8zFCf\x02N8yh~>", True, True, b"\x02\x03", b"tset\x00\x00\x00\x00test "), + # Inputs with length 1 mod 5 end with a 1-char group, which is + # an encoding violation per the PLRM spec. + error_params = [ + (b"a", False, False, b""), + (b"xbw", False, False, b"wx"), + (b"<~c~>", False, True, b""), + (b"{d ~>", False, True, b" {"), + (b"ye", True, False, b""), + (b"z\x01y\x00f", True, False, b"\x00\x01"), + (b"<~FCfN8yg~>", True, True, b""), + (b"FE;\x03#8zFCf\x02N8yh~>", True, True, b"\x02\x03"), ] - for a, foldspaces, adobe, ignorechars, b in params: + for a, foldspaces, adobe, ignorechars in error_params: kwargs = {"foldspaces": foldspaces, "adobe": adobe, "ignorechars": ignorechars} - self.assertEqual(binascii.a2b_ascii85(self.type2test(a), **kwargs), b) + with self.assertRaises(binascii.Error): + binascii.a2b_ascii85(self.type2test(a), **kwargs) def test_ascii85_invalid(self): # Test Ascii85 with invalid characters interleaved @@ -670,16 +715,18 @@ def test_base85_valid(self): self.assertEqual(res, self.rawdata) # Test decoding inputs with different length - self.assertEqual(binascii.a2b_base85(self.type2test(b'a')), b'') - self.assertEqual(binascii.a2b_base85(self.type2test(b'a')), b'') + # 1-char groups are rejected (encoding violation) + with self.assertRaises(binascii.Error): + binascii.a2b_base85(self.type2test(b'a')) self.assertEqual(binascii.a2b_base85(self.type2test(b'ab')), b'q') self.assertEqual(binascii.a2b_base85(self.type2test(b'abc')), b'qa') self.assertEqual(binascii.a2b_base85(self.type2test(b'abcd')), b'qa\x9e') self.assertEqual(binascii.a2b_base85(self.type2test(b'abcde')), b'qa\x9e\xb6') - self.assertEqual(binascii.a2b_base85(self.type2test(b'abcdef')), - b'qa\x9e\xb6') + # 6-char input = full 5-char group + trailing 1-char group (rejected) + with self.assertRaises(binascii.Error): + binascii.a2b_base85(self.type2test(b'abcdef')) self.assertEqual(binascii.a2b_base85(self.type2test(b'abcdefg')), b'qa\x9e\xb6\x81') @@ -767,6 +814,169 @@ def test_base85_alphabet(self): with self.assertRaises(TypeError): binascii.a2b_base64(data, alphabet=bytearray(alphabet)) + def test_base85_canonical(self): + # Non-canonical encodings are accepted without canonical=True + self.assertEqual(binascii.a2b_base85(b'VF'), b'a') + + # 1-char partial groups are always rejected (encoding violation: + # no conforming encoder produces them) + with self.assertRaises(binascii.Error): + binascii.a2b_base85(b'V') + with self.assertRaises(binascii.Error): + binascii.a2b_base85(b'0') + + # Verify round-trip: encode then decode with canonical=True works + for data in [b'a', b'ab', b'abc', b'abcd', b'abcde', + b'\x00', b'\xff', b'\x00\x00', b'\xff\xff\xff']: + encoded = binascii.b2a_base85(data) + decoded = binascii.a2b_base85(encoded, canonical=True) + self.assertEqual(decoded, data) + + # Test non-canonical rejection for each partial group size + # (2-char/1-byte, 3-char/2-byte, 4-char/3-byte). + # Incrementing the last digit by 1 produces a non-canonical + # encoding. For 4-char groups (n_pad=1) a +1 can change the + # output byte, so we use b'ab\x00' whose canonical form allows + # a +1 that still decodes to the same 3 bytes. + for data in [b'a', b'ab', b'ab\x00']: + canonical_enc = binascii.b2a_base85(data) + non_canonical = (canonical_enc[:-1] + + bytes([canonical_enc[-1] + 1])) + # Same decoded output without canonical check + self.assertEqual(binascii.a2b_base85(non_canonical), data) + # Rejected with canonical=True + with self.assertRaises(binascii.Error): + binascii.a2b_base85(non_canonical, canonical=True) + + # Boundary bytes: \x00 and \xff for each partial group size + for data in [b'\x00', b'\x00\x00', b'\x00\x00\x00', + b'\xff', b'\xff\xff', b'\xff\xff\xff']: + canonical_enc = binascii.b2a_base85(data) + binascii.a2b_base85(canonical_enc, canonical=True) + + # Full 5-char groups are always canonical (no padding bits) + self.assertEqual( + binascii.a2b_base85(b'VPa!s', canonical=True), b'abcd') + + # Empty input is valid + self.assertEqual(binascii.a2b_base85(b'', canonical=True), b'') + + @hypothesis.given(payload=hypothesis.strategies.binary()) + @hypothesis.example(b'') + @hypothesis.example(b'\x00') + @hypothesis.example(b'\xff\xff') + @hypothesis.example(b'abc') + def test_base85_canonical_roundtrip(self, payload): + encoded = binascii.b2a_base85(payload) + decoded = binascii.a2b_base85(encoded, canonical=True) + self.assertEqual(decoded, payload) + + @hypothesis.given(payload=hypothesis.strategies.binary(min_size=1, max_size=3)) + @hypothesis.example(b'\x00') + @hypothesis.example(b'\xff') + @hypothesis.example(b'ab\x00') + def test_base85_canonical_unique(self, payload): + # For a partial group, sweeping all 85 last-digit values should + # yield exactly one encoding that both decodes to the original + # payload AND passes canonical=True. + hypothesis.assume(len(payload) % 4 != 0) + canonical_enc = binascii.b2a_base85(payload) + table = binascii.BASE85_ALPHABET + accepted = [] + for digit in table: + candidate = canonical_enc[:-1] + bytes([digit]) + try: + result = binascii.a2b_base85(candidate, canonical=True) + if result == payload: + accepted.append(candidate) + except binascii.Error: + pass + self.assertEqual(accepted, [canonical_enc]) + + def test_ascii85_canonical(self): + # Non-canonical encodings are accepted without canonical=True + self.assertEqual(binascii.a2b_ascii85(b'@0'), b'a') + + # 1-char partial groups are always rejected (PLRM encoding violation) + with self.assertRaises(binascii.Error): + binascii.a2b_ascii85(b'@') + with self.assertRaises(binascii.Error): + binascii.a2b_ascii85(b'!') + + # Verify round-trip: encode then decode with canonical=True works + for data in [b'a', b'ab', b'abc', b'abcd', b'abcde', + b'\x00', b'\xff', b'\x00\x00', b'\xff\xff\xff']: + encoded = binascii.b2a_ascii85(data) + decoded = binascii.a2b_ascii85(encoded, canonical=True) + self.assertEqual(decoded, data) + + # Test non-canonical rejection for each partial group size. + # See test_base85_canonical for why b'ab\x00' is used for 3 bytes. + for data in [b'a', b'ab', b'ab\x00']: + canonical_enc = binascii.b2a_ascii85(data) + non_canonical = (canonical_enc[:-1] + + bytes([canonical_enc[-1] + 1])) + self.assertEqual(binascii.a2b_ascii85(non_canonical), data) + with self.assertRaises(binascii.Error): + binascii.a2b_ascii85(non_canonical, canonical=True) + + # Full 5-char groups are always canonical + self.assertEqual( + binascii.a2b_ascii85(b'@:E_W', canonical=True), b'abcd') + + # 'z' is the canonical form for all-zero groups per the PLRM. + # '!!!!!' decodes identically but is non-canonical. + self.assertEqual(binascii.a2b_ascii85(b'!!!!!'), b'\x00' * 4) + self.assertEqual(binascii.a2b_ascii85(b'z'), b'\x00' * 4) + self.assertEqual( + binascii.a2b_ascii85(b'z', canonical=True), b'\x00' * 4) + with self.assertRaises(binascii.Error): + binascii.a2b_ascii85(b'!!!!!', canonical=True) + # Multiple groups: z + !!!!! should fail + with self.assertRaises(binascii.Error): + binascii.a2b_ascii85(b'z!!!!!', canonical=True) + # Multiple z groups are fine + self.assertEqual( + binascii.a2b_ascii85(b'zz', canonical=True), b'\x00' * 8) + + # Empty input is valid + self.assertEqual(binascii.a2b_ascii85(b'', canonical=True), b'') + + # Adobe-wrapped with canonical + self.assertEqual( + binascii.a2b_ascii85(b'<~@:E_W~>', canonical=True, adobe=True), + b'abcd') + + @hypothesis.given(payload=hypothesis.strategies.binary()) + @hypothesis.example(b'') + @hypothesis.example(b'\x00') + @hypothesis.example(b'\x00\x00\x00\x00') # triggers z abbreviation + @hypothesis.example(b'\xff\xff') + @hypothesis.example(b'abc') + def test_ascii85_canonical_roundtrip(self, payload): + encoded = binascii.b2a_ascii85(payload) + decoded = binascii.a2b_ascii85(encoded, canonical=True) + self.assertEqual(decoded, payload) + + @hypothesis.given(payload=hypothesis.strategies.binary(min_size=1, max_size=3)) + @hypothesis.example(b'\x00') + @hypothesis.example(b'\xff') + @hypothesis.example(b'ab\x00') + def test_ascii85_canonical_unique(self, payload): + hypothesis.assume(len(payload) % 4 != 0) + canonical_enc = binascii.b2a_ascii85(payload) + # Ascii85 alphabet: '!' (33) through 'u' (117) + accepted = [] + for digit in range(33, 118): + candidate = canonical_enc[:-1] + bytes([digit]) + try: + result = binascii.a2b_ascii85(candidate, canonical=True) + if result == payload: + accepted.append(candidate) + except binascii.Error: + pass + self.assertEqual(accepted, [canonical_enc]) + def test_base32_valid(self): # Test base32 with valid data lines = [] @@ -935,6 +1145,55 @@ def assertInvalidLength(data, *args, length=None, **kwargs): assertInvalidLength(b" ABC=====", ignorechars=b' ') assertInvalidLength(b" ABCDEF==", ignorechars=b' ') + def test_base32_canonical(self): + # https://datatracker.ietf.org/doc/html/rfc4648.html#section-3.5 + # Decoders MAY reject encoded data if the pad bits are not zero. + + # Without canonical=True, non-zero padding bits are accepted + self.assertEqual(binascii.a2b_base32(self.type2test(b'AB======')), + b'\x00') + + # 2 data chars + "======": last char has 2 padding bits + with self.assertRaises(binascii.Error): + binascii.a2b_base32(self.type2test(b'AB======'), canonical=True) + with self.assertRaises(binascii.Error): + binascii.a2b_base32(self.type2test(b'AD======'), canonical=True) + + # 4 data chars + "====": last char has 4 padding bits + with self.assertRaises(binascii.Error): + binascii.a2b_base32(self.type2test(b'AAAB===='), canonical=True) + with self.assertRaises(binascii.Error): + binascii.a2b_base32(self.type2test(b'AAAP===='), canonical=True) + + # 5 data chars + "===": last char has 1 padding bit + with self.assertRaises(binascii.Error): + binascii.a2b_base32(self.type2test(b'AAAAB==='), canonical=True) + + # 7 data chars + "=": last char has 3 padding bits + with self.assertRaises(binascii.Error): + binascii.a2b_base32(self.type2test(b'AAAAAAB='), canonical=True) + with self.assertRaises(binascii.Error): + binascii.a2b_base32(self.type2test(b'AAAAAAH='), canonical=True) + + # Verify that zero padding bits are accepted + binascii.a2b_base32(self.type2test(b'AA======'), canonical=True) + binascii.a2b_base32(self.type2test(b'AAAA===='), canonical=True) + binascii.a2b_base32(self.type2test(b'AAAAA==='), canonical=True) + binascii.a2b_base32(self.type2test(b'AAAAAAA='), canonical=True) + + # Full octet with no padding -- always valid + binascii.a2b_base32(self.type2test(b'AAAAAAAA'), canonical=True) + + @hypothesis.given(payload=hypothesis.strategies.binary()) + @hypothesis.example(b'') + @hypothesis.example(b'\x00') + @hypothesis.example(b'\xff\xff') + @hypothesis.example(b'abc') + def test_base32_canonical_roundtrip(self, payload): + encoded = binascii.b2a_base32(payload) + decoded = binascii.a2b_base32(encoded, canonical=True) + self.assertEqual(decoded, payload) + def test_a2b_base32_padded(self): a2b_base32 = binascii.a2b_base32 t = self.type2test diff --git a/Misc/NEWS.d/next/Library/2026-04-25-11-56-05.gh-issue-146311.iHWO0v.rst b/Misc/NEWS.d/next/Library/2026-04-25-11-56-05.gh-issue-146311.iHWO0v.rst new file mode 100644 index 00000000000000..4f4a8365b6cf5e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-04-25-11-56-05.gh-issue-146311.iHWO0v.rst @@ -0,0 +1,7 @@ +Add a *canonical* keyword-only parameter to the base16, base32, base64, +base85, ascii85, and Z85 decoders in :mod:`base64` and :mod:`binascii`. +When true, encodings with non-zero padding bits (base16/32/64) or +non-canonical encodings (base85/ascii85) are rejected. Single-character +final groups in :func:`binascii.a2b_ascii85` and :func:`binascii.a2b_base85` +are now always rejected as encoding violations, regardless of *canonical*; +previously they were silently ignored and produced no output bytes. diff --git a/Modules/binascii.c b/Modules/binascii.c index b80bfbfffe430c..7e6e9655f8d498 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -244,6 +244,9 @@ static const _Py_ALIGNED_DEF(64, unsigned char) table_b2a_base85_a85[] = #define BASE85_A85_Z 0x00000000 #define BASE85_A85_Y 0x20202020 +/* 85**0 through 85**4, used for canonical encoding checks. */ +static const uint32_t pow85[] = {1, 85, 7225, 614125, 52200625}; + static const _Py_ALIGNED_DEF(64, unsigned char) table_a2b_base32[] = { -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, @@ -729,6 +732,8 @@ binascii.a2b_base64 ignorechars: Py_buffer = NULL A byte string containing characters to ignore from the input when strict_mode is true. + canonical: bool = False + When set to true, reject non-zero padding bits per RFC 4648 section 3.5. Decode a line of base64 data. [clinic start generated code]*/ @@ -736,8 +741,8 @@ Decode a line of base64 data. static PyObject * binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode, int padded, PyBytesObject *alphabet, - Py_buffer *ignorechars) -/*[clinic end generated code: output=525d840a299ff132 input=74a53dd3b23474b3]*/ + Py_buffer *ignorechars, int canonical) +/*[clinic end generated code: output=77c46dcbf4239527 input=c99096d071deeec8]*/ { assert(data->len >= 0); @@ -909,6 +914,16 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode, goto error_end; } + /* https://datatracker.ietf.org/doc/html/rfc4648.html#section-3.5 + * Decoders MAY reject non-zero padding bits. */ + if (canonical && leftchar != 0) { + state = get_binascii_state(module); + if (state) { + PyErr_SetString(state->Error, "Non-zero padding bits"); + } + goto error_end; + } + Py_XDECREF(table_obj); return PyBytesWriter_FinishWithPointer(writer, bin_data); @@ -1037,14 +1052,16 @@ binascii.a2b_ascii85 Expect data to be wrapped in '<~' and '~>' as in Adobe Ascii85. ignorechars: Py_buffer = b'' A byte string containing characters to ignore from the input. + canonical: bool = False + When set to true, reject non-canonical encodings. Decode Ascii85 data. [clinic start generated code]*/ static PyObject * binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces, - int adobe, Py_buffer *ignorechars) -/*[clinic end generated code: output=599aa3e41095a651 input=f39abd11eab4bac0]*/ + int adobe, Py_buffer *ignorechars, int canonical) +/*[clinic end generated code: output=09b35f1eac531357 input=dd050604ed30199e]*/ { const unsigned char *ascii_data = data->buf; Py_ssize_t ascii_len = data->len; @@ -1107,6 +1124,7 @@ binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces, uint32_t leftchar = 0; int group_pos = 0; + int from_z = 0; /* true when current group came from 'z' shorthand */ for (; ascii_len > 0 || group_pos != 0; ascii_len--, ascii_data++) { /* Shift (in radix-85) data or padding into our buffer. */ unsigned char this_digit; @@ -1142,6 +1160,7 @@ binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces, goto error; } leftchar = this_ch == 'y' ? BASE85_A85_Y : BASE85_A85_Z; + from_z = (this_ch == 'z'); group_pos = 5; } else if (!ignorechar(this_ch, ignorechars, ignorecache)) { @@ -1159,11 +1178,62 @@ binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces, } /* Write current chunk. */ - Py_ssize_t chunk_len = ascii_len < 1 ? 3 + ascii_len : 4; - for (Py_ssize_t i = 0; i < chunk_len; i++) { + int chunk_len = ascii_len < 1 ? 3 + (int)ascii_len : 4; + + /* A final partial 5-tuple containing only one character is an + * encoding violation per the PLRM spec; reject unconditionally. */ + if (chunk_len == 0) { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_SetString(state->Error, + "Incomplete Ascii85 group"); + } + goto error; + } + + for (int i = 0; i < chunk_len; i++) { *bin_data++ = (leftchar >> (24 - 8 * i)) & 0xff; } + if (canonical) { + /* The PLRM spec requires all-zero groups to use the 'z' + * abbreviation. Reject '!!!!!' (five zero digits). */ + if (chunk_len == 4 && leftchar == 0 && !from_z) { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_SetString(state->Error, + "Non-canonical encoding, " + "use 'z' for all-zero groups"); + } + goto error; + } + /* Reject non-canonical partial groups. + * + * A partial group of N chars (2-4) encodes N-1 bytes. + * The decoder pads missing chars with digit 84 (the max). + * The encoder produces the unique N chars for those bytes + * by zero-padding the bytes to a uint32 and taking the + * leading N base-85 digits. Two encodings are equivalent + * iff they yield the same quotient when divided by + * 85**(5-N). */ + if (chunk_len < 4) { + int n_pad = 4 - chunk_len; + uint32_t canonical_top = + (leftchar >> (n_pad * 8)) << (n_pad * 8); + if (canonical_top / pow85[n_pad] + != leftchar / pow85[n_pad]) + { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_SetString(state->Error, + "Non-zero padding bits"); + } + goto error; + } + } + } + + from_z = 0; group_pos = 0; leftchar = 0; } @@ -1315,14 +1385,17 @@ binascii.a2b_base85 alphabet: PyBytesObject(c_default="NULL") = BASE85_ALPHABET ignorechars: Py_buffer = b'' A byte string containing characters to ignore from the input. + canonical: bool = False + When set to true, reject non-canonical encodings. Decode a line of Base85 data. [clinic start generated code]*/ static PyObject * binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, - PyBytesObject *alphabet, Py_buffer *ignorechars) -/*[clinic end generated code: output=6a8d6eae798818d7 input=04d72a319712bdf3]*/ + PyBytesObject *alphabet, Py_buffer *ignorechars, + int canonical) +/*[clinic end generated code: output=90dfef0c6b51e5f3 input=2819dc8aeffee5a2]*/ { const unsigned char *ascii_data = data->buf; Py_ssize_t ascii_len = data->len; @@ -1403,11 +1476,41 @@ binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, } /* Write current chunk. */ - Py_ssize_t chunk_len = ascii_len < 1 ? 3 + ascii_len : 4; - for (Py_ssize_t i = 0; i < chunk_len; i++) { + int chunk_len = ascii_len < 1 ? 3 + (int)ascii_len : 4; + + /* A 1-char final group is an encoding violation (no conforming + * encoder produces it); reject unconditionally. */ + if (chunk_len == 0) { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_SetString(state->Error, + "Incomplete Base85 group"); + } + goto error; + } + + for (int i = 0; i < chunk_len; i++) { *bin_data++ = (leftchar >> (24 - 8 * i)) & 0xff; } + /* Reject non-canonical encodings in the final group. + * See the comment in a2b_ascii85 for the full explanation. */ + if (canonical && chunk_len < 4) { + int n_pad = 4 - chunk_len; + uint32_t canonical_top = + (leftchar >> (n_pad * 8)) << (n_pad * 8); + if (canonical_top / pow85[n_pad] + != leftchar / pow85[n_pad]) + { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_SetString(state->Error, + "Non-zero padding bits"); + } + goto error; + } + } + group_pos = 0; leftchar = 0; } @@ -1535,14 +1638,17 @@ binascii.a2b_base32 alphabet: PyBytesObject(c_default="NULL") = BASE32_ALPHABET ignorechars: Py_buffer = b'' A byte string containing characters to ignore from the input. + canonical: bool = False + When set to true, reject non-zero padding bits per RFC 4648 section 3.5. Decode a line of base32 data. [clinic start generated code]*/ static PyObject * binascii_a2b_base32_impl(PyObject *module, Py_buffer *data, int padded, - PyBytesObject *alphabet, Py_buffer *ignorechars) -/*[clinic end generated code: output=7dbbaa816d956b1c input=07a3721acdf9b688]*/ + PyBytesObject *alphabet, Py_buffer *ignorechars, + int canonical) +/*[clinic end generated code: output=bc70f2bb6001fb55 input=5bfe6d1ea2f30e3b]*/ { const unsigned char *ascii_data = data->buf; Py_ssize_t ascii_len = data->len; @@ -1723,6 +1829,16 @@ binascii_a2b_base32_impl(PyObject *module, Py_buffer *data, int padded, goto error; } + /* https://datatracker.ietf.org/doc/html/rfc4648.html#section-3.5 + * Decoders MAY reject non-zero padding bits. */ + if (canonical && leftchar != 0) { + state = get_binascii_state(module); + if (state) { + PyErr_SetString(state->Error, "Non-zero padding bits"); + } + goto error; + } + Py_XDECREF(table_obj); return PyBytesWriter_FinishWithPointer(writer, bin_data); diff --git a/Modules/clinic/binascii.c.h b/Modules/clinic/binascii.c.h index 0a2d33c428d10a..ed695758ef998c 100644 --- a/Modules/clinic/binascii.c.h +++ b/Modules/clinic/binascii.c.h @@ -119,7 +119,7 @@ binascii_b2a_uu(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj PyDoc_STRVAR(binascii_a2b_base64__doc__, "a2b_base64($module, data, /, *, strict_mode=,\n" " padded=True, alphabet=BASE64_ALPHABET,\n" -" ignorechars=)\n" +" ignorechars=, canonical=False)\n" "--\n" "\n" "Decode a line of base64 data.\n" @@ -132,7 +132,9 @@ PyDoc_STRVAR(binascii_a2b_base64__doc__, " When set to false, padding in input is not required.\n" " ignorechars\n" " A byte string containing characters to ignore from the input when\n" -" strict_mode is true."); +" strict_mode is true.\n" +" canonical\n" +" When set to true, reject non-zero padding bits per RFC 4648 section 3.5."); #define BINASCII_A2B_BASE64_METHODDEF \ {"a2b_base64", _PyCFunction_CAST(binascii_a2b_base64), METH_FASTCALL|METH_KEYWORDS, binascii_a2b_base64__doc__}, @@ -140,7 +142,7 @@ PyDoc_STRVAR(binascii_a2b_base64__doc__, static PyObject * binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode, int padded, PyBytesObject *alphabet, - Py_buffer *ignorechars); + Py_buffer *ignorechars, int canonical); static PyObject * binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -148,7 +150,7 @@ binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 4 + #define NUM_KEYWORDS 5 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -157,7 +159,7 @@ binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(strict_mode), &_Py_ID(padded), &_Py_ID(alphabet), &_Py_ID(ignorechars), }, + .ob_item = { &_Py_ID(strict_mode), &_Py_ID(padded), &_Py_ID(alphabet), &_Py_ID(ignorechars), &_Py_ID(canonical), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -166,20 +168,21 @@ binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"", "strict_mode", "padded", "alphabet", "ignorechars", NULL}; + static const char * const _keywords[] = {"", "strict_mode", "padded", "alphabet", "ignorechars", "canonical", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "a2b_base64", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[5]; + PyObject *argsbuf[6]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; Py_buffer data = {NULL, NULL}; int strict_mode = -1; int padded = 1; PyBytesObject *alphabet = NULL; Py_buffer ignorechars = {NULL, NULL}; + int canonical = 0; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -220,11 +223,20 @@ binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P goto skip_optional_kwonly; } } - if (PyObject_GetBuffer(args[4], &ignorechars, PyBUF_SIMPLE) != 0) { + if (args[4]) { + if (PyObject_GetBuffer(args[4], &ignorechars, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + canonical = PyObject_IsTrue(args[5]); + if (canonical < 0) { goto exit; } skip_optional_kwonly: - return_value = binascii_a2b_base64_impl(module, &data, strict_mode, padded, alphabet, &ignorechars); + return_value = binascii_a2b_base64_impl(module, &data, strict_mode, padded, alphabet, &ignorechars, canonical); exit: /* Cleanup for data */ @@ -352,7 +364,7 @@ binascii_b2a_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P PyDoc_STRVAR(binascii_a2b_ascii85__doc__, "a2b_ascii85($module, data, /, *, foldspaces=False, adobe=False,\n" -" ignorechars=b\'\')\n" +" ignorechars=b\'\', canonical=False)\n" "--\n" "\n" "Decode Ascii85 data.\n" @@ -362,14 +374,16 @@ PyDoc_STRVAR(binascii_a2b_ascii85__doc__, " adobe\n" " Expect data to be wrapped in \'<~\' and \'~>\' as in Adobe Ascii85.\n" " ignorechars\n" -" A byte string containing characters to ignore from the input."); +" A byte string containing characters to ignore from the input.\n" +" canonical\n" +" When set to true, reject non-canonical encodings."); #define BINASCII_A2B_ASCII85_METHODDEF \ {"a2b_ascii85", _PyCFunction_CAST(binascii_a2b_ascii85), METH_FASTCALL|METH_KEYWORDS, binascii_a2b_ascii85__doc__}, static PyObject * binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces, - int adobe, Py_buffer *ignorechars); + int adobe, Py_buffer *ignorechars, int canonical); static PyObject * binascii_a2b_ascii85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -377,7 +391,7 @@ binascii_a2b_ascii85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 3 + #define NUM_KEYWORDS 4 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -386,7 +400,7 @@ binascii_a2b_ascii85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(foldspaces), &_Py_ID(adobe), &_Py_ID(ignorechars), }, + .ob_item = { &_Py_ID(foldspaces), &_Py_ID(adobe), &_Py_ID(ignorechars), &_Py_ID(canonical), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -395,19 +409,20 @@ binascii_a2b_ascii85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"", "foldspaces", "adobe", "ignorechars", NULL}; + static const char * const _keywords[] = {"", "foldspaces", "adobe", "ignorechars", "canonical", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "a2b_ascii85", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[4]; + PyObject *argsbuf[5]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; Py_buffer data = {NULL, NULL}; int foldspaces = 0; int adobe = 0; Py_buffer ignorechars = {.buf = "", .obj = NULL, .len = 0}; + int canonical = 0; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -438,11 +453,20 @@ binascii_a2b_ascii85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, goto skip_optional_kwonly; } } - if (PyObject_GetBuffer(args[3], &ignorechars, PyBUF_SIMPLE) != 0) { + if (args[3]) { + if (PyObject_GetBuffer(args[3], &ignorechars, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + canonical = PyObject_IsTrue(args[4]); + if (canonical < 0) { goto exit; } skip_optional_kwonly: - return_value = binascii_a2b_ascii85_impl(module, &data, foldspaces, adobe, &ignorechars); + return_value = binascii_a2b_ascii85_impl(module, &data, foldspaces, adobe, &ignorechars, canonical); exit: /* Cleanup for data */ @@ -573,20 +597,23 @@ binascii_b2a_ascii85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyDoc_STRVAR(binascii_a2b_base85__doc__, "a2b_base85($module, data, /, *, alphabet=BASE85_ALPHABET,\n" -" ignorechars=b\'\')\n" +" ignorechars=b\'\', canonical=False)\n" "--\n" "\n" "Decode a line of Base85 data.\n" "\n" " ignorechars\n" -" A byte string containing characters to ignore from the input."); +" A byte string containing characters to ignore from the input.\n" +" canonical\n" +" When set to true, reject non-canonical encodings."); #define BINASCII_A2B_BASE85_METHODDEF \ {"a2b_base85", _PyCFunction_CAST(binascii_a2b_base85), METH_FASTCALL|METH_KEYWORDS, binascii_a2b_base85__doc__}, static PyObject * binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, - PyBytesObject *alphabet, Py_buffer *ignorechars); + PyBytesObject *alphabet, Py_buffer *ignorechars, + int canonical); static PyObject * binascii_a2b_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -594,7 +621,7 @@ binascii_a2b_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -603,7 +630,7 @@ binascii_a2b_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(alphabet), &_Py_ID(ignorechars), }, + .ob_item = { &_Py_ID(alphabet), &_Py_ID(ignorechars), &_Py_ID(canonical), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -612,18 +639,19 @@ binascii_a2b_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"", "alphabet", "ignorechars", NULL}; + static const char * const _keywords[] = {"", "alphabet", "ignorechars", "canonical", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "a2b_base85", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[3]; + PyObject *argsbuf[4]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; Py_buffer data = {NULL, NULL}; PyBytesObject *alphabet = NULL; Py_buffer ignorechars = {.buf = "", .obj = NULL, .len = 0}; + int canonical = 0; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -646,11 +674,20 @@ binascii_a2b_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P goto skip_optional_kwonly; } } - if (PyObject_GetBuffer(args[2], &ignorechars, PyBUF_SIMPLE) != 0) { + if (args[2]) { + if (PyObject_GetBuffer(args[2], &ignorechars, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + canonical = PyObject_IsTrue(args[3]); + if (canonical < 0) { goto exit; } skip_optional_kwonly: - return_value = binascii_a2b_base85_impl(module, &data, alphabet, &ignorechars); + return_value = binascii_a2b_base85_impl(module, &data, alphabet, &ignorechars, canonical); exit: /* Cleanup for data */ @@ -768,7 +805,7 @@ binascii_b2a_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P PyDoc_STRVAR(binascii_a2b_base32__doc__, "a2b_base32($module, data, /, *, padded=True, alphabet=BASE32_ALPHABET,\n" -" ignorechars=b\'\')\n" +" ignorechars=b\'\', canonical=False)\n" "--\n" "\n" "Decode a line of base32 data.\n" @@ -776,14 +813,17 @@ PyDoc_STRVAR(binascii_a2b_base32__doc__, " padded\n" " When set to false, padding in input is not required.\n" " ignorechars\n" -" A byte string containing characters to ignore from the input."); +" A byte string containing characters to ignore from the input.\n" +" canonical\n" +" When set to true, reject non-zero padding bits per RFC 4648 section 3.5."); #define BINASCII_A2B_BASE32_METHODDEF \ {"a2b_base32", _PyCFunction_CAST(binascii_a2b_base32), METH_FASTCALL|METH_KEYWORDS, binascii_a2b_base32__doc__}, static PyObject * binascii_a2b_base32_impl(PyObject *module, Py_buffer *data, int padded, - PyBytesObject *alphabet, Py_buffer *ignorechars); + PyBytesObject *alphabet, Py_buffer *ignorechars, + int canonical); static PyObject * binascii_a2b_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -791,7 +831,7 @@ binascii_a2b_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 3 + #define NUM_KEYWORDS 4 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -800,7 +840,7 @@ binascii_a2b_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(padded), &_Py_ID(alphabet), &_Py_ID(ignorechars), }, + .ob_item = { &_Py_ID(padded), &_Py_ID(alphabet), &_Py_ID(ignorechars), &_Py_ID(canonical), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -809,19 +849,20 @@ binascii_a2b_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"", "padded", "alphabet", "ignorechars", NULL}; + static const char * const _keywords[] = {"", "padded", "alphabet", "ignorechars", "canonical", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "a2b_base32", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[4]; + PyObject *argsbuf[5]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; Py_buffer data = {NULL, NULL}; int padded = 1; PyBytesObject *alphabet = NULL; Py_buffer ignorechars = {.buf = "", .obj = NULL, .len = 0}; + int canonical = 0; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -853,11 +894,20 @@ binascii_a2b_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P goto skip_optional_kwonly; } } - if (PyObject_GetBuffer(args[3], &ignorechars, PyBUF_SIMPLE) != 0) { + if (args[3]) { + if (PyObject_GetBuffer(args[3], &ignorechars, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + canonical = PyObject_IsTrue(args[4]); + if (canonical < 0) { goto exit; } skip_optional_kwonly: - return_value = binascii_a2b_base32_impl(module, &data, padded, alphabet, &ignorechars); + return_value = binascii_a2b_base32_impl(module, &data, padded, alphabet, &ignorechars, canonical); exit: /* Cleanup for data */ @@ -1634,4 +1684,4 @@ binascii_b2a_qp(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj return return_value; } -/*[clinic end generated code: output=2acab1ceb0058b1a input=a9049054013a1b77]*/ +/*[clinic end generated code: output=b41544f39b0ef681 input=a9049054013a1b77]*/