Write a proper implementation for base16 (#6909)

According to CONTRIBUTING.md:
"Algorithms in this repo should not be how-to examples for existing Python packages."
This commit is contained in:
CenTdemeern1 2022-10-25 23:09:28 -07:00 committed by GitHub
parent 68f6e9ac30
commit abf0909b68
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1,34 +1,63 @@
import base64
def base16_encode(data: bytes) -> str:
    """
    Encodes the given bytes into base16.

    Every input byte is rendered as exactly two uppercase hexadecimal
    digits, so the result is always twice as long as the input.

    :param data: the raw bytes to encode
    :return: the uppercase base16 (hex) text representation of ``data``

    >>> base16_encode(b'Hello World!')
    '48656C6C6F20576F726C6421'
    >>> base16_encode(b'HELLO WORLD!')
    '48454C4C4F20574F524C4421'
    >>> base16_encode(b'')
    ''
    """
    # Iterating over a bytes object yields one integer per byte. The "02X"
    # format spec turns each integer into two zero-padded, uppercase hex
    # digits; joining the pieces gives the encoded string.
    return "".join(f"{byte:02X}" for byte in data)
def base16_decode(data: str) -> bytes:
    """
    Decodes the given base16 encoded data into bytes.

    The input must consist of an even number of characters drawn only from
    the uppercase base16 alphabet ``0123456789ABCDEF``. The length is
    checked before the alphabet, so odd-length input always reports the
    length error.

    :param data: the base16 (uppercase hex) text to decode
    :return: the decoded bytes, one byte per pair of hex digits
    :raises ValueError: if ``data`` has an odd length, or contains any
        character outside the uppercase base16 alphabet

    >>> base16_decode('48656C6C6F20576F726C6421')
    b'Hello World!'
    >>> base16_decode('48454C4C4F20574F524C4421')
    b'HELLO WORLD!'
    >>> base16_decode('')
    b''
    >>> base16_decode('486')
    Traceback (most recent call last):
      ...
    ValueError: Base16 encoded data is invalid:
    Data does not have an even number of hex digits.
    >>> base16_decode('48656c6c6f20576f726c6421')
    Traceback (most recent call last):
      ...
    ValueError: Base16 encoded data is invalid:
    Data is not uppercase hex or it contains invalid characters.
    >>> base16_decode('This is not base16 encoded data.')
    Traceback (most recent call last):
      ...
    ValueError: Base16 encoded data is invalid:
    Data is not uppercase hex or it contains invalid characters.
    """
    # Check data validity, following RFC3548
    # https://www.ietf.org/rfc/rfc3548.txt
    # Each byte is written as two hex digits, so a valid encoding can never
    # have an odd length.
    if len(data) % 2 != 0:
        raise ValueError(
            "Base16 encoded data is invalid:\n"
            "Data does not have an even number of hex digits."
        )
    # Check the character set - the standard base16 alphabet
    # is uppercase according to RFC3548 section 6
    if not set(data) <= set("0123456789ABCDEF"):
        raise ValueError(
            "Base16 encoded data is invalid:\n"
            "Data is not uppercase hex or it contains invalid characters."
        )
    # Walk the text two characters at a time; each pair of hex digits is
    # parsed as one integer, and the integers are packed into bytes.
    return bytes(int(data[i : i + 2], 16) for i in range(0, len(data), 2))
if __name__ == "__main__":