Add unicode support in ciphers/base64_cipher.py script. (#1316)

* Add unicode support in ciphers/base64_cipher.py script.
* Add doctests and correct the padding length computation in base64_cipher.
2025-05-28 19:36:36 +00:00 · 2019-10-18 08:35:13 +02:00 · 2019-10-18 08:35:13 +02:00 · e177198177
commit e177198177
parent 2197bfa029
1 changed file with 36 additions and 19 deletions
--- a/ciphers/base64_cipher.py
+++ b/ciphers/base64_cipher.py
@@ -1,35 +1,52 @@
-def encodeBase64(text):
-    base64chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
+def encode_base64(text):
+    r"""
+    >>> encode_base64('WELCOME to base64 encoding 😁')
+    'V0VMQ09NRSB0byBiYXNlNjQgZW5jb2Rpbmcg8J+YgQ=='
+    >>> encode_base64('AÅᐃ𐀏🤓')
+    'QcOF4ZCD8JCAj/CfpJM='
+    >>> encode_base64('A'*60)
+    'QUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFB\r\nQUFB'
+    """
+    base64_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"

+    byte_text = bytes(text, "utf-8")  # put text in bytes for unicode support
    r = ""  # the result
-    c = 3 - len(text) % 3  # the length of padding
+    c = -len(byte_text) % 3  # the length of padding
    p = "=" * c  # the padding
-    s = text + "\0" * c  # the text to encode
+    s = byte_text + b"\x00" * c  # the text to encode

    i = 0
    while i < len(s):
        if i > 0 and ((i / 3 * 4) % 76) == 0:
-            r = r + "\r\n"
+            r = r + "\r\n" # for unix newline, put "\n"

-        n = (ord(s[i]) << 16) + (ord(s[i + 1]) << 8) + ord(s[i + 2])
+        n = (s[i] << 16) + (s[i + 1] << 8) + s[i + 2]

        n1 = (n >> 18) & 63
        n2 = (n >> 12) & 63
        n3 = (n >> 6) & 63
        n4 = n & 63

-        r += base64chars[n1] + base64chars[n2] + base64chars[n3] + base64chars[n4]
+        r += base64_chars[n1] + base64_chars[n2] + base64_chars[n3] + base64_chars[n4]
        i += 3

    return r[0 : len(r) - len(p)] + p


-def decodeBase64(text):
-    base64chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
+def decode_base64(text):
+    r"""
+    >>> decode_base64('V0VMQ09NRSB0byBiYXNlNjQgZW5jb2Rpbmcg8J+YgQ==')
+    'WELCOME to base64 encoding 😁'
+    >>> decode_base64('QcOF4ZCD8JCAj/CfpJM=')
+    'AÅᐃ𐀏🤓'
+    >>> decode_base64("QUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFB\r\nQUFB")
+    'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'
+    """
+    base64_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
    s = ""

    for i in text:
-        if i in base64chars:
+        if i in base64_chars:
            s += i
            c = ""
        else:
@@ -43,28 +60,28 @@ def decodeBase64(text):
        if c == "==":
            p = "AA"

-    r = ""
+    r = b""
    s = s + p

    i = 0
    while i < len(s):
        n = (
-            (base64chars.index(s[i]) << 18)
-            + (base64chars.index(s[i + 1]) << 12)
-            + (base64chars.index(s[i + 2]) << 6)
-            + base64chars.index(s[i + 3])
+            (base64_chars.index(s[i]) << 18)
+            + (base64_chars.index(s[i + 1]) << 12)
+            + (base64_chars.index(s[i + 2]) << 6)
+            + base64_chars.index(s[i + 3])
        )

-        r += chr((n >> 16) & 255) + chr((n >> 8) & 255) + chr(n & 255)
+        r += bytes([(n >> 16) & 255]) + bytes([(n >> 8) & 255]) + bytes([n & 255])

        i += 4

-    return r[0 : len(r) - len(p)]
+    return str(r[0 : len(r) - len(p)], "utf-8")


 def main():
-    print(encodeBase64("WELCOME to base64 encoding"))
-    print(decodeBase64(encodeBase64("WELCOME to base64 encoding")))
+    print(encode_base64("WELCOME to base64 encoding 😁"))
+    print(decode_base64(encode_base64("WELCOME to base64 encoding 😁")))


 if __name__ == "__main__":