diff --git a/DIRECTORY.md b/DIRECTORY.md index 34967082b..b1adc23f6 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -717,6 +717,7 @@ * [Archimedes Principle](physics/archimedes_principle.py) * [Casimir Effect](physics/casimir_effect.py) * [Centripetal Force](physics/centripetal_force.py) + * [Grahams Law](physics/grahams_law.py) * [Horizontal Projectile Motion](physics/horizontal_projectile_motion.py) * [Hubble Parameter](physics/hubble_parameter.py) * [Ideal Gas Law](physics/ideal_gas_law.py) diff --git a/hashes/md5.py b/hashes/md5.py index 2020bf2e5..2187006ec 100644 --- a/hashes/md5.py +++ b/hashes/md5.py @@ -1,91 +1,223 @@ -import math +""" +The MD5 algorithm is a hash function that's commonly used as a checksum to +detect data corruption. The algorithm works by processing a given message in +blocks of 512 bits, padding the message as needed. It uses the blocks to operate +a 128-bit state and performs a total of 64 such operations. Note that all values +are little-endian, so inputs are converted as needed. + +Although MD5 was used as a cryptographic hash function in the past, it's since +been cracked, so it shouldn't be used for security purposes. + +For more info, see https://en.wikipedia.org/wiki/MD5 +""" + +from collections.abc import Generator +from math import sin -def rearrange(bit_string_32): - """[summary] - Regroups the given binary string. +def to_little_endian(string_32: bytes) -> bytes: + """ + Converts the given string to little-endian in groups of 8 chars. 
Arguments: - bitString32 {[string]} -- [32 bit binary] + string_32 {[string]} -- [32-char string] Raises: - ValueError -- [if the given string not are 32 bit binary string] + ValueError -- [input is not 32 char] Returns: - [string] -- [32 bit binary string] - >>> rearrange('1234567890abcdfghijklmnopqrstuvw') - 'pqrstuvwhijklmno90abcdfg12345678' + 32-char little-endian string + >>> to_little_endian(b'1234567890abcdfghijklmnopqrstuvw') + b'pqrstuvwhijklmno90abcdfg12345678' + >>> to_little_endian(b'1234567890') + Traceback (most recent call last): + ... + ValueError: Input must be of length 32 """ + if len(string_32) != 32: + raise ValueError("Input must be of length 32") - if len(bit_string_32) != 32: - raise ValueError("Need length 32") - new_string = "" + little_endian = b"" for i in [3, 2, 1, 0]: - new_string += bit_string_32[8 * i : 8 * i + 8] - return new_string + little_endian += string_32[8 * i : 8 * i + 8] + return little_endian -def reformat_hex(i): - """[summary] - Converts the given integer into 8-digit hex number. +def reformat_hex(i: int) -> bytes: + """ + Converts the given non-negative integer to hex string. + + Example: Suppose the input is the following: + i = 1234 + + The input is 0x000004d2 in hex, so the little-endian hex string is + "d2040000". Arguments: - i {[int]} -- [integer] + i {[int]} -- [integer] + + Raises: + ValueError -- [input is negative] + + Returns: + 8-char little-endian hex string + + >>> reformat_hex(1234) + b'd2040000' >>> reformat_hex(666) - '9a020000' + b'9a020000' + >>> reformat_hex(0) + b'00000000' + >>> reformat_hex(1234567890) + b'd2029649' + >>> reformat_hex(1234567890987654321) + b'b11c6cb1' + >>> reformat_hex(-1) + Traceback (most recent call last): + ... 
+ ValueError: Input must be non-negative """ + if i < 0: + raise ValueError("Input must be non-negative") - hexrep = format(i, "08x") - thing = "" + hex_rep = format(i, "08x")[-8:] + little_endian_hex = b"" for i in [3, 2, 1, 0]: - thing += hexrep[2 * i : 2 * i + 2] - return thing + little_endian_hex += hex_rep[2 * i : 2 * i + 2].encode("utf-8") + return little_endian_hex -def pad(bit_string): - """[summary] - Fills up the binary string to a 512 bit binary string +def preprocess(message: bytes) -> bytes: + """ + Preprocesses the message string: + - Convert message to bit string + - Pad bit string to a multiple of 512 chars: + - Append a 1 + - Append 0's until length = 448 (mod 512) + - Append length of original message (64 chars) + + Example: Suppose the input is the following: + message = "a" + + The message bit string is "01100001", which is 8 bits long. Thus, the + bit string needs 439 bits of padding so that + (bit_string + "1" + padding) = 448 (mod 512). + The message length is "000010000...0" in 64-bit little-endian binary. + The combined bit string is then 512 bits long. Arguments: - bitString {[string]} -- [binary string] + message {[string]} -- [message string] Returns: - [string] -- [binary string] + processed bit string padded to a multiple of 512 chars + + >>> preprocess(b"a") == (b"01100001" + b"1" + + ... 
(b"0" * 439) + b"00001000" + (b"0" * 56)) + True + >>> preprocess(b"") == b"1" + (b"0" * 447) + (b"0" * 64) + True """ - start_length = len(bit_string) - bit_string += "1" + bit_string = b"" + for char in message: + bit_string += format(char, "08b").encode("utf-8") + start_len = format(len(bit_string), "064b").encode("utf-8") + + # Pad bit_string to a multiple of 512 chars + bit_string += b"1" while len(bit_string) % 512 != 448: - bit_string += "0" - last_part = format(start_length, "064b") - bit_string += rearrange(last_part[32:]) + rearrange(last_part[:32]) + bit_string += b"0" + bit_string += to_little_endian(start_len[32:]) + to_little_endian(start_len[:32]) + return bit_string -def get_block(bit_string): - """[summary] - Iterator: - Returns by each call a list of length 16 with the 32 bit - integer blocks. +def get_block_words(bit_string: bytes) -> Generator[list[int], None, None]: + """ + Splits bit string into blocks of 512 chars and yields each block as a list + of 32-bit words + + Example: Suppose the input is the following: + bit_string = + "000000000...0" + # 0x00 (32 bits, padded to the right) + "000000010...0" + # 0x01 (32 bits, padded to the right) + "000000100...0" + # 0x02 (32 bits, padded to the right) + "000000110...0" + # 0x03 (32 bits, padded to the right) + ... + "000011110...0" # 0x0f (32 bits, padded to the right) + + Then len(bit_string) == 512, so there'll be 1 block. The block is split + into 32-bit words, and each word is converted to little endian. The + first word is interpreted as 0 in decimal, the second word is + interpreted as 1 in decimal, etc. + + Thus, block_words == [[0, 1, 2, 3, ..., 15]]. Arguments: - bit_string {[string]} -- [binary string >= 512] + bit_string {[string]} -- [bit string with multiple of 512 as length] + + Raises: + ValueError -- [length of bit string isn't multiple of 512] + + Yields: + a list of 16 32-bit words + + >>> test_string = ("".join(format(n << 24, "032b") for n in range(16)) + ... 
.encode("utf-8")) + >>> list(get_block_words(test_string)) + [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]] + >>> list(get_block_words(test_string * 4)) == [list(range(16))] * 4 + True + >>> list(get_block_words(b"1" * 512)) == [[4294967295] * 16] + True + >>> list(get_block_words(b"")) + [] + >>> list(get_block_words(b"1111")) + Traceback (most recent call last): + ... + ValueError: Input must have length that's a multiple of 512 """ + if len(bit_string) % 512 != 0: + raise ValueError("Input must have length that's a multiple of 512") - curr_pos = 0 - while curr_pos < len(bit_string): - curr_part = bit_string[curr_pos : curr_pos + 512] - my_splits = [] - for i in range(16): - my_splits.append(int(rearrange(curr_part[32 * i : 32 * i + 32]), 2)) - yield my_splits - curr_pos += 512 + for pos in range(0, len(bit_string), 512): + block = bit_string[pos : pos + 512] + block_words = [] + for i in range(0, 512, 32): + block_words.append(int(to_little_endian(block[i : i + 32]), 2)) + yield block_words -def not32(i): +def not_32(i: int) -> int: """ - >>> not32(34) + Perform bitwise NOT on given int. + + Arguments: + i {[int]} -- [given int] + + Raises: + ValueError -- [input is negative] + + Returns: + Result of bitwise NOT on i + + >>> not_32(34) 4294967261 + >>> not_32(1234) + 4294966061 + >>> not_32(4294966061) + 1234 + >>> not_32(0) + 4294967295 + >>> not_32(1) + 4294967294 + >>> not_32(-1) + Traceback (most recent call last): + ... + ValueError: Input must be non-negative """ + if i < 0: + raise ValueError("Input must be non-negative") + i_str = format(i, "032b") new_str = "" for c in i_str: @@ -93,35 +225,114 @@ def not32(i): return int(new_str, 2) -def sum32(a, b): +def sum_32(a: int, b: int) -> int: + """ + Add two numbers as 32-bit ints. 
+ + Arguments: + a {[int]} -- [first given int] + b {[int]} -- [second given int] + + Returns: + (a + b) as an unsigned 32-bit int + + >>> sum_32(1, 1) + 2 + >>> sum_32(2, 3) + 5 + >>> sum_32(0, 0) + 0 + >>> sum_32(-1, -1) + 4294967294 + >>> sum_32(4294967295, 1) + 0 + """ return (a + b) % 2**32 -def leftrot32(i, s): - return (i << s) ^ (i >> (32 - s)) - - -def md5me(test_string): - """[summary] - Returns a 32-bit hash code of the string 'testString' +def left_rotate_32(i: int, shift: int) -> int: + """ + Rotate the bits of a given int left by a given amount. Arguments: - testString {[string]} -- [message] + i {[int]} -- [given int] + shift {[int]} -- [shift amount] + + Raises: + ValueError -- [either given int or shift is negative] + + Returns: + `i` rotated to the left by `shift` bits + + >>> left_rotate_32(1234, 1) + 2468 + >>> left_rotate_32(1111, 4) + 17776 + >>> left_rotate_32(2147483648, 1) + 1 + >>> left_rotate_32(2147483648, 3) + 4 + >>> left_rotate_32(4294967295, 4) + 4294967295 + >>> left_rotate_32(1234, 0) + 1234 + >>> left_rotate_32(0, 0) + 0 + >>> left_rotate_32(-1, 0) + Traceback (most recent call last): + ... + ValueError: Input must be non-negative + >>> left_rotate_32(0, -1) + Traceback (most recent call last): + ... + ValueError: Shift must be non-negative + """ + if i < 0: + raise ValueError("Input must be non-negative") + if shift < 0: + raise ValueError("Shift must be non-negative") + return ((i << shift) ^ (i >> (32 - shift))) % 2**32 + + +def md5_me(message: bytes) -> bytes: + """ + Returns the 32-char MD5 hash of a given message. 
+ + Reference: https://en.wikipedia.org/wiki/MD5#Algorithm + + Arguments: + message {[string]} -- [message] + + Returns: + 32-char MD5 hash string + + >>> md5_me(b"") + b'd41d8cd98f00b204e9800998ecf8427e' + >>> md5_me(b"The quick brown fox jumps over the lazy dog") + b'9e107d9d372bb6826bd81d3542a419d6' + >>> md5_me(b"The quick brown fox jumps over the lazy dog.") + b'e4d909c290d0fb1ca068ffaddf22cbd0' + + >>> import hashlib + >>> from string import ascii_letters + >>> msgs = [b"", ascii_letters.encode("utf-8"), "Üñîçø∂é".encode("utf-8"), + ... b"The quick brown fox jumps over the lazy dog."] + >>> all(md5_me(msg) == hashlib.md5(msg).hexdigest().encode("utf-8") for msg in msgs) + True """ - bs = "" - for i in test_string: - bs += format(ord(i), "08b") - bs = pad(bs) + # Convert to bit string, add padding and append message length + bit_string = preprocess(message) - tvals = [int(2**32 * abs(math.sin(i + 1))) for i in range(64)] + added_consts = [int(2**32 * abs(sin(i + 1))) for i in range(64)] + # Starting states a0 = 0x67452301 b0 = 0xEFCDAB89 c0 = 0x98BADCFE d0 = 0x10325476 - s = [ + shift_amounts = [ 7, 12, 17, @@ -188,51 +399,46 @@ def md5me(test_string): 21, ] - for m in get_block(bs): + # Process bit string in chunks, each with 16 32-char words + for block_words in get_block_words(bit_string): a = a0 b = b0 c = c0 d = d0 + + # Hash current chunk for i in range(64): if i <= 15: - # f = (B & C) | (not32(B) & D) + # f = (b & c) | (not_32(b) & d) # Alternate definition for f f = d ^ (b & (c ^ d)) g = i elif i <= 31: - # f = (D & B) | (not32(D) & C) + # f = (d & b) | (not_32(d) & c) # Alternate definition for f f = c ^ (d & (b ^ c)) g = (5 * i + 1) % 16 elif i <= 47: f = b ^ c ^ d g = (3 * i + 5) % 16 else: - f = c ^ (b | not32(d)) + f = c ^ (b | not_32(d)) g = (7 * i) % 16 - dtemp = d + f = (f + a + added_consts[i] + block_words[g]) % 2**32 + a = d d = c c = b - b = sum32(b, leftrot32((a + f + tvals[i] + m[g]) % 2**32, s[i])) - a = dtemp - a0 = sum32(a0, a) - b0 = 
sum32(b0, b) - c0 = sum32(c0, c) - d0 = sum32(d0, d) + b = sum_32(b, left_rotate_32(f, shift_amounts[i])) + + # Add hashed chunk to running total + a0 = sum_32(a0, a) + b0 = sum_32(b0, b) + c0 = sum_32(c0, c) + d0 = sum_32(d0, d) digest = reformat_hex(a0) + reformat_hex(b0) + reformat_hex(c0) + reformat_hex(d0) return digest -def test(): - assert md5me("") == "d41d8cd98f00b204e9800998ecf8427e" - assert ( - md5me("The quick brown fox jumps over the lazy dog") - == "9e107d9d372bb6826bd81d3542a419d6" - ) - print("Success.") - - if __name__ == "__main__": - test() import doctest doctest.testmod()