Python/hashes/sha1.py

169 lines
6.2 KiB
Python
Raw Normal View History

2018-09-22 23:55:07 +00:00
"""
Implementation of the SHA1 hash function, with utilities to find the hash of a string
or the hash of the contents of a file. Also contains a test function to verify that the
generated hash matches what is returned by the hashlib library.

Usage: python sha1.py --string "Hello World!!"
       python sha1.py --file "hello_world.txt"
       When run without any arguments, it prints the hash of the string
       "Hello World!! Welcome to Cryptography"

SHA1 hash or SHA1 sum of a string is a cryptographic function, which means it is easy
to calculate forwards but extremely difficult to calculate backwards. What this means
is that you can easily calculate the hash of a string, but it is extremely difficult to
know the original string if you only have its hash. This property is useful for
communicating securely and sending encrypted messages, and is very useful in payment
systems, blockchain, cryptocurrency, etc.

The algorithm as described in the reference:
First we start with a message. The message is padded and the length of the message
is added to the end. It is then split into blocks of 512 bits or 64 bytes. The blocks
are then processed one at a time. Each block must be expanded and compressed.
The value after each compression is added to a 160-bit buffer called the current hash
state. After the last block is processed, the current hash state is returned as
the final hash.

Reference: https://deadhacker.com/2006/02/21/sha-1-illustrated/
"""
2018-09-22 23:55:07 +00:00
import argparse
2019-10-05 05:14:13 +00:00
import hashlib # hashlib is only used inside the Test class
import struct
2018-09-23 14:14:37 +00:00
class SHA1Hash:
    """
    Class to contain the entire pipeline for the SHA1 hashing algorithm.

    The message is padded, split into 64-byte blocks, and each block is expanded to
    80 words and compressed into a state of five 32-bit words. The hexadecimal
    rendering of that state is the digest.

    >>> SHA1Hash(bytes('Allan', 'utf-8')).final_hash()
    '872af2d8ac3d8695387e7c804bf0e02c18df9e6e'
    >>> SHA1Hash(b"").final_hash()
    'da39a3ee5e6b4b0d3255bfef95601890afd80709'
    """

    def __init__(self, data):
        """
        Initiates the variables data and h.

        data is the message to hash; the other methods concatenate it with
        bytestrings, so it is expected to be a bytestring itself.

        h is a list of 5 8-digit hexadecimal numbers corresponding to
        (1732584193, 4023233417, 2562383102, 271733878, 3285377520)
        respectively. It is the state the digest computation starts from.
        0x is how you write hexadecimal numbers in Python.
        """
        self.data = data
        self.h = [0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0]

    @staticmethod
    def rotate(n, b):
        """
        Static method to be used inside other methods. Left rotates n by b bits,
        treating n as a 32-bit word (the result is masked to 32 bits).

        >>> SHA1Hash.rotate(12, 2)
        48
        >>> SHA1Hash.rotate(0x80000000, 1)
        1
        """
        return ((n << b) | (n >> (32 - b))) & 0xFFFFFFFF

    def padding(self):
        """
        Returns the message followed by a 0x80 byte, enough zero bytes to leave room
        for the length field, and the message length in bits as a big-endian 64-bit
        integer, so that the total length is a multiple of 64 bytes (512 bits).

        >>> len(SHA1Hash(b"abc").padding())
        64
        >>> len(SHA1Hash(b"a" * 56).padding())
        128
        """
        message_length = len(self.data)
        # One mandatory 0x80 marker, then zeros up to 8 bytes short of a block end.
        pad = b"\x80" + b"\x00" * (63 - (message_length + 8) % 64)
        return self.data + pad + struct.pack(">Q", 8 * message_length)

    def split_blocks(self):
        """
        Returns a list of bytestrings each of length 64, cut from self.padded_data
        (which must have been set beforehand, as final_hash does).
        """
        return [
            self.padded_data[start : start + 64]
            for start in range(0, len(self.padded_data), 64)
        ]

    def expand_block(self, block):
        """
        Takes a bytestring-block of length 64, unpacks it to a list of 16 big-endian
        32-bit integers and returns a list of 80 integers, where every further word
        is the XOR of four earlier words rotated left by one bit.

        >>> len(SHA1Hash(b"").expand_block(bytes(64)))
        80
        """
        w = list(struct.unpack(">16L", block)) + [0] * 64
        for i in range(16, 80):
            w[i] = self.rotate(w[i - 3] ^ w[i - 8] ^ w[i - 14] ^ w[i - 16], 1)
        return w

    def final_hash(self):
        """
        Calls all the other methods to process the input. Pads the data, then splits
        it into blocks and then does a series of operations for each block (including
        expansion).

        For each block, the current state is copied to a,b,c,d,e and these 5
        variables undergo 80 rounds of mixing. After each block they are added
        pairwise (modulo 2**32) to the state, ie a to state[0], b to state[1] and so
        on. The state after the last block is returned as a 40-character lowercase
        hexadecimal string.

        The running state is kept in a local variable that starts from self.h, and
        self.h itself is left untouched, so calling this method repeatedly on the
        same instance always returns the same digest. As a side effect
        self.padded_data and self.blocks are (re)computed.

        >>> hasher = SHA1Hash(b"abc")
        >>> hasher.final_hash()
        'a9993e364706816aba3e25717850c26c9cd0d89d'
        >>> hasher.final_hash()
        'a9993e364706816aba3e25717850c26c9cd0d89d'
        """
        self.padded_data = self.padding()
        self.blocks = self.split_blocks()
        # Work on a copy: mutating self.h here would make a second call start from
        # the previous digest instead of the initial state.
        state = list(self.h)
        for block in self.blocks:
            expanded_block = self.expand_block(block)
            a, b, c, d, e = state
            for i in range(80):
                # The mixing function and round constant change every 20 rounds.
                if i < 20:
                    f = (b & c) | ((~b) & d)
                    k = 0x5A827999
                elif i < 40:
                    f = b ^ c ^ d
                    k = 0x6ED9EBA1
                elif i < 60:
                    f = (b & c) | (b & d) | (c & d)
                    k = 0x8F1BBCDC
                else:
                    f = b ^ c ^ d
                    k = 0xCA62C1D6
                a, b, c, d, e = (
                    (self.rotate(a, 5) + f + e + k + expanded_block[i]) & 0xFFFFFFFF,
                    a,
                    self.rotate(b, 30),
                    c,
                    d,
                )
            state = [
                (previous + mixed) & 0xFFFFFFFF
                for previous, mixed in zip(state, (a, b, c, d, e))
            ]
        return ("{:08x}" * 5).format(*state)
2018-09-25 13:51:29 +00:00
def test_sha1_hash():
    """Check that this module's digest of a fixed message matches hashlib's."""
    message = b"Test String"
    expected_digest = hashlib.sha1(message).hexdigest()  # noqa: S324
    computed_digest = SHA1Hash(message).final_hash()
    assert computed_digest == expected_digest
2018-09-22 23:55:07 +00:00
def main(argv=None):
    """
    Provides option 'string' or 'file' to take input and prints the calculated SHA1
    hash.

    argv is an optional list of command-line arguments; when it is None (the
    default) argparse reads them from sys.argv, exactly as before. If --file is
    given, the file is read in binary mode and its contents are hashed; otherwise
    the --string value (or its default) is encoded as UTF-8 and hashed.
    """
    parser = argparse.ArgumentParser(description="Process some strings or files")
    parser.add_argument(
        "--string",
        dest="input_string",
        default="Hello World!! Welcome to Cryptography",
        help="Hash the string",
    )
    parser.add_argument("--file", dest="input_file", help="Hash contents of a file")
    args = parser.parse_args(argv)

    # In any case hash input should be a bytestring
    if args.input_file:
        with open(args.input_file, "rb") as f:
            hash_input = f.read()
    else:
        hash_input = args.input_string.encode("utf-8")
    print(SHA1Hash(hash_input).final_hash())
2018-09-22 23:55:07 +00:00
2019-10-05 05:14:13 +00:00
if __name__ == "__main__":
    # Script entry point: print the requested hash, then run the module's doctests.
    import doctest

    main()
    doctest.testmod()