Mirror of https://github.com/TheAlgorithms/Python.git (synced 2025-04-07 06:15:55 +00:00)
Refactor LSTM network implementation and improve code readability
commit 5a00ca63fc (parent 21dab0f1c1)
@@ -7,42 +7,46 @@ Detail: Total 3 layers neural network
 * Output layer
 Author: Shashank Tyagi
 Github: LEVII007
 link : https://www.kaggle.com/code/navjindervirdee/lstm-neural-network-from-scratch
 Date: [Current Date]
 """

 ##### Explanation #####
-# This script implements a Long Short-Term Memory (LSTM) network to learn
-# and predict sequences of characters.
+# This script implements a Long Short-Term Memory (LSTM)
+# network to learn and predict sequences of characters.
 # It uses numpy for numerical operations and tqdm for progress visualization.

-# The data is a paragraph about LSTM, converted to lowercase and split into
-# characters. Each character is one-hot encoded for training.
+# The data is a paragraph about LSTM, converted to
+# lowercase and split into characters.
+# Each character is one-hot encoded for training.

-# The LSTM class initializes weights and biases for the forget, input, candidate,
-# and output gates. It also initializes weights and biases for the final output layer.
+# The LSTM class initializes weights and biases for the
+# forget, input, candidate, and output gates.
+# It also initializes weights and biases for the final output layer.

-# The forward method performs forward propagation through the LSTM network,
-# computing hidden and cell states. It uses sigmoid and tanh activation
-# functions for the gates and cell states.
+# The forward method performs forward propagation
+# through the LSTM network, computing hidden and cell states.
+# It uses sigmoid and tanh activation functions for the gates and cell states.

-# The backward method performs backpropagation through time, computing gradients
-# for the weights and biases. It updates the weights and biases using
-# the computed gradients and the learning rate.
+# The backward method performs backpropagation
+# through time, computing gradients for the weights and biases.
+# It updates the weights and biases using the
+# computed gradients and the learning rate.

-# The train method trains the LSTM network on the input data for a specified
-# number of epochs. It uses one-hot encoded inputs and computes errors
-# using the softmax function.
+# The train method trains the LSTM network on
+# the input data for a specified number of epochs.
+# It uses one-hot encoded inputs and computes
+# errors using the softmax function.

-# The test method evaluates the trained LSTM network on the input data,
-# computing accuracy based on predictions.
+# The test method evaluates the trained LSTM
+# network on the input data, computing accuracy based on predictions.

-# The script initializes the LSTM network with specified hyperparameters
-# and trains it on the input data. Finally, it tests the trained network
-# and prints the accuracy of the predictions.
+# The script initializes the LSTM network with
+# specified hyperparameters and trains it on the input data.
+# Finally, it tests the trained network and prints the accuracy of the predictions.

 ##### Imports #####
-from tqdm import tqdm
 import numpy as np
+from tqdm import tqdm


 class LSTM:
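For readers unfamiliar with the gate equations the comments above refer to, here is a minimal, self-contained sketch of a single LSTM forward step. The names are stand-ins, not this class's API: `z` plays the role of the concatenated previous hidden state and current input, and `wf`/`wi`/`wc`/`wo` mirror the forget, input, candidate, and output gate weights initialized below.

import numpy as np


def sigmoid(x: np.ndarray) -> np.ndarray:
    return 1 / (1 + np.exp(-x))


def lstm_step(z, c_prev, wf, bf, wi, bi, wc, bc, wo, bo):
    """One LSTM time step; z is [h_prev; x_t] stacked as a column vector."""
    f = sigmoid(np.dot(wf, z) + bf)  # forget gate: what to keep from c_prev
    i = sigmoid(np.dot(wi, z) + bi)  # input gate: how much new content to admit
    g = np.tanh(np.dot(wc, z) + bc)  # candidate cell state
    o = sigmoid(np.dot(wo, z) + bo)  # output gate
    c = f * c_prev + i * g           # new cell state
    h = o * np.tanh(c)               # new hidden state
    return h, c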
@@ -68,7 +72,7 @@ class LSTM:
         print(f"Data size: {self.data_size}, Char Size: {self.char_size}")

         self.char_to_idx = {c: i for i, c in enumerate(self.chars)}
-        self.idx_to_char = {i: c for i, c in enumerate(self.chars)}
+        self.idx_to_char = dict(enumerate(self.chars))

         self.train_X, self.train_y = self.data[:-1], self.data[1:]

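The two mappings above drive the one-hot encoding the explanation mentions. A standalone sketch of that bookkeeping, assuming a small stand-in corpus in place of the class's `self.chars`:

import numpy as np

chars = sorted(set("some training text"))  # stand-in for self.chars
char_to_idx = {c: i for i, c in enumerate(chars)}
idx_to_char = dict(enumerate(chars))


def one_hot_encode(char: str) -> np.ndarray:
    # Column vector with a single 1 at the character's index.
    vector = np.zeros((len(chars), 1))
    vector[char_to_idx[char]] = 1
    return vector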
@@ -90,30 +94,42 @@ class LSTM:
         """
         Initialize the weights and biases for the LSTM network.
         """
-        self.wf = self.init_weights(self.char_size + self.hidden_dim, self.hidden_dim)
+        rng = np.random.default_rng()
+        self.wf = self.init_weights(
+            self.char_size + self.hidden_dim, self.hidden_dim, rng
+        )
         self.bf = np.zeros((self.hidden_dim, 1))

-        self.wi = self.init_weights(self.char_size + self.hidden_dim, self.hidden_dim)
+        self.wi = self.init_weights(
+            self.char_size + self.hidden_dim, self.hidden_dim, rng
+        )
         self.bi = np.zeros((self.hidden_dim, 1))

-        self.wc = self.init_weights(self.char_size + self.hidden_dim, self.hidden_dim)
+        self.wc = self.init_weights(
+            self.char_size + self.hidden_dim, self.hidden_dim, rng
+        )
         self.bc = np.zeros((self.hidden_dim, 1))

-        self.wo = self.init_weights(self.char_size + self.hidden_dim, self.hidden_dim)
+        self.wo = self.init_weights(
+            self.char_size + self.hidden_dim, self.hidden_dim, rng
+        )
         self.bo = np.zeros((self.hidden_dim, 1))

-        self.wy = self.init_weights(self.hidden_dim, self.char_size)
+        self.wy = self.init_weights(self.hidden_dim, self.char_size, rng)
         self.by = np.zeros((self.char_size, 1))

-    def init_weights(self, input_dim: int, output_dim: int) -> np.ndarray:
+    def init_weights(
+        self, input_dim: int, output_dim: int, rng: np.random.Generator
+    ) -> np.ndarray:
         """
         Initialize weights with random values.

         :param input_dim: The input dimension.
         :param output_dim: The output dimension.
+        :param rng: The random number generator.
         :return: A matrix of initialized weights.
         """
-        return np.random.uniform(-1, 1, (output_dim, input_dim)) * np.sqrt(
+        return rng.uniform(-1, 1, (output_dim, input_dim)) * np.sqrt(
             6 / (input_dim + output_dim)
         )

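The scaling in `init_weights` is Glorot/Xavier-style initialization: sampling uniformly in [-1, 1] and multiplying by sqrt(6 / (fan_in + fan_out)) is equivalent in distribution to sampling from [-limit, limit]. A standalone sketch; the seed is an assumption added for reproducibility, the class itself leaves its generator unseeded.

import numpy as np


def xavier_uniform(
    input_dim: int, output_dim: int, rng: np.random.Generator
) -> np.ndarray:
    # The limit keeps activation variance roughly constant across layers.
    limit = np.sqrt(6 / (input_dim + output_dim))
    return rng.uniform(-1, 1, (output_dim, input_dim)) * limit


rng = np.random.default_rng(seed=0)
w = xavier_uniform(input_dim=100, output_dim=25, rng=rng)
print(w.shape)  # (25, 100)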
@@ -280,79 +296,95 @@ class LSTM:
             d_wc += np.dot(d_c, inputs[t].T)
             d_bc += d_c

-            # Update the next hidden and cell state errors
-            dh_next = (
+            # Concatenated Input Error (Sum of Error at Each Gate!)
+            d_z = (
                 np.dot(self.wf.T, d_f)
                 + np.dot(self.wi.T, d_i)
-                + np.dot(self.wo.T, d_o)
                 + np.dot(self.wc.T, d_c)
+                + np.dot(self.wo.T, d_o)
             )
-            dc_next = d_cs * self.forget_gates[t]

-        # Apply gradients to weights and biases
-        for param, grad in zip(
-            [self.wf, self.wi, self.wc, self.wo, self.wy],
-            [d_wf, d_wi, d_wc, d_wo, d_wy],
-        ):
-            param -= self.lr * grad
-
-        for param, grad in zip(
-            [self.bf, self.bi, self.bc, self.bo, self.by],
-            [d_bf, d_bi, d_bc, d_bo, d_by],
-        ):
-            param -= self.lr * grad
+            # Error of Hidden State and Cell State at Next Time Step
+            dh_next = d_z[: self.hidden_dim, :]
+            dc_next = self.forget_gates[t] * d_cs
+
+        for d_ in (d_wf, d_bf, d_wi, d_bi, d_wc, d_bc, d_wo, d_bo, d_wy, d_by):
+            np.clip(d_, -1, 1, out=d_)
+
+        self.wf += d_wf * self.lr
+        self.bf += d_bf * self.lr
+
+        self.wi += d_wi * self.lr
+        self.bi += d_bi * self.lr
+
+        self.wc += d_wc * self.lr
+        self.bc += d_bc * self.lr
+
+        self.wo += d_wo * self.lr
+        self.bo += d_bo * self.lr
+
+        self.wy += d_wy * self.lr
+        self.by += d_by * self.lr

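Two things are worth noting about the rewritten update step. First, the parameters are updated with `+=` rather than `-=` because the error handed to `backward` (built in `train` below) is target minus softmax output, i.e. the negative of the cross-entropy gradient, so adding `lr` times the accumulated deltas still descends the loss. Second, clipping every gradient to [-1, 1] guards against the exploding gradients that backpropagation through time is prone to. A tiny sketch of the in-place clip:

import numpy as np

d_w = np.array([[2.5, -0.3], [-7.0, 0.8]])
np.clip(d_w, -1, 1, out=d_w)  # in-place, exactly as in the loop above
print(d_w)  # [[ 1.  -0.3]
            #  [-1.   0.8]]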
     def train(self) -> None:
         """
-        Train the LSTM network on the input data for a specified number of epochs.
+        Train the LSTM network on the input data.
         """
-        for epoch in tqdm(range(self.epochs)):
-            inputs = [self.one_hot_encode(char) for char in self.train_X]
-            targets = [self.one_hot_encode(char) for char in self.train_y]
+        inputs = [self.one_hot_encode(char) for char in self.train_X]

-            # Forward pass
-            outputs = self.forward(inputs)
+        for _ in tqdm(range(self.epochs)):
+            predictions = self.forward(inputs)

-            # Compute error at each time step
-            errors = [output - target for output, target in zip(outputs, targets)]
+            errors = []
+            for t in range(len(predictions)):
+                errors.append(-self.softmax(predictions[t]))
+                errors[-1][self.char_to_idx[self.train_y[t]]] += 1

-            # Backward pass and weight updates
-            self.backward(errors, inputs)
+            self.backward(errors, self.concat_inputs)

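The error loop above builds, for each time step, the vector one_hot(target) - softmax(prediction), the standard negative cross-entropy gradient with respect to the output logits. A standalone sketch; the max-shift for numerical stability is an assumption, the class's own `softmax` may differ.

import numpy as np


def softmax(x: np.ndarray) -> np.ndarray:
    e = np.exp(x - np.max(x))  # shift for numerical stability
    return e / e.sum()


logits = np.array([[2.0], [0.5], [-1.0]])
target_idx = 1
error = -softmax(logits)  # start from -p, as in the loop above
error[target_idx] += 1    # now equals one_hot(target) - p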
-    def predict(self, inputs: list) -> str:
-        """
-        Predict the next character in the sequence.
-
-        :param inputs: The input data as a list of one-hot encoded vectors.
-        :return: The predicted character.
-        """
-        output = self.forward(inputs)[-1]
-        return self.idx_to_char[np.argmax(self.softmax(output))]

     def test(self) -> None:
         """
-        Test the LSTM network on the input data and compute accuracy.
+        Test the trained LSTM network on the input data and print the accuracy.
         """
-        inputs = [self.one_hot_encode(char) for char in self.train_X]
-        correct_predictions = sum(
-            self.idx_to_char[np.argmax(self.softmax(output))] == target
-            for output, target in zip(self.forward(inputs), self.train_y)
+        accuracy = 0
+        probabilities = self.forward(
+            [self.one_hot_encode(char) for char in self.train_X]
         )

-        accuracy = (correct_predictions / len(self.train_y)) * 100
-        print(f"Accuracy: {accuracy:.2f}%")
+        output = ""
+        for t in range(len(self.train_y)):
+            prediction = self.idx_to_char[
+                np.random.choice(
+                    range(self.char_size), p=self.softmax(probabilities[t].reshape(-1))
+                )
+            ]
+
+            output += prediction
+
+            if prediction == self.train_y[t]:
+                accuracy += 1
+
+        print(f"Ground Truth:\n{self.train_y}\n")
+        print(f"Predictions:\n{output}\n")
+
+        print(f"Accuracy: {round(accuracy * 100 / len(self.train_X), 2)}%")

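Note that the rewritten `test` samples the next character from the predicted distribution with `np.random.choice` instead of taking the argmax, so the printed predictions and accuracy vary from run to run. A minimal sketch of that sampling; the three-character vocabulary is a made-up example.

import numpy as np

chars = ["a", "b", "c"]
probs = np.array([0.1, 0.6, 0.3])  # softmax output over the vocabulary
idx = np.random.choice(range(len(chars)), p=probs)
print(chars[idx])  # "b" most often, but not always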
 if __name__ == "__main__":
-    # Define the input data and hyperparameters
-    data = "LSTM Neural Networks are designed to handle sequences of data.This is just rantom test data"
-    # hidden_dim = 50
-    # epochs = 1000
-    # lr = 0.01
-
-    # # Initialize and train the LSTM network
-    # lstm = LSTM(data, hidden_dim, epochs, lr)
-    # lstm.train()
-
-    # # Test the LSTM network and compute accuracy
-    # lstm.test()
+    data = """Long Short-Term Memory (LSTM) networks are a type
+of recurrent neural network (RNN) capable of learning "
+"order dependence in sequence prediction problems.
+This behavior is required in complex problem domains like "
+"machine translation, speech recognition, and more.
+iter and Schmidhuber in 1997, and were refined and "
+"popularized by many people in following work."""
+
+    lstm = LSTM(data=data, hidden_dim=25, epochs=1000, lr=0.05)
+
+    ##### Training #####
+    lstm.train()
+
+    ##### Testing #####
+    lstm.test()
+
+    # testing can be done by uncommenting the above lines of code.