From 39fd7135430ff3bffd5c40dda424ad075076465c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 15 Oct 2024 05:08:45 +0000 Subject: [PATCH] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- neural_network/lstm.py | 173 +++++++++++++++++++++++++---------------- 1 file changed, 104 insertions(+), 69 deletions(-) diff --git a/neural_network/lstm.py b/neural_network/lstm.py index 21ffe3490..f99bb9965 100644 --- a/neural_network/lstm.py +++ b/neural_network/lstm.py @@ -11,45 +11,47 @@ link : https://www.kaggle.com/code/navjindervirdee/lstm-neural-network-from-scra """ ##### Explanation ##### -# This script implements a Long Short-Term Memory (LSTM) network to learn +# This script implements a Long Short-Term Memory (LSTM) network to learn # and predict sequences of characters. # It uses numpy for numerical operations and tqdm for progress visualization. -# The data is a paragraph about LSTM, converted to lowercase and split into +# The data is a paragraph about LSTM, converted to lowercase and split into # characters. Each character is one-hot encoded for training. -# The LSTM class initializes weights and biases for the forget, input, candidate, +# The LSTM class initializes weights and biases for the forget, input, candidate, # and output gates. It also initializes weights and biases for the final output layer. -# The forward method performs forward propagation through the LSTM network, -# computing hidden and cell states. It uses sigmoid and tanh activation +# The forward method performs forward propagation through the LSTM network, +# computing hidden and cell states. It uses sigmoid and tanh activation # functions for the gates and cell states. -# The backward method performs backpropagation through time, computing gradients -# for the weights and biases. It updates the weights and biases using +# The backward method performs backpropagation through time, computing gradients +# for the weights and biases. It updates the weights and biases using # the computed gradients and the learning rate. -# The train method trains the LSTM network on the input data for a specified -# number of epochs. It uses one-hot encoded inputs and computes errors +# The train method trains the LSTM network on the input data for a specified +# number of epochs. It uses one-hot encoded inputs and computes errors # using the softmax function. -# The test method evaluates the trained LSTM network on the input data, +# The test method evaluates the trained LSTM network on the input data, # computing accuracy based on predictions. -# The script initializes the LSTM network with specified hyperparameters -# and trains it on the input data. Finally, it tests the trained network +# The script initializes the LSTM network with specified hyperparameters +# and trains it on the input data. Finally, it tests the trained network # and prints the accuracy of the predictions. ##### Imports ##### from tqdm import tqdm import numpy as np + class LSTM: - def __init__(self, data: str, hidden_dim: int = 25, - epochs: int = 1000, lr: float = 0.05) -> None: + def __init__( + self, data: str, hidden_dim: int = 25, epochs: int = 1000, lr: float = 0.05 + ) -> None: """ Initialize the LSTM network with the given data and hyperparameters. - + :param data: The input data as a string. :param hidden_dim: The number of hidden units in the LSTM layer. :param epochs: The number of training epochs. 
@@ -63,7 +65,7 @@ class LSTM: self.chars = set(self.data) self.data_size, self.char_size = len(self.data), len(self.chars) - print(f'Data size: {self.data_size}, Char Size: {self.char_size}') + print(f"Data size: {self.data_size}, Char Size: {self.char_size}") self.char_to_idx = {c: i for i, c in enumerate(self.chars)} self.idx_to_char = {i: c for i, c in enumerate(self.chars)} @@ -76,7 +78,7 @@ class LSTM: def one_hot_encode(self, char: str) -> np.ndarray: """ One-hot encode a character. - + :param char: The character to encode. :return: A one-hot encoded vector. """ @@ -88,20 +90,16 @@ class LSTM: """ Initialize the weights and biases for the LSTM network. """ - self.wf = self.init_weights(self.char_size + self.hidden_dim, - self.hidden_dim) + self.wf = self.init_weights(self.char_size + self.hidden_dim, self.hidden_dim) self.bf = np.zeros((self.hidden_dim, 1)) - self.wi = self.init_weights(self.char_size + self.hidden_dim, - self.hidden_dim) + self.wi = self.init_weights(self.char_size + self.hidden_dim, self.hidden_dim) self.bi = np.zeros((self.hidden_dim, 1)) - self.wc = self.init_weights(self.char_size + self.hidden_dim, - self.hidden_dim) + self.wc = self.init_weights(self.char_size + self.hidden_dim, self.hidden_dim) self.bc = np.zeros((self.hidden_dim, 1)) - self.wo = self.init_weights(self.char_size + self.hidden_dim, - self.hidden_dim) + self.wo = self.init_weights(self.char_size + self.hidden_dim, self.hidden_dim) self.bo = np.zeros((self.hidden_dim, 1)) self.wy = self.init_weights(self.hidden_dim, self.char_size) @@ -110,19 +108,20 @@ class LSTM: def init_weights(self, input_dim: int, output_dim: int) -> np.ndarray: """ Initialize weights with random values. - + :param input_dim: The input dimension. :param output_dim: The output dimension. :return: A matrix of initialized weights. """ - return np.random.uniform(-1, 1, (output_dim, input_dim)) * \ - np.sqrt(6 / (input_dim + output_dim)) + return np.random.uniform(-1, 1, (output_dim, input_dim)) * np.sqrt( + 6 / (input_dim + output_dim) + ) ##### Activation Functions ##### def sigmoid(self, x: np.ndarray, derivative: bool = False) -> np.ndarray: """ Sigmoid activation function. - + :param x: The input array. :param derivative: Whether to compute the derivative. :return: The sigmoid activation or its derivative. @@ -134,19 +133,19 @@ class LSTM: def tanh(self, x: np.ndarray, derivative: bool = False) -> np.ndarray: """ Tanh activation function. - + :param x: The input array. :param derivative: Whether to compute the derivative. :return: The tanh activation or its derivative. """ if derivative: - return 1 - x ** 2 + return 1 - x**2 return np.tanh(x) def softmax(self, x: np.ndarray) -> np.ndarray: """ Softmax activation function. - + :param x: The input array. :return: The softmax activation. """ @@ -173,7 +172,7 @@ class LSTM: def forward(self, inputs: list) -> list: """ Perform forward propagation through the LSTM network. - + :param inputs: The input data as a list of one-hot encoded vectors. :return: The outputs of the network. 
""" @@ -182,21 +181,29 @@ class LSTM: outputs = [] for t in range(len(inputs)): self.concat_inputs[t] = np.concatenate( - (self.hidden_states[t - 1], inputs[t])) + (self.hidden_states[t - 1], inputs[t]) + ) - self.forget_gates[t] = self.sigmoid(np.dot(self.wf, - self.concat_inputs[t]) + self.bf) - self.input_gates[t] = self.sigmoid(np.dot(self.wi, - self.concat_inputs[t]) + self.bi) - self.candidate_gates[t] = self.tanh(np.dot(self.wc, - self.concat_inputs[t]) + self.bc) - self.output_gates[t] = self.sigmoid(np.dot(self.wo, - self.concat_inputs[t]) + self.bo) + self.forget_gates[t] = self.sigmoid( + np.dot(self.wf, self.concat_inputs[t]) + self.bf + ) + self.input_gates[t] = self.sigmoid( + np.dot(self.wi, self.concat_inputs[t]) + self.bi + ) + self.candidate_gates[t] = self.tanh( + np.dot(self.wc, self.concat_inputs[t]) + self.bc + ) + self.output_gates[t] = self.sigmoid( + np.dot(self.wo, self.concat_inputs[t]) + self.bo + ) - self.cell_states[t] = self.forget_gates[t] * self.cell_states[t - 1] + \ - self.input_gates[t] * self.candidate_gates[t] - self.hidden_states[t] = self.output_gates[t] * \ - self.tanh(self.cell_states[t]) + self.cell_states[t] = ( + self.forget_gates[t] * self.cell_states[t - 1] + + self.input_gates[t] * self.candidate_gates[t] + ) + self.hidden_states[t] = self.output_gates[t] * self.tanh( + self.cell_states[t] + ) outputs.append(np.dot(self.wy, self.hidden_states[t]) + self.by) @@ -205,7 +212,7 @@ class LSTM: def backward(self, errors: list, inputs: list) -> None: """ Perform backpropagation through time to compute gradients and update weights. - + :param errors: The errors at each time step. :param inputs: The input data as a list of one-hot encoded vectors. """ @@ -215,8 +222,10 @@ class LSTM: d_wo, d_bo = 0, 0 d_wy, d_by = 0, 0 - dh_next, dc_next = np.zeros_like(self.hidden_states[0]), \ - np.zeros_like(self.cell_states[0]) + dh_next, dc_next = ( + np.zeros_like(self.hidden_states[0]), + np.zeros_like(self.cell_states[0]), + ) for t in reversed(range(len(inputs))): error = errors[t] @@ -228,45 +237,69 @@ class LSTM: d_hs = np.dot(self.wy.T, error) + dh_next # Output Gate Weights and Biases Errors - d_o = self.tanh(self.cell_states[t]) * d_hs * \ - self.sigmoid(self.output_gates[t], derivative=True) + d_o = ( + self.tanh(self.cell_states[t]) + * d_hs + * self.sigmoid(self.output_gates[t], derivative=True) + ) d_wo += np.dot(d_o, inputs[t].T) d_bo += d_o # Cell State Error - d_cs = self.tanh(self.tanh(self.cell_states[t]), - derivative=True) * self.output_gates[t] * d_hs + dc_next + d_cs = ( + self.tanh(self.tanh(self.cell_states[t]), derivative=True) + * self.output_gates[t] + * d_hs + + dc_next + ) # Forget Gate Weights and Biases Errors - d_f = d_cs * self.cell_states[t - 1] * \ - self.sigmoid(self.forget_gates[t], derivative=True) + d_f = ( + d_cs + * self.cell_states[t - 1] + * self.sigmoid(self.forget_gates[t], derivative=True) + ) d_wf += np.dot(d_f, inputs[t].T) d_bf += d_f # Input Gate Weights and Biases Errors - d_i = d_cs * self.candidate_gates[t] * \ - self.sigmoid(self.input_gates[t], derivative=True) + d_i = ( + d_cs + * self.candidate_gates[t] + * self.sigmoid(self.input_gates[t], derivative=True) + ) d_wi += np.dot(d_i, inputs[t].T) d_bi += d_i # Candidate Gate Weights and Biases Errors - d_c = d_cs * self.input_gates[t] * self.tanh(self.candidate_gates[t], - derivative=True) + d_c = ( + d_cs + * self.input_gates[t] + * self.tanh(self.candidate_gates[t], derivative=True) + ) d_wc += np.dot(d_c, inputs[t].T) d_bc += d_c # Update the next hidden 
and cell state errors - dh_next = np.dot(self.wf.T, d_f) + np.dot(self.wi.T, d_i) + \ - np.dot(self.wo.T, d_o) + np.dot(self.wc.T, d_c) + dh_next = ( + np.dot(self.wf.T, d_f) + + np.dot(self.wi.T, d_i) + + np.dot(self.wo.T, d_o) + + np.dot(self.wc.T, d_c) + ) dc_next = d_cs * self.forget_gates[t] # Apply gradients to weights and biases - for param, grad in zip([self.wf, self.wi, self.wc, self.wo, self.wy], - [d_wf, d_wi, d_wc, d_wo, d_wy]): + for param, grad in zip( + [self.wf, self.wi, self.wc, self.wo, self.wy], + [d_wf, d_wi, d_wc, d_wo, d_wy], + ): param -= self.lr * grad - for param, grad in zip([self.bf, self.bi, self.bc, self.bo, self.by], - [d_bf, d_bi, d_bc, d_bo, d_by]): + for param, grad in zip( + [self.bf, self.bi, self.bc, self.bo, self.by], + [d_bf, d_bi, d_bc, d_bo, d_by], + ): param -= self.lr * grad def train(self) -> None: @@ -289,7 +322,7 @@ class LSTM: def predict(self, inputs: list) -> str: """ Predict the next character in the sequence. - + :param inputs: The input data as a list of one-hot encoded vectors. :return: The predicted character. """ @@ -301,11 +334,13 @@ class LSTM: Test the LSTM network on the input data and compute accuracy. """ inputs = [self.one_hot_encode(char) for char in self.train_X] - correct_predictions = sum(self.idx_to_char[np.argmax(self.softmax(output))] == target - for output, target in zip(self.forward(inputs), self.train_y)) + correct_predictions = sum( + self.idx_to_char[np.argmax(self.softmax(output))] == target + for output, target in zip(self.forward(inputs), self.train_y) + ) accuracy = (correct_predictions / len(self.train_y)) * 100 - print(f'Accuracy: {accuracy:.2f}%') + print(f"Accuracy: {accuracy:.2f}%") if __name__ == "__main__":
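The hunks above only reformat the file, so the behaviour described in the explanation comments is unchanged. As a companion to that explanation, the following is a minimal, standalone sketch of the single-step gate arithmetic that the reformatted forward() computes, using the same shapes visible in the diff (weights of shape (hidden_dim, char_size + hidden_dim), one-hot column inputs); names mirror the file, but this snippet is an illustration under those assumptions, not the patched code itself.

import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def lstm_step(x_t, h_prev, c_prev, wf, bf, wi, bi, wc, bc, wo, bo):
    z = np.concatenate((h_prev, x_t))   # concat_inputs[t]: hidden state stacked on the one-hot input
    f = sigmoid(wf @ z + bf)             # forget gate
    i = sigmoid(wi @ z + bi)             # input gate
    g = np.tanh(wc @ z + bc)             # candidate gate
    o = sigmoid(wo @ z + bo)             # output gate
    c = f * c_prev + i * g               # new cell state
    h = o * np.tanh(c)                   # new hidden state
    return h, c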
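The explanation comments also note that each character is one-hot encoded for training and decoded back through idx_to_char for prediction. A small round-trip sketch of that encoding, assuming char_to_idx / idx_to_char mappings built the same way as in __init__ (the sample string here is a placeholder):

import numpy as np

chars = sorted(set("hello"))
char_to_idx = {c: i for i, c in enumerate(chars)}
idx_to_char = {i: c for i, c in enumerate(chars)}

vec = np.zeros((len(chars), 1))       # one-hot column vector, shape (char_size, 1)
vec[char_to_idx["e"]] = 1
assert idx_to_char[int(np.argmax(vec))] == "e"   # argmax recovers the encoded character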
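The __main__ block itself is not shown in the diff, but given the constructor signature in the first hunk (data: str, hidden_dim: int = 25, epochs: int = 1000, lr: float = 0.05) and the train()/test() methods above, a typical run would look roughly like this; the import path and sample text are assumptions for illustration only.

from neural_network.lstm import LSTM  # assumed module path, matching neural_network/lstm.py

sample_text = "long short-term memory networks learn sequences of characters."
lstm = LSTM(data=sample_text, hidden_dim=25, epochs=1000, lr=0.05)
lstm.train()  # fits on one-hot encoded characters, showing progress via tqdm
lstm.test()   # prints next-character prediction accuracy on the training text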