From 750c9f6fc868d06bbd26fdb094e4cb02e1478751 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9CShashank?=
Date: Tue, 15 Oct 2024 23:39:50 +0530
Subject: [PATCH] added doctests for each function

---
 neural_network/lstm.py | 109 ++++++++++++++++++++++++++++++++++++-----
 1 file changed, 98 insertions(+), 11 deletions(-)

diff --git a/neural_network/lstm.py b/neural_network/lstm.py
index 37ca602bf..7e464ecc7 100644
--- a/neural_network/lstm.py
+++ b/neural_network/lstm.py
@@ -80,6 +80,18 @@ class LongShortTermMemory:
 
         :param char: The character to encode.
         :return: A one-hot encoded vector.
+
+        >>> lstm = LongShortTermMemory("abcde" * 50, hidden_layer_size=10)
+        >>> output = lstm.one_hot_encode('a')
+        >>> isinstance(output, np.ndarray)
+        True
+        >>> output.shape
+        (5, 1)
+        >>> output = lstm.one_hot_encode('c')
+        >>> isinstance(output, np.ndarray)
+        True
+        >>> output.shape
+        (5, 1)
         """
         vector = np.zeros((self.vocabulary_size, 1))
         vector[self.char_to_index[char]] = 1
@@ -88,8 +100,48 @@ class LongShortTermMemory:
 
     def initialize_weights(self) -> None:
         """
         Initialize the weights and biases for the LSTM network.
-        """
+        This method initializes the forget gate, input gate,
+        cell candidate, and output gate weights and biases,
+        as well as the output layer weights and biases,
+        ensuring that each has the shape required by the
+        vocabulary and hidden layer sizes.
+
+        >>> lstm = LongShortTermMemory("abcde" * 50, hidden_layer_size=10)
+
+        # Check the shapes of the weights and biases after initialization
+        >>> lstm.initialize_weights()
+
+        # Forget gate weights and bias
+        >>> lstm.forget_gate_weights.shape
+        (10, 15)
+        >>> lstm.forget_gate_bias.shape
+        (10, 1)
+
+        # Input gate weights and bias
+        >>> lstm.input_gate_weights.shape
+        (10, 15)
+        >>> lstm.input_gate_bias.shape
+        (10, 1)
+
+        # Cell candidate weights and bias
+        >>> lstm.cell_candidate_weights.shape
+        (10, 15)
+        >>> lstm.cell_candidate_bias.shape
+        (10, 1)
+
+        # Output gate weights and bias
+        >>> lstm.output_gate_weights.shape
+        (10, 15)
+        >>> lstm.output_gate_bias.shape
+        (10, 1)
+
+        # Output layer weights and bias
+        >>> lstm.output_layer_weights.shape
+        (5, 10)
+        >>> lstm.output_layer_bias.shape
+        (5, 1)
+        """
         self.forget_gate_weights = self.init_weights(
             self.vocabulary_size + self.hidden_layer_size, self.hidden_layer_size
         )
@@ -110,10 +162,10 @@ class LongShortTermMemory:
         )
         self.output_gate_bias = np.zeros((self.hidden_layer_size, 1))
 
-        self.output_layer_weights: np.ndarray = self.init_weights(
+        self.output_layer_weights = self.init_weights(
             self.hidden_layer_size, self.vocabulary_size
         )
-        self.output_layer_bias: np.ndarray = np.zeros((self.vocabulary_size, 1))
+        self.output_layer_bias = np.zeros((self.vocabulary_size, 1))
 
     def init_weights(self, input_dim: int, output_dim: int) -> np.ndarray:
         """
@@ -134,6 +186,16 @@ class LongShortTermMemory:
         :param x: The input array.
         :param derivative: Whether to compute the derivative.
         :return: The sigmoid activation or its derivative.
+
+        >>> lstm = LongShortTermMemory("abcde" * 50, hidden_layer_size=10)
+        >>> output = lstm.sigmoid(np.array([[1, 2, 3]]))
+        >>> isinstance(output, np.ndarray)
+        True
+        >>> np.round(output, 3)
+        array([[0.731, 0.881, 0.953]])
+        >>> derivative_output = lstm.sigmoid(output, derivative=True)
+        >>> np.round(derivative_output, 3)
+        array([[0.197, 0.105, 0.045]])
         """
         if derivative:
             return x * (1 - x)
@@ -146,6 +208,16 @@ class LongShortTermMemory:
         :param x: The input array.
         :param derivative: Whether to compute the derivative.
         :return: The tanh activation or its derivative.
+
+        >>> lstm = LongShortTermMemory("abcde" * 50, hidden_layer_size=10)
+        >>> output = lstm.tanh(np.array([[1, 2, 3]]))
+        >>> isinstance(output, np.ndarray)
+        True
+        >>> np.round(output, 3)
+        array([[0.762, 0.964, 0.995]])
+        >>> derivative_output = lstm.tanh(output, derivative=True)
+        >>> np.round(derivative_output, 3)
+        array([[0.42 , 0.071, 0.01 ]])
         """
         if derivative:
             return 1 - x**2
@@ -157,6 +229,13 @@ class LongShortTermMemory:
 
         :param x: The input array.
         :return: The softmax activation.
+
+        >>> lstm = LongShortTermMemory("abcde" * 50, hidden_layer_size=10)
+        >>> output = lstm.softmax(np.array([1, 2, 3]))
+        >>> isinstance(output, np.ndarray)
+        True
+        >>> np.round(output, 3)
+        array([0.09 , 0.245, 0.665])
         """
         exp_x = np.exp(x - np.max(x))
         return exp_x / exp_x.sum(axis=0)
@@ -164,6 +243,20 @@ class LongShortTermMemory:
     def reset_network_state(self) -> None:
         """
         Reset the LSTM network states.
+
+        Resets the internal states of the LSTM network, including the combined inputs,
+        hidden states, cell states, gate activations, and network outputs.
+
+        >>> lstm = LongShortTermMemory("abcde" * 50, hidden_layer_size=10)
+        >>> lstm.reset_network_state()
+        >>> lstm.hidden_states[-1].shape == (10, 1)
+        True
+        >>> lstm.cell_states[-1].shape == (10, 1)
+        True
+        >>> lstm.combined_inputs == {}
+        True
+        >>> lstm.network_outputs == {}
+        True
         """
         self.combined_inputs = {}
         self.hidden_states = {-1: np.zeros((self.hidden_layer_size, 1))}
@@ -232,12 +325,6 @@ class LongShortTermMemory:
         return outputs
 
     def backward_pass(self, errors: list[np.ndarray], inputs: list[np.ndarray]) -> None:
-        """
-        Perform backpropagation through time to compute gradients and update weights.
-
-        :param errors: The errors at each time step.
-        :param inputs: The input data as a list of one-hot encoded vectors.
-        """
         d_forget_gate_weights, d_forget_gate_bias = 0, 0
         d_input_gate_weights, d_input_gate_bias = 0, 0
         d_cell_candidate_weights, d_cell_candidate_bias = 0, 0
@@ -400,8 +487,8 @@ if __name__ == "__main__":
     # learning_rate=0.05,
     # )
 
-    ##### Training #####
+    # #### Training #####
     # lstm_model.train()
 
-    ##### Testing #####
+    # #### Testing #####
     # lstm_model.test()
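
A quick way to exercise the doctests this patch adds is Python's built-in doctest runner. Below is a minimal sketch, assuming the repository root is on sys.path so that neural_network.lstm is importable; the lstm_module alias is illustrative, not part of the patch.

    import doctest

    # Import the module patched above; assumes the script is run from the
    # repository root (or that the root is otherwise on sys.path).
    import neural_network.lstm as lstm_module

    # testmod() collects and runs every >>> example in the module's
    # docstrings and returns a TestResults(failed, attempted) named tuple.
    results = doctest.testmod(lstm_module, verbose=False)
    print(f"attempted: {results.attempted}, failed: {results.failed}")

The same examples can also be run without a wrapper script via python3 -m doctest neural_network/lstm.py -v.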