From f0919fed68131d035dbd46ae174f06db3af7cdbe Mon Sep 17 00:00:00 2001
From: Shashank
Date: Wed, 16 Oct 2024 11:11:00 +0530
Subject: [PATCH] Add doctests for the forward and backward passes; rename x
 to input_array in the activation functions

---
 neural_network/lstm.py | 77 ++++++++++++++++++++++++++++++------------
 1 file changed, 56 insertions(+), 21 deletions(-)

diff --git a/neural_network/lstm.py b/neural_network/lstm.py
index d29b9776d..c9d11d905 100644
--- a/neural_network/lstm.py
+++ b/neural_network/lstm.py
@@ -175,12 +175,20 @@ class LongShortTermMemory:
         :param input_dim: The input dimension.
         :param output_dim: The output dimension.
         :return: A matrix of initialized weights.
+
+        Example:
+        >>> lstm = LongShortTermMemory("abcde" * 50, hidden_layer_size=10)
+        >>> weights = lstm.init_weights(5, 10)
+        >>> isinstance(weights, np.ndarray)
+        True
+        >>> weights.shape
+        (10, 5)
         """
         return self.random_generator.uniform(-1, 1, (output_dim, input_dim)) * np.sqrt(
             6 / (input_dim + output_dim)
         )
 
-    def sigmoid(self, x: np.ndarray, derivative: bool = False) -> np.ndarray:
+    def sigmoid(self, input_array: np.ndarray, derivative: bool = False) -> np.ndarray:
         """
         Sigmoid activation function.
 
@@ -199,10 +207,10 @@ class LongShortTermMemory:
         array([[0.197, 0.105, 0.045]])
         """
         if derivative:
-            return x * (1 - x)
-        return 1 / (1 + np.exp(-x))
+            return input_array * (1 - input_array)
+        return 1 / (1 + np.exp(-input_array))
 
-    def tanh(self, x: np.ndarray, derivative: bool = False) -> np.ndarray:
+    def tanh(self, input_array: np.ndarray, derivative: bool = False) -> np.ndarray:
         """
         Tanh activation function.
 
@@ -221,10 +229,10 @@ class LongShortTermMemory:
         array([[0.42 , 0.071, 0.01 ]])
         """
         if derivative:
-            return 1 - x**2
-        return np.tanh(x)
+            return 1 - input_array**2
+        return np.tanh(input_array)
 
-    def softmax(self, x: np.ndarray) -> np.ndarray:
+    def softmax(self, input_array: np.ndarray) -> np.ndarray:
         """
         Softmax activation function.
 
@@ -238,7 +246,7 @@ class LongShortTermMemory:
         >>> np.round(output, 3)
         array([0.09 , 0.245, 0.665])
         """
-        exp_x = np.exp(x - np.max(x))
+        exp_x = np.exp(input_array - np.max(input_array))
         return exp_x / exp_x.sum(axis=0)
 
     def reset_network_state(self) -> None:
@@ -270,17 +278,14 @@ class LongShortTermMemory:
 
     def forward_pass(self, inputs: list[np.ndarray]) -> list[np.ndarray]:
         """
-        Perform forward propagation through the LSTM network.
+        Perform a forward pass through the LSTM network for the given inputs.
 
-        :param inputs: The input data as a list of one-hot encoded vectors.
-        :return: The outputs of the network.
-        """
-        """
-        Forward pass through the LSTM network.
+        :param inputs: A list of input arrays (sequences).
+        :return: A list of network outputs.
 
-        >>> lstm = LongShortTermMemory(input_data="abcde", hidden_layer_size=10,
-        training_epochs=1, learning_rate=0.01)
-        >>> inputs = [lstm.one_hot_encode(char) for char in lstm.input_sequence]
+        Example:
+        >>> lstm = LongShortTermMemory("abcde" * 50, hidden_layer_size=10)
+        >>> inputs = [np.random.rand(5, 1) for _ in range(5)]
         >>> outputs = lstm.forward_pass(inputs)
         >>> len(outputs) == len(inputs)
         True
@@ -326,6 +331,21 @@ class LongShortTermMemory:
         return outputs
 
     def backward_pass(self, errors: list[np.ndarray], inputs: list[np.ndarray]) -> None:
+        """
+        Perform the backward pass for the LSTM model, adjusting weights and biases.
+
+        :param errors: A list of errors computed from the output layer.
+        :param inputs: A list of one-hot encoded input vectors.
+
+        Example:
+        >>> lstm = LongShortTermMemory("abcde" * 50, hidden_layer_size=10)
+        >>> inputs = [lstm.one_hot_encode(char) for char in lstm.input_sequence]
+        >>> predictions = lstm.forward_pass(inputs)
+        >>> errors = [-lstm.softmax(predictions[t]) for t in range(len(predictions))]
+        >>> for t in range(len(predictions)):
+        ...     errors[t][lstm.char_to_index[lstm.target_sequence[t]]] += 1
+        >>> lstm.backward_pass(errors, inputs)  # Should run without any errors
+        """
         d_forget_gate_weights, d_forget_gate_bias = 0, 0
         d_input_gate_weights, d_input_gate_bias = 0, 0
         d_cell_candidate_weights, d_cell_candidate_bias = 0, 0
@@ -422,6 +442,13 @@ class LongShortTermMemory:
         self.output_layer_bias += d_output_layer_bias * self.learning_rate
 
     def train(self) -> None:
+        """
+        Train the LSTM model.
+
+        Example:
+        >>> lstm = LongShortTermMemory("abcde" * 50, hidden_layer_size=10)
+        >>> lstm.train()
+        """
         inputs = [self.one_hot_encode(char) for char in self.input_sequence]
 
         for _ in range(self.training_epochs):
@@ -434,12 +461,20 @@ class LongShortTermMemory:
 
             self.backward_pass(errors, inputs)
 
-    def test(self):
+    def test(self) -> str:
         """
         Test the LSTM model.
 
         Returns:
             str: The output predictions.
+
+        Example:
+        >>> lstm = LongShortTermMemory("abcde" * 50, hidden_layer_size=10)
+        >>> output = lstm.test()
+        >>> isinstance(output, str)
+        True
+        >>> len(output) == len(lstm.input_sequence)
+        True
         """
         accuracy = 0
         probabilities = self.forward_pass(
@@ -461,9 +496,9 @@ class LongShortTermMemory:
             if prediction == self.target_sequence[t]:
                 accuracy += 1
 
-        print(f"Ground Truth:\n{self.target_sequence}\n")
-        print(f"Predictions:\n{output}\n")
-        print(f"Accuracy: {round(accuracy * 100 / len(self.input_sequence), 2)}%")
+        # print(f"Ground Truth:\n{self.target_sequence}\n")
+        # print(f"Predictions:\n{output}\n")
+        # print(f"Accuracy: {round(accuracy * 100 / len(self.input_sequence), 2)}%")
         return output
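-- 
Note: the doctests added above can be exercised directly with either of

    python3 -m doctest neural_network/lstm.py
    pytest --doctest-modules neural_network/lstm.py

The backward_pass example builds each error term as
one_hot(target) - softmax(prediction), i.e. the negative gradient of softmax
cross-entropy with respect to the logits, which matches the
learning-rate-scaled += updates inside backward_pass. A minimal standalone
sketch of that identity, assuming only numpy (the names below are
illustrative, not taken from lstm.py):

    import numpy as np

    def softmax(logits: np.ndarray) -> np.ndarray:
        # Numerically stable softmax over axis 0, mirroring
        # LongShortTermMemory.softmax.
        shifted = np.exp(logits - np.max(logits))
        return shifted / shifted.sum(axis=0)

    vocabulary_size = 5
    prediction = np.random.rand(vocabulary_size, 1)  # stand-in for one forward_pass output
    target_index = 2  # stand-in for char_to_index[target_char]

    # Same construction as the doctest: negate the softmax, add 1 at the target.
    error = -softmax(prediction)
    error[target_index] += 1

    # Closed form of the same quantity: one_hot(target) - softmax(prediction).
    one_hot = np.zeros_like(prediction)
    one_hot[target_index] = 1.0
    assert np.allclose(error, one_hot - softmax(prediction))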