added doctests for each function

Shashank 2024-10-15 23:39:50 +05:30
parent f058116f95
commit 750c9f6fc8


@@ -80,6 +80,18 @@ class LongShortTermMemory:
:param char: The character to encode.
:return: A one-hot encoded vector.
>>> lstm = LongShortTermMemory("abcde" * 50, hidden_layer_size=10)
>>> output = lstm.one_hot_encode('a')
>>> isinstance(output, np.ndarray)
True
>>> output.shape
(5, 1)
>>> output = lstm.one_hot_encode('c')
>>> isinstance(output, np.ndarray)
True
>>> output.shape
(5, 1)
"""
vector = np.zeros((self.vocabulary_size, 1))
vector[self.char_to_index[char]] = 1
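A quick round-trip check of the encoding, as a hypothetical session in the same doctest style as the ones added above (it relies only on the char_to_index mapping already referenced in the method body):

>>> lstm = LongShortTermMemory("abcde" * 50, hidden_layer_size=10)
>>> vector = lstm.one_hot_encode('b')
>>> int(vector.sum())  # exactly one entry is set
1
>>> int(np.argmax(vector)) == lstm.char_to_index['b']
True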
@@ -88,8 +100,48 @@ class LongShortTermMemory:
def initialize_weights(self) -> None:
"""
Initialize the weights and biases for the LSTM network.

This method initializes the weights and biases for the forget gate,
input gate, cell candidate, and output gate, as well as for the
output layer, and ensures that they all have the correct shapes.
>>> lstm = LongShortTermMemory("abcde" * 50, hidden_layer_size=10)
>>> # Check the shapes of the weights and biases after initialization
>>> lstm.initialize_weights()
>>> # Forget gate weights and bias
>>> lstm.forget_gate_weights.shape
(10, 15)
>>> lstm.forget_gate_bias.shape
(10, 1)
>>> # Input gate weights and bias
>>> lstm.input_gate_weights.shape
(10, 15)
>>> lstm.input_gate_bias.shape
(10, 1)
>>> # Cell candidate weights and bias
>>> lstm.cell_candidate_weights.shape
(10, 15)
>>> lstm.cell_candidate_bias.shape
(10, 1)
>>> # Output gate weights and bias
>>> lstm.output_gate_weights.shape
(10, 15)
>>> lstm.output_gate_bias.shape
(10, 1)
>>> # Output layer weights and bias
>>> lstm.output_layer_weights.shape
(5, 10)
>>> lstm.output_layer_bias.shape
(5, 1)
"""
self.forget_gate_weights = self.init_weights(
self.vocabulary_size + self.hidden_layer_size, self.hidden_layer_size
)
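The (10, 15) gate shapes in the doctest above follow from the LSTM feeding each gate the previous hidden state concatenated with the one-hot input: hidden_layer_size + vocabulary_size = 10 + 5 = 15 input columns, with one output row per hidden unit. A minimal standalone sketch of that concatenation, assuming init_weights returns an (output_dim, input_dim) array as the doctest shapes imply:

import numpy as np

hidden_layer_size, vocabulary_size = 10, 5
hidden_state = np.zeros((hidden_layer_size, 1))      # h_{t-1}
one_hot_input = np.zeros((vocabulary_size, 1))       # x_t
combined = np.vstack((hidden_state, one_hot_input))  # shape (15, 1)
forget_gate_weights = np.random.randn(hidden_layer_size, combined.shape[0])
print((forget_gate_weights @ combined).shape)        # (10, 1), matching the bias shape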
@@ -110,10 +162,10 @@ class LongShortTermMemory:
)
self.output_gate_bias = np.zeros((self.hidden_layer_size, 1))
self.output_layer_weights = self.init_weights(
self.hidden_layer_size, self.vocabulary_size
)
self.output_layer_bias = np.zeros((self.vocabulary_size, 1))
def init_weights(self, input_dim: int, output_dim: int) -> np.ndarray:
"""
@@ -134,6 +186,16 @@ class LongShortTermMemory:
:param x: The input array.
:param derivative: Whether to compute the derivative.
:return: The sigmoid activation or its derivative.
>>> lstm = LongShortTermMemory("abcde" * 50, hidden_layer_size=10)
>>> output = lstm.sigmoid(np.array([[1, 2, 3]]))
>>> isinstance(output, np.ndarray)
True
>>> np.round(output, 3)
array([[0.731, 0.881, 0.953]])
>>> derivative_output = lstm.sigmoid(output, derivative=True)
>>> np.round(derivative_output, 3)
array([[0.197, 0.105, 0.045]])
"""
if derivative:
return x * (1 - x)
@@ -146,6 +208,16 @@ class LongShortTermMemory:
:param x: The input array.
:param derivative: Whether to compute the derivative.
:return: The tanh activation or its derivative.
>>> lstm = LongShortTermMemory("abcde" * 50, hidden_layer_size=10)
>>> output = lstm.tanh(np.array([[1, 2, 3]]))
>>> isinstance(output, np.ndarray)
True
>>> np.round(output, 3)
array([[0.762, 0.964, 0.995]])
>>> derivative_output = lstm.tanh(output, derivative=True)
>>> np.round(derivative_output, 3)
array([[0.42 , 0.071, 0.01 ]])
"""
if derivative:
return 1 - x**2
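Note that both sigmoid and tanh compute their derivative from an already-activated value (x * (1 - x) and 1 - x**2 respectively) rather than from the raw input, which is why the doctests above pass output back in with derivative=True. A hypothetical session making that convention explicit:

>>> lstm = LongShortTermMemory("abcde" * 50, hidden_layer_size=10)
>>> raw = np.array([[0.5]])
>>> activated = lstm.tanh(raw)
>>> np.allclose(lstm.tanh(activated, derivative=True), 1 - np.tanh(raw) ** 2)
True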
@@ -157,6 +229,13 @@ class LongShortTermMemory:
:param x: The input array.
:return: The softmax activation.
>>> lstm = LongShortTermMemory("abcde" * 50, hidden_layer_size=10)
>>> output = lstm.softmax(np.array([1, 2, 3]))
>>> isinstance(output, np.ndarray)
True
>>> np.round(output, 3)
array([0.09 , 0.245, 0.665])
"""
exp_x = np.exp(x - np.max(x))
return exp_x / exp_x.sum(axis=0)
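Subtracting np.max(x) before exponentiating is the standard numerical-stability trick: it shifts every exponent by the same constant, leaving the normalized ratios unchanged while preventing overflow for large inputs. A hypothetical doctest demonstrating this:

>>> lstm = LongShortTermMemory("abcde" * 50, hidden_layer_size=10)
>>> np.round(lstm.softmax(np.array([1000.0, 1001.0, 1002.0])), 3)
array([0.09 , 0.245, 0.665])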
@@ -164,6 +243,20 @@ class LongShortTermMemory:
def reset_network_state(self) -> None:
"""
Reset the LSTM network states.
Resets the internal states of the LSTM network, including the combined inputs,
hidden states, cell states, gate activations, and network outputs.
>>> lstm = LongShortTermMemory("abcde" * 50, hidden_layer_size=10)
>>> lstm.reset_network_state()
>>> lstm.hidden_states[-1].shape == (10, 1)
True
>>> lstm.cell_states[-1].shape == (10, 1)
True
>>> lstm.combined_inputs == {}
True
>>> lstm.network_outputs == {}
True
"""
self.combined_inputs = {}
self.hidden_states = {-1: np.zeros((self.hidden_layer_size, 1))}
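Keying the initial hidden and cell states at index -1 lets a forward pass written as hidden_states[t - 1] work uniformly, including at the first step t = 0. A minimal sketch of that access pattern, independent of the class:

import numpy as np

hidden_states = {-1: np.zeros((10, 1))}  # initial state, as in reset_network_state
for t in range(3):
    previous = hidden_states[t - 1]      # t = 0 reads the -1 entry
    hidden_states[t] = previous + 1      # stand-in for the real LSTM update
print(int(hidden_states[2].max()))       # 3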
@@ -232,12 +325,6 @@ class LongShortTermMemory:
return outputs
def backward_pass(self, errors: list[np.ndarray], inputs: list[np.ndarray]) -> None:
"""
Perform backpropagation through time to compute gradients and update weights.
:param errors: The errors at each time step.
:param inputs: The input data as a list of one-hot encoded vectors.
"""
d_forget_gate_weights, d_forget_gate_bias = 0, 0
d_input_gate_weights, d_input_gate_bias = 0, 0
d_cell_candidate_weights, d_cell_candidate_bias = 0, 0
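The zero-initialized accumulators above are summed into while walking the sequence in reverse, then applied as a single update. Since the diff truncates here, the following is only a schematic sketch of that accumulation pattern for the output layer (hypothetical helper, not the file's full backward pass):

import numpy as np

def accumulate_output_layer_grads(errors, hidden_states):
    """Sketch: sum output-layer gradients over all time steps."""
    d_weights, d_bias = 0.0, 0.0
    for t in reversed(range(len(errors))):
        d_weights += errors[t] @ hidden_states[t].T  # (vocab, 1) @ (1, hidden)
        d_bias += errors[t]
    return d_weights, d_bias

errors = [np.random.randn(5, 1) for _ in range(4)]
hidden_states = {t: np.random.randn(10, 1) for t in range(4)}
d_weights, d_bias = accumulate_output_layer_grads(errors, hidden_states)
print(d_weights.shape, d_bias.shape)  # (5, 10) (5, 1)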