mirror of https://github.com/TheAlgorithms/Python.git
synced 2025-04-05 13:26:43 +00:00

[pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

This commit is contained in:
parent 4c2ec80aec
commit 39fd713543
@@ -44,9 +44,11 @@ link : https://www.kaggle.com/code/navjindervirdee/lstm-neural-network-from-scra
 from tqdm import tqdm
 import numpy as np


 class LSTM:
-    def __init__(self, data: str, hidden_dim: int = 25,
-                 epochs: int = 1000, lr: float = 0.05) -> None:
+    def __init__(
+        self, data: str, hidden_dim: int = 25, epochs: int = 1000, lr: float = 0.05
+    ) -> None:
         """
         Initialize the LSTM network with the given data and hyperparameters.

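As an annotation on the constructor being rewrapped here: a minimal usage sketch, not part of the commit, where `text` is a hypothetical sample string and the keyword defaults mirror the signature above.

```python
# Hypothetical usage sketch (not in the commit): character-level training data.
text = "hello world, this is sample training text for the character LSTM"
lstm = LSTM(data=text, hidden_dim=25, epochs=1000, lr=0.05)
```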
@@ -63,7 +65,7 @@ class LSTM:
         self.chars = set(self.data)
         self.data_size, self.char_size = len(self.data), len(self.chars)

-        print(f'Data size: {self.data_size}, Char Size: {self.char_size}')
+        print(f"Data size: {self.data_size}, Char Size: {self.char_size}")

         self.char_to_idx = {c: i for i, c in enumerate(self.chars)}
         self.idx_to_char = {i: c for i, c in enumerate(self.chars)}
@@ -88,20 +90,16 @@ class LSTM:
         """
         Initialize the weights and biases for the LSTM network.
         """
-        self.wf = self.init_weights(self.char_size + self.hidden_dim,
-                                    self.hidden_dim)
+        self.wf = self.init_weights(self.char_size + self.hidden_dim, self.hidden_dim)
         self.bf = np.zeros((self.hidden_dim, 1))

-        self.wi = self.init_weights(self.char_size + self.hidden_dim,
-                                    self.hidden_dim)
+        self.wi = self.init_weights(self.char_size + self.hidden_dim, self.hidden_dim)
         self.bi = np.zeros((self.hidden_dim, 1))

-        self.wc = self.init_weights(self.char_size + self.hidden_dim,
-                                    self.hidden_dim)
+        self.wc = self.init_weights(self.char_size + self.hidden_dim, self.hidden_dim)
         self.bc = np.zeros((self.hidden_dim, 1))

-        self.wo = self.init_weights(self.char_size + self.hidden_dim,
-                                    self.hidden_dim)
+        self.wo = self.init_weights(self.char_size + self.hidden_dim, self.hidden_dim)
         self.bo = np.zeros((self.hidden_dim, 1))

         self.wy = self.init_weights(self.hidden_dim, self.char_size)
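A note on the shapes being rewrapped: each gate multiplies the concatenation of the previous hidden state and the current one-hot input, so every gate matrix is (hidden_dim, char_size + hidden_dim), while wy projects the hidden state back onto the vocabulary. A quick sanity sketch with assumed example sizes (the numbers are illustrative, not from the commit):

```python
import numpy as np

hidden_dim, char_size = 25, 40  # illustrative sizes only
wf = np.zeros((hidden_dim, char_size + hidden_dim))  # one gate's weights
concat = np.zeros((char_size + hidden_dim, 1))       # [h_{t-1}; x_t]
assert np.dot(wf, concat).shape == (hidden_dim, 1)   # gate pre-activation shape
```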
@@ -115,8 +113,9 @@ class LSTM:
         :param output_dim: The output dimension.
         :return: A matrix of initialized weights.
         """
-        return np.random.uniform(-1, 1, (output_dim, input_dim)) * \
-            np.sqrt(6 / (input_dim + output_dim))
+        return np.random.uniform(-1, 1, (output_dim, input_dim)) * np.sqrt(
+            6 / (input_dim + output_dim)
+        )

     ##### Activation Functions #####
     def sigmoid(self, x: np.ndarray, derivative: bool = False) -> np.ndarray:
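The expression rewrapped above is the Xavier/Glorot uniform initialization: uniform samples on (-1, 1) scaled by sqrt(6 / (input_dim + output_dim)), so each entry lies in the standard Glorot range

W_{ij} \sim \mathcal{U}\left(-\sqrt{\frac{6}{n_\text{in} + n_\text{out}}},\ \sqrt{\frac{6}{n_\text{in} + n_\text{out}}}\right)

which keeps activation variance roughly constant across layers.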
@@ -182,21 +181,29 @@ class LSTM:
         outputs = []
         for t in range(len(inputs)):
-            self.concat_inputs[t] = np.concatenate(
-                (self.hidden_states[t - 1], inputs[t]))
+            self.concat_inputs[t] = np.concatenate(
+                (self.hidden_states[t - 1], inputs[t])
+            )

-            self.forget_gates[t] = self.sigmoid(np.dot(self.wf,
-                                                self.concat_inputs[t]) + self.bf)
-            self.input_gates[t] = self.sigmoid(np.dot(self.wi,
-                                               self.concat_inputs[t]) + self.bi)
-            self.candidate_gates[t] = self.tanh(np.dot(self.wc,
-                                                self.concat_inputs[t]) + self.bc)
-            self.output_gates[t] = self.sigmoid(np.dot(self.wo,
-                                                self.concat_inputs[t]) + self.bo)
+            self.forget_gates[t] = self.sigmoid(
+                np.dot(self.wf, self.concat_inputs[t]) + self.bf
+            )
+            self.input_gates[t] = self.sigmoid(
+                np.dot(self.wi, self.concat_inputs[t]) + self.bi
+            )
+            self.candidate_gates[t] = self.tanh(
+                np.dot(self.wc, self.concat_inputs[t]) + self.bc
+            )
+            self.output_gates[t] = self.sigmoid(
+                np.dot(self.wo, self.concat_inputs[t]) + self.bo
+            )

-            self.cell_states[t] = self.forget_gates[t] * self.cell_states[t - 1] + \
-                self.input_gates[t] * self.candidate_gates[t]
-            self.hidden_states[t] = self.output_gates[t] * \
-                self.tanh(self.cell_states[t])
+            self.cell_states[t] = (
+                self.forget_gates[t] * self.cell_states[t - 1]
+                + self.input_gates[t] * self.candidate_gates[t]
+            )
+            self.hidden_states[t] = self.output_gates[t] * self.tanh(
+                self.cell_states[t]
+            )

             outputs.append(np.dot(self.wy, self.hidden_states[t]) + self.by)

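For readers tracking the reformatted loop against the math: it implements the standard LSTM recurrence, with [h_{t-1}; x_t] the concatenated input and \odot the elementwise product:

f_t = \sigma(W_f [h_{t-1}; x_t] + b_f)
i_t = \sigma(W_i [h_{t-1}; x_t] + b_i)
\tilde{c}_t = \tanh(W_c [h_{t-1}; x_t] + b_c)
o_t = \sigma(W_o [h_{t-1}; x_t] + b_o)
c_t = f_t \odot c_{t-1} + i_t \odot \tilde{c}_t
h_t = o_t \odot \tanh(c_t)
y_t = W_y h_t + b_y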
@@ -215,8 +222,10 @@ class LSTM:
         d_wo, d_bo = 0, 0
         d_wy, d_by = 0, 0

-        dh_next, dc_next = np.zeros_like(self.hidden_states[0]), \
-            np.zeros_like(self.cell_states[0])
+        dh_next, dc_next = (
+            np.zeros_like(self.hidden_states[0]),
+            np.zeros_like(self.cell_states[0]),
+        )
         for t in reversed(range(len(inputs))):
             error = errors[t]

@@ -228,45 +237,69 @@ class LSTM:
             d_hs = np.dot(self.wy.T, error) + dh_next

             # Output Gate Weights and Biases Errors
-            d_o = self.tanh(self.cell_states[t]) * d_hs * \
-                self.sigmoid(self.output_gates[t], derivative=True)
+            d_o = (
+                self.tanh(self.cell_states[t])
+                * d_hs
+                * self.sigmoid(self.output_gates[t], derivative=True)
+            )
             d_wo += np.dot(d_o, inputs[t].T)
             d_bo += d_o

             # Cell State Error
-            d_cs = self.tanh(self.tanh(self.cell_states[t]),
-                             derivative=True) * self.output_gates[t] * d_hs + dc_next
+            d_cs = (
+                self.tanh(self.tanh(self.cell_states[t]), derivative=True)
+                * self.output_gates[t]
+                * d_hs
+                + dc_next
+            )

             # Forget Gate Weights and Biases Errors
-            d_f = d_cs * self.cell_states[t - 1] * \
-                self.sigmoid(self.forget_gates[t], derivative=True)
+            d_f = (
+                d_cs
+                * self.cell_states[t - 1]
+                * self.sigmoid(self.forget_gates[t], derivative=True)
+            )
             d_wf += np.dot(d_f, inputs[t].T)
             d_bf += d_f

             # Input Gate Weights and Biases Errors
-            d_i = d_cs * self.candidate_gates[t] * \
-                self.sigmoid(self.input_gates[t], derivative=True)
+            d_i = (
+                d_cs
+                * self.candidate_gates[t]
+                * self.sigmoid(self.input_gates[t], derivative=True)
+            )
             d_wi += np.dot(d_i, inputs[t].T)
             d_bi += d_i

             # Candidate Gate Weights and Biases Errors
-            d_c = d_cs * self.input_gates[t] * self.tanh(self.candidate_gates[t],
-                                                         derivative=True)
+            d_c = (
+                d_cs
+                * self.input_gates[t]
+                * self.tanh(self.candidate_gates[t], derivative=True)
+            )
             d_wc += np.dot(d_c, inputs[t].T)
             d_bc += d_c

             # Update the next hidden and cell state errors
-            dh_next = np.dot(self.wf.T, d_f) + np.dot(self.wi.T, d_i) + \
-                np.dot(self.wo.T, d_o) + np.dot(self.wc.T, d_c)
+            dh_next = (
+                np.dot(self.wf.T, d_f)
+                + np.dot(self.wi.T, d_i)
+                + np.dot(self.wo.T, d_o)
+                + np.dot(self.wc.T, d_c)
+            )
             dc_next = d_cs * self.forget_gates[t]

         # Apply gradients to weights and biases
-        for param, grad in zip([self.wf, self.wi, self.wc, self.wo, self.wy],
-                               [d_wf, d_wi, d_wc, d_wo, d_wy]):
+        for param, grad in zip(
+            [self.wf, self.wi, self.wc, self.wo, self.wy],
+            [d_wf, d_wi, d_wc, d_wo, d_wy],
+        ):
             param -= self.lr * grad

-        for param, grad in zip([self.bf, self.bi, self.bc, self.bo, self.by],
-                               [d_bf, d_bi, d_bc, d_bo, d_by]):
+        for param, grad in zip(
+            [self.bf, self.bi, self.bc, self.bo, self.by],
+            [d_bf, d_bi, d_bc, d_bo, d_by],
+        ):
             param -= self.lr * grad

     def train(self) -> None:
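The block rewrapped above is backpropagation through time over the recurrence shown earlier. Since sigmoid(x, derivative=True) and tanh(x, derivative=True) here take already-activated values, the reformatted d_o and d_cs correspond to

\delta_{o_t} = \tanh(c_t) \odot \delta_{h_t} \odot o_t (1 - o_t)
\delta_{c_t} = o_t \odot (1 - \tanh^2 c_t) \odot \delta_{h_t} + f_{t+1} \odot \delta_{c_{t+1}}

and each weight gradient accumulates \delta_{g_t} [h_{t-1}; x_t]^\top over t. This reading assumes backward is invoked with the concatenated inputs, which is what the gate matrix shapes require of inputs[t].T.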
@@ -301,11 +334,13 @@ class LSTM:
         Test the LSTM network on the input data and compute accuracy.
         """
         inputs = [self.one_hot_encode(char) for char in self.train_X]
-        correct_predictions = sum(self.idx_to_char[np.argmax(self.softmax(output))] == target
-                                  for output, target in zip(self.forward(inputs), self.train_y))
+        correct_predictions = sum(
+            self.idx_to_char[np.argmax(self.softmax(output))] == target
+            for output, target in zip(self.forward(inputs), self.train_y)
+        )

         accuracy = (correct_predictions / len(self.train_y)) * 100
-        print(f'Accuracy: {accuracy:.2f}%')
+        print(f"Accuracy: {accuracy:.2f}%")


 if __name__ == "__main__":
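One detail worth noting in the rewrapped comprehension: because softmax is strictly monotone, np.argmax(self.softmax(output)) selects the same index as np.argmax(output); the softmax call only matters when the probabilities themselves are needed. A small standalone check (illustrative, not part of the commit):

```python
import numpy as np

logits = np.array([1.2, 0.3, 2.5])             # toy scores
probs = np.exp(logits) / np.exp(logits).sum()  # softmax
assert np.argmax(probs) == np.argmax(logits)   # argmax is unchanged
```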