Add files via upload

Pritam Das 2024-10-20 22:15:04 +05:30 committed by GitHub
parent b3c2a73a10
commit 9e9a3131a3

sliding_window_attention.py

@@ -1,7 +1,8 @@
 """
     - - - - - -- - - - - - - - - - - - - - - - - - - - - - - - - - - - -- - - - - - - - - - - - - - - - - - - - - - -
     Name - - sliding_window_attention.py
-    Goal - - Implement a neural network architecture using sliding window attention for sequence modeling tasks.
+    Goal - - Implement a neural network architecture using sliding window attention for sequence
+             modeling tasks.
     Detail: Total 5 layers neural network
             * Input layer
             * Sliding Window Attention Layer
@@ -11,8 +12,10 @@ Author: Stephen Lee
     Github: 245885195@qq.com
     Date: 2024.10.20
     References:
-        1. Choromanska, A., et al. (2020). "On the Importance of Initialization and Momentum in Deep Learning." *Proceedings of the 37th International Conference on Machine Learning*.
-        2. Dai, Z., et al. (2020). "Transformers are RNNs: Fast Autoregressive Transformers with Linear Attention." *arXiv preprint arXiv:2006.16236*.
+        1. Choromanska, A., et al. (2020). "On the Importance of Initialization and Momentum in
+           Deep Learning." *Proceedings of the 37th International Conference on Machine Learning*.
+        2. Dai, Z., et al. (2020). "Transformers are RNNs: Fast Autoregressive Transformers
+           with Linear Attention." *arXiv preprint arXiv:2006.16236*.
         3. [Attention Mechanisms in Neural Networks](https://en.wikipedia.org/wiki/Attention_(machine_learning))
     - - - - - -- - - - - - - - - - - - - - - - - - - - - - - - - - - - -- - - - - - - - - - - - - - - - - - - - - - -
 """
@@ -31,7 +34,7 @@ class SlidingWindowAttention:
         embed_dim (int): The dimensionality of the input embeddings.
     """

-    def __init__(self, embed_dim: int, window_size: int):
+    def __init__(self, embed_dim: int, window_size: int) -> None:
         """
         Initialize the SlidingWindowAttention module.
@@ -41,14 +44,16 @@
         """
         self.window_size = window_size
         self.embed_dim = embed_dim
-        self.attention_weights = np.random.randn(embed_dim, embed_dim)
+        rng = np.random.default_rng()
+        self.attention_weights = rng.standard_normal((embed_dim, embed_dim))

-    def forward(self, x: np.ndarray) -> np.ndarray:
+    def forward(self, input_tensor: np.ndarray) -> np.ndarray:
         """
         Forward pass for the sliding window attention.

         Args:
-            x (np.ndarray): Input tensor of shape (batch_size, seq_length, embed_dim).
+            input_tensor (np.ndarray): Input tensor of shape (batch_size, seq_length,
+                embed_dim).

         Returns:
             np.ndarray: Output tensor of shape (batch_size, seq_length, embed_dim).
@@ -61,8 +66,8 @@ class SlidingWindowAttention:
         >>> (output.sum() != 0).item()  # Check if output is non-zero
         True
         """
-        batch_size, seq_length, _ = x.shape
-        output = np.zeros_like(x)
+        batch_size, seq_length, _ = input_tensor.shape
+        output = np.zeros_like(input_tensor)

         for i in range(seq_length):
             # Define the window range
@@ -70,7 +75,7 @@ class SlidingWindowAttention:
             end = min(seq_length, i + self.window_size // 2 + 1)

             # Extract the local window
-            local_window = x[:, start:end, :]
+            local_window = input_tensor[:, start:end, :]

             # Compute attention scores
             attention_scores = np.matmul(local_window, self.attention_weights)
@@ -86,10 +91,9 @@ if __name__ == "__main__":
     doctest.testmod()

-    # Example usage
-    x = np.random.randn(
-        2, 10, 4
-    )  # Batch size 2, sequence length 10, embedding dimension 4
+    # usage
+    rng = np.random.default_rng()
+    x = rng.standard_normal((2, 10, 4))  # Batch size 2, sequence length 10, embedding dimension 4
     attention = SlidingWindowAttention(embed_dim=4, window_size=3)
     output = attention.forward(x)
     print(output)
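
For readers skimming the diff, here is a minimal standalone sketch (not part of the commit) of what the updated forward pass computes per position: clip a window around position i with the same start/end arithmetic shown above, score the window against a learned weight matrix, and mix the window back into the output. The helper names window_bounds and toy_sliding_attention are made up for illustration, and the softmax-and-average step falls outside the visible hunks, so it is an assumption about the rest of the file rather than a quote from it.

    import numpy as np


    def window_bounds(i: int, seq_length: int, window_size: int) -> tuple[int, int]:
        # Same clipping arithmetic as the diff: a window of `window_size` centred on i,
        # truncated at the sequence boundaries.
        start = max(0, i - window_size // 2)
        end = min(seq_length, i + window_size // 2 + 1)
        return start, end


    def toy_sliding_attention(x: np.ndarray, weights: np.ndarray, window_size: int) -> np.ndarray:
        # Per-position loop mirroring the diff; the softmax/weighted-sum step is assumed.
        seq_length = x.shape[1]
        output = np.zeros_like(x)
        for i in range(seq_length):
            start, end = window_bounds(i, seq_length, window_size)
            local_window = x[:, start:end, :]            # (batch, window, embed)
            scores = np.matmul(local_window, weights)    # score each window element
            scores -= scores.max(axis=1, keepdims=True)  # numerical stability
            probs = np.exp(scores) / np.exp(scores).sum(axis=1, keepdims=True)
            output[:, i, :] = (probs * local_window).sum(axis=1)  # weighted mix for position i
        return output


    rng = np.random.default_rng(0)
    x = rng.standard_normal((2, 10, 4))
    w = rng.standard_normal((4, 4))
    print(toy_sliding_attention(x, w, window_size=3).shape)  # (2, 10, 4)

The only behavioural changes in the commit itself are the legacy np.random.randn calls giving way to np.random.default_rng() and the rename of x to input_tensor; the windowing logic is untouched.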