Add files via upload

Pritam Das 2024-10-20 22:21:22 +05:30 committed by GitHub
parent 041571772e
commit 3b8848430c

sliding_window_attention.py

@@ -1,8 +1,8 @@
 """
 - - - - - -- - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 Name - - sliding_window_attention.py
-Goal - - Implement a neural network architecture using sliding window attention for sequence
-         modeling tasks.
+Goal - - Implement a neural network architecture using sliding
+         window attention for sequence modeling tasks.
 Detail: Total 5 layers neural network
         * Input layer
         * Sliding Window Attention Layer
@@ -12,10 +12,12 @@ Author: Stephen Lee
 Github: 245885195@qq.com
 Date: 2024.10.20
 References:
-    1. Choromanska, A., et al. (2020). "On the Importance of Initialization and Momentum in
-       Deep Learning." *Proceedings of the 37th International Conference on Machine Learning*.
-    2. Dai, Z., et al. (2020). "Transformers are RNNs: Fast Autoregressive Transformers
-       with Linear Attention." *arXiv preprint arXiv:2006.16236*.
+    1. Choromanska, A., et al. (2020). "On the Importance of
+       Initialization and Momentum in Deep Learning." *Proceedings
+       of the 37th International Conference on Machine Learning*.
+    2. Dai, Z., et al. (2020). "Transformers are RNNs: Fast
+       Autoregressive Transformers with Linear Attention."
+       *arXiv preprint arXiv:2006.16236*.
     3. [Attention Mechanisms in Neural Networks](https://en.wikipedia.org/wiki/Attention_(machine_learning))
 - - - - - -- - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 """
@@ -26,8 +28,8 @@ import numpy as np
 class SlidingWindowAttention:
     """Sliding Window Attention Module.

-    This class implements a sliding window attention mechanism where the model
-    attends to a fixed-size window of context around each token.
+    This class implements a sliding window attention mechanism where
+    the model attends to a fixed-size window of context around each token.

     Attributes:
         window_size (int): The size of the attention window.
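
Note: this hunk only rewraps the docstring; the windowing logic itself is not part of the diff. As a rough sketch of what "a fixed-size window of context around each token" can mean, assuming a centered window of width window_size (the helper name and the centering convention are illustrative assumptions, not taken from the file):

    # Hypothetical helper (centered-window assumption; the real convention is
    # not shown in this diff): which indices token i attends to.
    def window_bounds(i: int, seq_length: int, window_size: int) -> tuple[int, int]:
        half = window_size // 2
        start = max(0, i - half)             # clamped at the start of the sequence
        end = min(seq_length, i + half + 1)  # exclusive end, clamped at the sequence end
        return start, end

    print(window_bounds(5, 10, 3))  # (4, 7): token 5 attends to tokens 4, 5, 6
    print(window_bounds(0, 10, 3))  # (0, 2): the window is truncated at the boundary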
@@ -52,13 +54,13 @@ class SlidingWindowAttention:
         Forward pass for the sliding window attention.

         Args:
-            input_tensor (np.ndarray): Input tensor of shape (batch_size, seq_length,
-                embed_dim).
+            input_tensor (np.ndarray): Input tensor of shape (batch_size,
+                seq_length, embed_dim).

         Returns:
             np.ndarray: Output tensor of shape (batch_size, seq_length, embed_dim).

-        >>> x = np.random.randn(2, 10, 4)  # Batch size 2, sequence length 10, embedding dimension 4
+        >>> x = np.random.randn(2, 10, 4)  # Batch size 2, sequence
         >>> attention = SlidingWindowAttention(embed_dim=4, window_size=3)
         >>> output = attention.forward(x)
         >>> output.shape
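
Note: the body of forward is outside this diff, so the commit does not show how the attention weights are computed. A minimal NumPy sketch, assuming scaled dot-product attention restricted to a centered window, consistent with the documented shapes and the doctest above; an illustration, not the file's implementation:

    import numpy as np

    def sliding_window_attention(input_tensor: np.ndarray, window_size: int) -> np.ndarray:
        """Hypothetical forward pass: each token attends only to a centered window."""
        batch_size, seq_length, embed_dim = input_tensor.shape
        output = np.zeros_like(input_tensor)
        half = window_size // 2
        for b in range(batch_size):
            for i in range(seq_length):
                start, end = max(0, i - half), min(seq_length, i + half + 1)
                window = input_tensor[b, start:end, :]   # (w, embed_dim)
                scores = window @ input_tensor[b, i, :]  # dot-product scores against token i
                scores /= np.sqrt(embed_dim)             # scale for numerical stability
                weights = np.exp(scores - scores.max())
                weights /= weights.sum()                 # softmax over the window
                output[b, i, :] = weights @ window       # weighted sum of window rows
        return output

    x = np.random.default_rng().standard_normal((2, 10, 4))
    print(sliding_window_attention(x, window_size=3).shape)  # (2, 10, 4)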
@@ -93,9 +95,7 @@ if __name__ == "__main__":

     # usage
     rng = np.random.default_rng()
-    x = rng.standard_normal(
-        (2, 10, 4)
-    )  # Batch size 2, sequence length 10, embedding dimension 4
+    x = rng.standard_normal((2, 10, 4))  # Batch size 2,
     attention = SlidingWindowAttention(embed_dim=4, window_size=3)
     output = attention.forward(x)
     print(output)