Mirror of https://github.com/TheAlgorithms/Python.git
Add files via upload
This commit is contained in:
parent b3c2a73a10
commit 9e9a3131a3
@@ -1,7 +1,8 @@
 """
 - - - - - -- - - - - - - - - - - - - - - - - - - - - - -
 Name - - sliding_window_attention.py
-Goal - - Implement a neural network architecture using sliding window attention for sequence modeling tasks.
+Goal - - Implement a neural network architecture using sliding window attention for sequence
+modeling tasks.
 Detail: Total 5 layers neural network
     * Input layer
     * Sliding Window Attention Layer
@@ -11,8 +12,10 @@ Author: Stephen Lee
 Github: 245885195@qq.com
 Date: 2024.10.20
 References:
-    1. Choromanska, A., et al. (2020). "On the Importance of Initialization and Momentum in Deep Learning." *Proceedings of the 37th International Conference on Machine Learning*.
-    2. Dai, Z., et al. (2020). "Transformers are RNNs: Fast Autoregressive Transformers with Linear Attention." *arXiv preprint arXiv:2006.16236*.
+    1. Choromanska, A., et al. (2020). "On the Importance of Initialization and Momentum in
+       Deep Learning." *Proceedings of the 37th International Conference on Machine Learning*.
+    2. Dai, Z., et al. (2020). "Transformers are RNNs: Fast Autoregressive Transformers
+       with Linear Attention." *arXiv preprint arXiv:2006.16236*.
     3. [Attention Mechanisms in Neural Networks](https://en.wikipedia.org/wiki/Attention_(machine_learning))
 - - - - - -- - - - - - - - - - - - - - - - - - - - - - -
 """
@@ -31,7 +34,7 @@ class SlidingWindowAttention:
         embed_dim (int): The dimensionality of the input embeddings.
     """

-    def __init__(self, embed_dim: int, window_size: int):
+    def __init__(self, embed_dim: int, window_size: int) -> None:
         """
         Initialize the SlidingWindowAttention module.

@@ -41,14 +44,16 @@ class SlidingWindowAttention:
         """
         self.window_size = window_size
         self.embed_dim = embed_dim
-        self.attention_weights = np.random.randn(embed_dim, embed_dim)
+        rng = np.random.default_rng()
+        self.attention_weights = rng.standard_normal((embed_dim, embed_dim))

-    def forward(self, x: np.ndarray) -> np.ndarray:
+    def forward(self, input_tensor: np.ndarray) -> np.ndarray:
         """
         Forward pass for the sliding window attention.

         Args:
-            x (np.ndarray): Input tensor of shape (batch_size, seq_length, embed_dim).
+            input_tensor (np.ndarray): Input tensor of shape (batch_size, seq_length,
+                embed_dim).

         Returns:
             np.ndarray: Output tensor of shape (batch_size, seq_length, embed_dim).
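The hunk above swaps the legacy `np.random.randn` call for NumPy's `Generator` API. A minimal sketch of the two initialization styles side by side; the explicit seed and the `embed_dim` value are only for illustration and are not part of the commit:

```python
import numpy as np

embed_dim = 4  # illustrative value, matching the demo at the bottom of the file

# Legacy global-state API (what the old line used)
legacy_weights = np.random.randn(embed_dim, embed_dim)

# Generator API (what the new lines use); the seed is added here only so the
# sketch is reproducible -- the committed code leaves the generator unseeded.
rng = np.random.default_rng(seed=0)
new_weights = rng.standard_normal((embed_dim, embed_dim))

print(legacy_weights.shape, new_weights.shape)  # both (4, 4)
```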
@@ -61,8 +66,8 @@ class SlidingWindowAttention:
         >>> (output.sum() != 0).item()  # Check if output is non-zero
         True
         """
-        batch_size, seq_length, _ = x.shape
-        output = np.zeros_like(x)
+        batch_size, seq_length, _ = input_tensor.shape
+        output = np.zeros_like(input_tensor)

         for i in range(seq_length):
             # Define the window range
@@ -70,7 +75,7 @@ class SlidingWindowAttention:
             end = min(seq_length, i + self.window_size // 2 + 1)

             # Extract the local window
-            local_window = x[:, start:end, :]
+            local_window = input_tensor[:, start:end, :]

             # Compute attention scores
             attention_scores = np.matmul(local_window, self.attention_weights)
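The local window in the hunk above is the slice `[start:end]`, but only the `end` formula is visible in this diff. The `start = max(0, i - window_size // 2)` used below is therefore an assumption, inferred from the symmetric clamping of `end`. A small sketch of how the bounds behave for the demo sizes (`seq_length=10`, `window_size=3`):

```python
seq_length = 10  # demo values from the __main__ block
window_size = 3

for i in range(seq_length):
    # Assumed lower bound, mirroring the visible upper bound
    start = max(0, i - window_size // 2)
    end = min(seq_length, i + window_size // 2 + 1)  # as shown in the diff
    print(i, (start, end))  # windows are clipped at both sequence edges
```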
@@ -86,10 +91,9 @@ if __name__ == "__main__":

     doctest.testmod()

-    # Example usage
-    x = np.random.randn(
-        2, 10, 4
-    )  # Batch size 2, sequence length 10, embedding dimension 4
+    # usage
+    rng = np.random.default_rng()
+    x = rng.standard_normal((2, 10, 4))  # Batch size 2, sequence length 10, embedding dimension 4
     attention = SlidingWindowAttention(embed_dim=4, window_size=3)
     output = attention.forward(x)
     print(output)
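For completeness, a usage sketch mirroring the updated `__main__` block. The `neural_network.sliding_window_attention` import path is an assumption about where the file sits in the repository and is not shown in this diff:

```python
import numpy as np

# Hypothetical import path -- adjust to wherever sliding_window_attention.py lives.
from neural_network.sliding_window_attention import SlidingWindowAttention

rng = np.random.default_rng()
x = rng.standard_normal((2, 10, 4))  # batch 2, sequence length 10, embedding dim 4

attention = SlidingWindowAttention(embed_dim=4, window_size=3)
output = attention.forward(x)

# Shape is preserved and, per the doctest in the diff, the output is non-zero.
assert output.shape == (2, 10, 4)
print(output.sum() != 0)
```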