Mirror of https://github.com/TheAlgorithms/Python.git, synced 2025-02-25 10:28:39 +00:00
Commit 3b8848430c (parent 041571772e): Add files via upload
@@ -1,8 +1,8 @@
 """
 - - - - - -- - - - - - - - - - - - - - - - - - - - - - -
 Name - - sliding_window_attention.py
-Goal - - Implement a neural network architecture using sliding window attention for sequence
-modeling tasks.
+Goal - - Implement a neural network architecture using sliding
+window attention for sequence modeling tasks.
 Detail: Total 5 layers neural network
     * Input layer
     * Sliding Window Attention Layer
@@ -12,10 +12,12 @@ Author: Stephen Lee
 Github: 245885195@qq.com
 Date: 2024.10.20
 References:
-    1. Choromanska, A., et al. (2020). "On the Importance of Initialization and Momentum in
-    Deep Learning." *Proceedings of the 37th International Conference on Machine Learning*.
-    2. Dai, Z., et al. (2020). "Transformers are RNNs: Fast Autoregressive Transformers
-    with Linear Attention." *arXiv preprint arXiv:2006.16236*.
+    1. Choromanska, A., et al. (2020). "On the Importance of
+    Initialization and Momentum in Deep Learning." *Proceedings
+    of the 37th International Conference on Machine Learning*.
+    2. Dai, Z., et al. (2020). "Transformers are RNNs: Fast
+    Autoregressive Transformers with Linear Attention."
+    *arXiv preprint arXiv:2006.16236*.
     3. [Attention Mechanisms in Neural Networks](https://en.wikipedia.org/wiki/Attention_(machine_learning))
 - - - - - -- - - - - - - - - - - - - - - - - - - - - - -
 """
@@ -26,8 +28,8 @@ import numpy as np
 class SlidingWindowAttention:
     """Sliding Window Attention Module.
 
-    This class implements a sliding window attention mechanism where the model
-    attends to a fixed-size window of context around each token.
+    This class implements a sliding window attention mechanism where
+    the model attends to a fixed-size window of context around each token.
 
     Attributes:
         window_size (int): The size of the attention window.
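This hunk only rewraps the class docstring, so the attention computation itself is not visible in the diff. As a rough illustration of the mechanism the docstring describes, here is a minimal NumPy sketch of per-token windowed attention; the function name, the centred window, and the plain scaled dot-product scoring are assumptions for illustration, not code taken from the repository file.

import numpy as np


def sliding_window_attention(x: np.ndarray, window_size: int) -> np.ndarray:
    """Attend to a fixed-size window of context centred on each token.

    x: (batch_size, seq_length, embed_dim); the output keeps the same shape.
    """
    batch_size, seq_length, embed_dim = x.shape
    half = window_size // 2
    output = np.zeros_like(x)
    for b in range(batch_size):
        for i in range(seq_length):
            # Clip the window to the sequence boundaries so edge tokens still get context.
            start = max(0, i - half)
            end = min(seq_length, i + half + 1)
            window = x[b, start:end]                         # (w, embed_dim)
            # Scaled dot-product scores between token i and each token in its window.
            scores = window @ x[b, i] / np.sqrt(embed_dim)   # (w,)
            weights = np.exp(scores - scores.max())
            weights /= weights.sum()
            output[b, i] = weights @ window                  # (embed_dim,)
    return output


if __name__ == "__main__":
    rng = np.random.default_rng(0)
    x = rng.standard_normal((2, 10, 4))  # batch 2, sequence length 10, embedding dim 4
    print(sliding_window_attention(x, window_size=3).shape)  # (2, 10, 4)

Clipping the window at the sequence boundaries keeps the output the same shape as the input, matching the (batch_size, seq_length, embed_dim) contract described in the forward docstring below.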
@@ -52,13 +54,13 @@ class SlidingWindowAttention:
         Forward pass for the sliding window attention.
 
         Args:
-            input_tensor (np.ndarray): Input tensor of shape (batch_size, seq_length,
-            embed_dim).
+            input_tensor (np.ndarray): Input tensor of shape (batch_size,
+            seq_length, embed_dim).
 
         Returns:
             np.ndarray: Output tensor of shape (batch_size, seq_length, embed_dim).
 
-        >>> x = np.random.randn(2, 10, 4)  # Batch size 2, sequence length 10, embedding dimension 4
+        >>> x = np.random.randn(2, 10, 4)  # Batch size 2, sequence
         >>> attention = SlidingWindowAttention(embed_dim=4, window_size=3)
         >>> output = attention.forward(x)
         >>> output.shape
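An equivalent way to express the same restriction described in this forward docstring, again as a hedged sketch rather than the repository's implementation, is full self-attention with a banded mask that pushes scores farther than window_size // 2 positions away to -inf before the softmax; the helper names below are illustrative.

import numpy as np


def banded_mask(seq_length: int, window_size: int) -> np.ndarray:
    """Boolean (seq_length, seq_length) mask, True where |i - j| <= window_size // 2."""
    idx = np.arange(seq_length)
    return np.abs(idx[:, None] - idx[None, :]) <= window_size // 2


def masked_window_attention(x: np.ndarray, window_size: int) -> np.ndarray:
    """Self-attention with out-of-window scores masked to -inf before the softmax."""
    _, seq_length, embed_dim = x.shape
    scores = x @ x.transpose(0, 2, 1) / np.sqrt(embed_dim)     # (batch, seq, seq)
    scores = np.where(banded_mask(seq_length, window_size), scores, -np.inf)
    weights = np.exp(scores - scores.max(axis=-1, keepdims=True))
    weights /= weights.sum(axis=-1, keepdims=True)
    return weights @ x                                          # (batch, seq, embed)


if __name__ == "__main__":
    rng = np.random.default_rng()
    x = rng.standard_normal((2, 10, 4))
    print(masked_window_attention(x, window_size=3).shape)  # (2, 10, 4)

The masked form is easy to vectorize for short sequences; the per-token loop in the earlier sketch avoids materializing the full (seq_length, seq_length) score matrix, which matters when sequences are long.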
@@ -93,9 +95,7 @@ if __name__ == "__main__":
 
     # usage
     rng = np.random.default_rng()
-    x = rng.standard_normal(
-        (2, 10, 4)
-    )  # Batch size 2, sequence length 10, embedding dimension 4
+    x = rng.standard_normal((2, 10, 4))  # Batch size 2,
     attention = SlidingWindowAttention(embed_dim=4, window_size=3)
     output = attention.forward(x)
     print(output)