From 3b8848430c1c0f8404b0404857b7debaa08e7473 Mon Sep 17 00:00:00 2001
From: Pritam Das <69068731+Pritam3355@users.noreply.github.com>
Date: Sun, 20 Oct 2024 22:21:22 +0530
Subject: [PATCH] Add files via upload

---
 neural_network/sliding_window_attention.py | 28 +++++++++++-----------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/neural_network/sliding_window_attention.py b/neural_network/sliding_window_attention.py
index b329e4fce..54d3fec2c 100644
--- a/neural_network/sliding_window_attention.py
+++ b/neural_network/sliding_window_attention.py
@@ -1,8 +1,8 @@
 """
 - - - - - -- - - - - - - - - - - - - - - - - - - - - - -
 Name - - sliding_window_attention.py
-Goal - - Implement a neural network architecture using sliding window attention for sequence
-    modeling tasks.
+Goal - - Implement a neural network architecture using sliding
+    window attention for sequence modeling tasks.
 Detail: Total 5 layers neural network
         * Input layer
         * Sliding Window Attention Layer
@@ -12,10 +12,12 @@ Author: Stephen Lee
 Github: 245885195@qq.com
 Date: 2024.10.20
 References:
-    1. Choromanska, A., et al. (2020). "On the Importance of Initialization and Momentum in
-       Deep Learning." *Proceedings of the 37th International Conference on Machine Learning*.
-    2. Dai, Z., et al. (2020). "Transformers are RNNs: Fast Autoregressive Transformers
-       with Linear Attention." *arXiv preprint arXiv:2006.16236*.
+    1. Sutskever, I., et al. (2013). "On the Importance of
+       Initialization and Momentum in Deep Learning." *Proceedings
+       of the 30th International Conference on Machine Learning*.
+    2. Katharopoulos, A., et al. (2020). "Transformers are RNNs:
+       Fast Autoregressive Transformers with Linear Attention."
+       *arXiv preprint arXiv:2006.16236*.
     3. [Attention Mechanisms in Neural Networks](https://en.wikipedia.org/wiki/Attention_(machine_learning))
 - - - - - -- - - - - - - - - - - - - - - - - - - - - - -
 """
@@ -26,8 +28,8 @@ import numpy as np
 class SlidingWindowAttention:
     """Sliding Window Attention Module.
 
-    This class implements a sliding window attention mechanism where the model
-    attends to a fixed-size window of context around each token.
+    This class implements a sliding window attention mechanism where
+    the model attends to a fixed-size window of context around each token.
 
     Attributes:
         window_size (int): The size of the attention window.
@@ -52,13 +54,13 @@ class SlidingWindowAttention:
         Forward pass for the sliding window attention.
 
         Args:
-            input_tensor (np.ndarray): Input tensor of shape (batch_size, seq_length,
-                embed_dim).
+            input_tensor (np.ndarray): Input tensor of shape (batch_size,
+                seq_length, embed_dim).
 
         Returns:
             np.ndarray: Output tensor of shape (batch_size, seq_length, embed_dim).
 
-        >>> x = np.random.randn(2, 10, 4)  # Batch size 2, sequence length 10, embedding dimension 4
+        >>> x = np.random.randn(2, 10, 4)  # (batch=2, seq_len=10, embed_dim=4)
         >>> attention = SlidingWindowAttention(embed_dim=4, window_size=3)
         >>> output = attention.forward(x)
         >>> output.shape
@@ -93,9 +95,7 @@ if __name__ == "__main__":
 
     # usage
     rng = np.random.default_rng()
-    x = rng.standard_normal(
-        (2, 10, 4)
-    )  # Batch size 2, sequence length 10, embedding dimension 4
+    x = rng.standard_normal((2, 10, 4))  # (batch=2, seq_len=10, embed_dim=4)
     attention = SlidingWindowAttention(embed_dim=4, window_size=3)
     output = attention.forward(x)
     print(output)
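
The hunks above only rewrap docstrings and comments, so the forward computation of SlidingWindowAttention is not visible in this patch. For orientation, below is a minimal NumPy sketch of a sliding-window (local) attention forward pass that matches the documented interface (embed_dim, window_size, forward returning a tensor of the same shape). It is an illustration under assumptions, not the code added by this PR: the class name SlidingWindowAttentionSketch, the single shared projection matrix, and the per-token softmax over a centered window are choices made here for brevity.

import numpy as np


class SlidingWindowAttentionSketch:
    """Illustrative local (sliding window) attention; not the PR's implementation."""

    def __init__(self, embed_dim: int, window_size: int) -> None:
        self.embed_dim = embed_dim
        self.window_size = window_size
        rng = np.random.default_rng(0)
        # One projection shared by queries, keys and values (simplifying
        # assumption; real implementations use separate W_q / W_k / W_v).
        self.weight = rng.standard_normal((embed_dim, embed_dim)) / np.sqrt(embed_dim)

    def forward(self, input_tensor: np.ndarray) -> np.ndarray:
        batch_size, seq_length, _ = input_tensor.shape
        q = input_tensor @ self.weight
        k = input_tensor @ self.weight
        v = input_tensor @ self.weight
        half = self.window_size // 2
        output = np.zeros_like(input_tensor)
        for t in range(seq_length):
            # Each token attends only to the window [lo, hi) around itself.
            lo, hi = max(0, t - half), min(seq_length, t + half + 1)
            scores = q[:, t : t + 1, :] @ k[:, lo:hi, :].transpose(0, 2, 1)
            scores = scores / np.sqrt(self.embed_dim)
            # Numerically stable softmax over the local window.
            weights = np.exp(scores - scores.max(axis=-1, keepdims=True))
            weights = weights / weights.sum(axis=-1, keepdims=True)
            output[:, t, :] = (weights @ v[:, lo:hi, :])[:, 0, :]
        return output


if __name__ == "__main__":
    x = np.random.default_rng(1).standard_normal((2, 10, 4))
    attn = SlidingWindowAttentionSketch(embed_dim=4, window_size=3)
    print(attn.forward(x).shape)  # (2, 10, 4)

Running the example prints (2, 10, 4), mirroring the shape check in the patched doctest.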