# Origin: patch 47ddba1d914bf5955a244056e794e718dee9ead1,
# "Added cosine similarity (#7001)", which adds this function (and the
# ``norm`` import) to machine_learning/similarity_search.py.
import numpy as np
from numpy.linalg import norm


def cosine_similarity(input_a: np.ndarray, input_b: np.ndarray) -> float:
    """
    Calculates the cosine similarity between two vectors, i.e. their dot
    product divided by the product of their norms.

    Neither norm is checked for zero, so an all-zero input makes the
    denominator zero.

    :param input_a: ndarray of first vector.
    :param input_b: ndarray of second vector.
    :return: Cosine similarity of input_a and input_b, converted to a
                built-in float.

    >>> cosine_similarity(np.array([1]), np.array([1]))
    1.0
    >>> cosine_similarity(np.array([1, 2]), np.array([6, 32]))
    0.9615239476408232
    """
    # np.dot and norm yield NumPy scalars; convert explicitly so the value
    # matches the ``-> float`` annotation and prints as a plain float in the
    # doctests regardless of how the installed NumPy formats its scalars.
    return float(np.dot(input_a, input_b) / (norm(input_a) * norm(input_b)))