Compare commits

...

34 Commits

Author SHA1 Message Date
Gowtham Kamalasekar
4a7da09039
Merge 400f4ea124 into fcf82a1eda 2024-10-06 00:08:01 +05:30
Vineet Kumar
fcf82a1eda
Implemented Exponential Search with binary search for improved perfor… (#11666)
* Implemented Exponential Search with binary search for improved performance on large sorted arrays.

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Added type hints and doctests for binary_search and exponential_search functions. Improved code documentation and ensured testability.

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update and rename Exponential_Search.py to exponential_search.py

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-10-05 10:34:48 -07:00
Andrey Ivanov
ad6395d340
Update ruff usage example in CONTRIBUTING.md (#11772)
* Update ruff usage example

* Update CONTRIBUTING.md

Co-authored-by: Tianyi Zheng <tianyizheng02@gmail.com>

---------

Co-authored-by: Tianyi Zheng <tianyizheng02@gmail.com>
2024-10-05 10:24:58 -07:00
Gowtham Kamalasekar
400f4ea124
Merge branch 'TheAlgorithms:master' into master 2024-10-05 22:54:53 +05:30
Jeel Rupapara
50aca04c67
feat: increase test coverage of longest_common_subsequence to 75% (#11777) 2024-10-05 10:21:43 -07:00
1227haran
5a8655d306
Added new algorithm to generate numbers in lexicographical order (#11674)
* Added algorithm to generate numbers in lexicographical order

* Removed the test cases

* Updated camelcase to snakecase

* Added doctest

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Added descriptive name for n

* Reduced the number of letters

* Updated the return type

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Updated import statement

* Updated return type to Iterator[int]

* removed parentheses

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-10-05 10:19:58 -07:00
tkgowtham
36700a13ee
Update dbscan.py with more test cases 2024-10-02 21:14:43 +05:30
tkgowtham
94caecf36e
Update dbscan.py with annotation for dict1 2024-10-02 20:12:01 +05:30
tkgowtham
3fa1d18426
Update dbscan.py removed typing 2024-10-02 20:07:45 +05:30
tkgowtham
d49fea0cc6
Changed typing accordingly 2024-10-02 20:03:34 +05:30
tkgowtham
7c76e5c992
full final update of dbscan 2024-10-02 19:53:08 +05:30
pre-commit-ci[bot]
5e148f524d [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
2024-10-02 14:12:57 +00:00
tkgowtham
cd539e378d
Final Update of DBSCAN 2024-10-02 19:42:19 +05:30
tkgowtham
59f4a0e046
Delete machine_learning/dbscan.py 2024-10-02 19:41:32 +05:30
pre-commit-ci[bot]
d2dbdc1136 [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
2024-10-02 11:07:02 +00:00
tkgowtham
0708d4b851
Update final5 dbscan.py 2024-10-02 16:36:03 +05:30
pre-commit-ci[bot]
61beb79437 [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
2024-10-02 11:03:29 +00:00
tkgowtham
e13b9d9ef2
Update final3 dbscan.py 2024-10-02 16:33:05 +05:30
pre-commit-ci[bot]
0b6579460e [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
2024-10-02 10:55:14 +00:00
tkgowtham
8b4d5e8338
Update final2 dbscan.py 2024-10-02 16:24:52 +05:30
pre-commit-ci[bot]
67ccda1f0e [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
2024-10-02 10:41:26 +00:00
tkgowtham
ab2822788e
Update Final dbscan.py 2024-10-02 16:10:40 +05:30
pre-commit-ci[bot]
4d76e8236b [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
2024-10-02 10:33:31 +00:00
tkgowtham
249b0e8871
Update dbscan.py 2024-10-02 16:03:06 +05:30
pre-commit-ci[bot]
b7e5e9c112 [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
2024-10-02 10:18:49 +00:00
tkgowtham
254854e832
Update dbscan.py 2024-10-02 15:43:16 +05:30
tkgowtham
8d86c62d56
Merge branch 'TheAlgorithms:master' into master 2024-10-02 15:40:31 +05:30
pre-commit-ci[bot]
a393075ede [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
2024-10-01 15:51:04 +00:00
tkgowtham
12ac966b63
Update dbscan.py 2024-10-01 21:20:12 +05:30
pre-commit-ci[bot]
d61809015b [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
2024-10-01 15:44:15 +00:00
tkgowtham
49e9f614f5
Update dbscan.py 2024-10-01 21:13:40 +05:30
pre-commit-ci[bot]
b526b4d4eb [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
2024-10-01 15:27:03 +00:00
tkgowtham
da81c073eb
Update and rename DBSCAN.py to dbscan.py 2024-10-01 20:49:14 +05:30
tkgowtham
e107d6d5d0
Implementation of DBSCAN from Scratch 2024-10-01 20:06:58 +05:30
5 changed files with 469 additions and 1 deletion

View File

@@ -96,7 +96,7 @@ We want your work to be readable by others; therefore, we encourage you to note
 ```bash
 python3 -m pip install ruff # only required the first time
-ruff .
+ruff check
 ```
 - Original code submission require docstrings or comments to describe your work.

View File

@@ -0,0 +1,38 @@
from collections.abc import Iterator


def lexical_order(max_number: int) -> Iterator[int]:
    """
    Generate numbers in lexical order from 1 to max_number.

    >>> " ".join(map(str, lexical_order(13)))
    '1 10 11 12 13 2 3 4 5 6 7 8 9'
    >>> list(lexical_order(1))
    [1]
    >>> " ".join(map(str, lexical_order(20)))
    '1 10 11 12 13 14 15 16 17 18 19 2 20 3 4 5 6 7 8 9'
    >>> " ".join(map(str, lexical_order(25)))
    '1 10 11 12 13 14 15 16 17 18 19 2 20 21 22 23 24 25 3 4 5 6 7 8 9'
    >>> list(lexical_order(12))
    [1, 10, 11, 12, 2, 3, 4, 5, 6, 7, 8, 9]
    """
    stack = [1]

    while stack:
        num = stack.pop()
        if num > max_number:
            continue

        yield num
        if (num % 10) != 9:
            stack.append(num + 1)

        stack.append(num * 10)


if __name__ == "__main__":
    from doctest import testmod

    testmod()
    print(f"Numbers from 1 to 25 in lexical order: {list(lexical_order(26))}")

View File

@@ -28,6 +28,24 @@ def longest_common_subsequence(x: str, y: str):
    (2, 'ph')
    >>> longest_common_subsequence("computer", "food")
    (1, 'o')
    >>> longest_common_subsequence("", "abc") # One string is empty
    (0, '')
    >>> longest_common_subsequence("abc", "") # Other string is empty
    (0, '')
    >>> longest_common_subsequence("", "") # Both strings are empty
    (0, '')
    >>> longest_common_subsequence("abc", "def") # No common subsequence
    (0, '')
    >>> longest_common_subsequence("abc", "abc") # Identical strings
    (3, 'abc')
    >>> longest_common_subsequence("a", "a") # Single character match
    (1, 'a')
    >>> longest_common_subsequence("a", "b") # Single character no match
    (0, '')
    >>> longest_common_subsequence("abcdef", "ace") # Interleaved subsequence
    (3, 'ace')
    >>> longest_common_subsequence("ABCD", "ACBD") # No repeated characters
    (3, 'ABD')
    """
    # find the length of strings
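The doctests above (for example `(3, 'ABD')` for `"ABCD"` vs `"ACBD"`) exercise the standard LCS dynamic-programming recurrence. A minimal length-only sketch of that recurrence is shown below; it is illustrative and is not the repository's implementation, which also reconstructs the subsequence string:

```python
def lcs_length(x: str, y: str) -> int:
    """Length of the longest common subsequence of x and y (illustrative sketch only)."""
    # dp[i][j] holds the LCS length of the prefixes x[:i] and y[:j]
    dp = [[0] * (len(y) + 1) for _ in range(len(x) + 1)]
    for i in range(1, len(x) + 1):
        for j in range(1, len(y) + 1):
            if x[i - 1] == y[j - 1]:
                dp[i][j] = dp[i - 1][j - 1] + 1
            else:
                dp[i][j] = max(dp[i - 1][j], dp[i][j - 1])
    return dp[len(x)][len(y)]


assert lcs_length("ABCD", "ACBD") == 3  # consistent with the (3, 'ABD') doctest above
assert lcs_length("abc", "def") == 0
```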

machine_learning/dbscan.py (new file, 299 additions)
View File

@@ -0,0 +1,299 @@
"""
Author : Gowtham Kamalasekar
LinkedIn : https://www.linkedin.com/in/gowtham-kamalasekar/
"""

import math

import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import pandas as pd


class DbScan:
    """
    DBSCAN Algorithm :
    Density-Based Spatial Clustering Of Applications With Noise
    Refer this website for more details : https://en.wikipedia.org/wiki/DBSCAN

    Functions:
    ----------
        __init__() : Constructor that sets minPts, radius and file
        perform_dbscan() : Invoked by the constructor; calculates the core
                           and noise points and returns a dictionary.
        print_dbscan() : Prints the core and noise points, stating whether
                         each noise point is a border point.
        plot_dbscan() : Plots the points to show the core and noise points.

    To create an object
    -------------------
    import dbscan
    obj = dbscan.DbScan(minpts, radius, file)
    obj.print_dbscan()
    obj.plot_dbscan()
    """

    def __init__(
        self,
        minpts: int,
        radius: int,
        file: str = "None",
    ) -> None:
        """
        Constructor

        Args:
        -----------
            minpts (int) : Minimum number of points needed to be
                           within the radius to be considered as core
            radius (int) : The radius from a given core point where
                           other core points can be considered as core
            file (csv) : CSV file location. Should contain x and y
                         coordinate values for each point.

        Example :
            minPts = 4
            radius = 1.9
            file = 'data_dbscan.csv'

        File Structure of CSV Data:
        ---------------------------
            _____
            x | y
            -----
            3 | 7
            4 | 6
            5 | 5
            6 | 4
            7 | 3
            -----
        """
        self.minpts = minpts
        self.radius = radius
        self.file = (
            file
            if file != "None"
            else (
                {"x": 3, "y": 7},
                {"x": 4, "y": 6},
                {"x": 5, "y": 5},
                {"x": 6, "y": 4},
                {"x": 7, "y": 3},
                {"x": 6, "y": 2},
                {"x": 7, "y": 2},
                {"x": 8, "y": 4},
                {"x": 3, "y": 3},
                {"x": 2, "y": 6},
                {"x": 3, "y": 5},
                {"x": 2, "y": 4},
            )
        )
        self.dict1 = self.perform_dbscan()

    def perform_dbscan(self) -> dict[int, list[int]]:
        """
        Args:
        -----------
            None

        Return:
        --------
            Dictionary with points and the list
            of points that lie in its radius

        >>> result = DbScan(4, 1.9).perform_dbscan()
        >>> for key in sorted(result):
        ...     print(key, sorted(result[key]))
        1 [1, 2, 10]
        2 [1, 2, 3, 11]
        3 [2, 3, 4]
        4 [3, 4, 5]
        5 [4, 5, 6, 7, 8]
        6 [5, 6, 7]
        7 [5, 6, 7]
        8 [5, 8]
        9 [9, 12]
        10 [1, 10, 11]
        11 [2, 10, 11, 12]
        12 [9, 11, 12]
        >>> result = DbScan(3, 2.5).perform_dbscan()
        >>> for key in sorted(result):
        ...     print(key, sorted(result[key]))
        1 [1, 2, 10, 11]
        2 [1, 2, 3, 10, 11]
        3 [2, 3, 4, 11]
        4 [3, 4, 5, 6, 7, 8]
        5 [4, 5, 6, 7, 8]
        6 [4, 5, 6, 7]
        7 [4, 5, 6, 7, 8]
        8 [4, 5, 7, 8]
        9 [9, 11, 12]
        10 [1, 2, 10, 11, 12]
        11 [1, 2, 3, 9, 10, 11, 12]
        12 [9, 10, 11, 12]
        >>> result = DbScan(5, 2.5).perform_dbscan()
        >>> for key in sorted(result):
        ...     print(key, sorted(result[key]))
        1 [1, 2, 10, 11]
        2 [1, 2, 3, 10, 11]
        3 [2, 3, 4, 11]
        4 [3, 4, 5, 6, 7, 8]
        5 [4, 5, 6, 7, 8]
        6 [4, 5, 6, 7]
        7 [4, 5, 6, 7, 8]
        8 [4, 5, 7, 8]
        9 [9, 11, 12]
        10 [1, 2, 10, 11, 12]
        11 [1, 2, 3, 9, 10, 11, 12]
        12 [9, 10, 11, 12]
        """
        if type(self.file) is str:
            data = pd.read_csv(self.file)
        else:
            data = pd.DataFrame(list(self.file))
        e = self.radius
        dict1: dict[int, list[int]] = {}
        for i in range(len(data)):
            for j in range(len(data)):
                dist = math.sqrt(
                    pow(data["x"][j] - data["x"][i], 2)
                    + pow(data["y"][j] - data["y"][i], 2)
                )
                if dist < e:
                    if i + 1 in dict1:
                        dict1[i + 1].append(j + 1)
                    else:
                        dict1[i + 1] = [
                            j + 1,
                        ]
        return dict1

    def print_dbscan(self) -> None:
        """
        Outputs:
        --------
        Prints each point and whether it is a core point or noise (and, for
        noise, whether it is a border point)

        >>> DbScan(4,1.9).print_dbscan()
        1 [1, 2, 10] ---> Noise ---> Border
        2 [1, 2, 3, 11] ---> Core
        3 [2, 3, 4] ---> Noise ---> Border
        4 [3, 4, 5] ---> Noise ---> Border
        5 [4, 5, 6, 7, 8] ---> Core
        6 [5, 6, 7] ---> Noise ---> Border
        7 [5, 6, 7] ---> Noise ---> Border
        8 [5, 8] ---> Noise ---> Border
        9 [9, 12] ---> Noise
        10 [1, 10, 11] ---> Noise ---> Border
        11 [2, 10, 11, 12] ---> Core
        12 [9, 11, 12] ---> Noise ---> Border
        >>> DbScan(5,2.5).print_dbscan()
        1 [1, 2, 10, 11] ---> Noise ---> Border
        2 [1, 2, 3, 10, 11] ---> Core
        3 [2, 3, 4, 11] ---> Noise ---> Border
        4 [3, 4, 5, 6, 7, 8] ---> Core
        5 [4, 5, 6, 7, 8] ---> Core
        6 [4, 5, 6, 7] ---> Noise ---> Border
        7 [4, 5, 6, 7, 8] ---> Core
        8 [4, 5, 7, 8] ---> Noise ---> Border
        9 [9, 11, 12] ---> Noise ---> Border
        10 [1, 2, 10, 11, 12] ---> Core
        11 [1, 2, 3, 9, 10, 11, 12] ---> Core
        12 [9, 10, 11, 12] ---> Noise ---> Border
        >>> DbScan(2,0.5).print_dbscan()
        1 [1] ---> Noise
        2 [2] ---> Noise
        3 [3] ---> Noise
        4 [4] ---> Noise
        5 [5] ---> Noise
        6 [6] ---> Noise
        7 [7] ---> Noise
        8 [8] ---> Noise
        9 [9] ---> Noise
        10 [10] ---> Noise
        11 [11] ---> Noise
        12 [12] ---> Noise
        """
        for i in self.dict1:
            print(i, " ", self.dict1[i], end=" ---> ")
            if len(self.dict1[i]) >= self.minpts:
                print("Core")
            else:
                for j in self.dict1:
                    if (
                        i != j
                        and len(self.dict1[j]) >= self.minpts
                        and i in self.dict1[j]
                    ):
                        print("Noise ---> Border")
                        break
                else:
                    print("Noise")

    def plot_dbscan(self) -> None:
        """
        Output:
        -------
        A matplotlib plot that shows core and noise points, along with the
        radius circle drawn around each core point.

        >>> DbScan(4,1.9).plot_dbscan()
        Plotted Successfully
        >>> DbScan(5,2.5).plot_dbscan()
        Plotted Successfully
        >>> DbScan(5,2.5).plot_dbscan()
        Plotted Successfully
        """
        if type(self.file) is str:
            data = pd.read_csv(self.file)
        else:
            data = pd.DataFrame(list(self.file))
        e = self.radius
        for i in self.dict1:
            if len(self.dict1[i]) >= self.minpts:
                plt.scatter(data["x"][i - 1], data["y"][i - 1], color="red")
                circle = plt.Circle(
                    (data["x"][i - 1], data["y"][i - 1]), e, color="blue", fill=False
                )
                plt.gca().add_artist(circle)
                plt.text(
                    data["x"][i - 1],
                    data["y"][i - 1],
                    "P" + str(i),
                    ha="center",
                    va="bottom",
                )
            else:
                plt.scatter(data["x"][i - 1], data["y"][i - 1], color="green")
                plt.text(
                    data["x"][i - 1],
                    data["y"][i - 1],
                    "P" + str(i),
                    ha="center",
                    va="bottom",
                )
        core_legend = mpatches.Patch(color="red", label="Core")
        noise_legend = mpatches.Patch(color="green", label="Noise")
        plt.xlabel("X")
        plt.ylabel("Y")
        plt.title("DBSCAN Clustering")
        plt.legend(handles=[core_legend, noise_legend])
        plt.show()
        print("Plotted Successfully")


if __name__ == "__main__":
    import doctest

    doctest.testmod()
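A minimal usage sketch for the new class, assuming `machine_learning/dbscan.py` from this PR is importable; when no CSV file is supplied, the constructor falls back to the 12 built-in sample points used by the doctests:

```python
# Illustrative usage only; the import assumes dbscan.py is reachable as a module, as its docstring suggests.
from dbscan import DbScan

clusterer = DbScan(minpts=4, radius=1.9)  # uses the built-in sample points
clusterer.print_dbscan()   # labels each point as Core, Noise, or Noise ---> Border
clusterer.plot_dbscan()    # draws the matplotlib scatter plot with radius circles
```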

View File

@@ -0,0 +1,113 @@
#!/usr/bin/env python3
"""
Pure Python implementation of exponential search algorithm

For more information, see the Wikipedia page:
https://en.wikipedia.org/wiki/Exponential_search

For doctests run the following command:
python3 -m doctest -v exponential_search.py

For manual testing run:
python3 exponential_search.py
"""

from __future__ import annotations


def binary_search_by_recursion(
    sorted_collection: list[int], item: int, left: int = 0, right: int = -1
) -> int:
    """Pure implementation of binary search algorithm in Python using recursion

    Be careful: the collection must be sorted in ascending order; otherwise,
    the result will be unpredictable.

    :param sorted_collection: some ascending sorted collection with comparable items
    :param item: item value to search
    :param left: starting index for the search
    :param right: ending index for the search
    :return: index of the found item or -1 if the item is not found

    Examples:
    >>> binary_search_by_recursion([0, 5, 7, 10, 15], 0, 0, 4)
    0
    >>> binary_search_by_recursion([0, 5, 7, 10, 15], 15, 0, 4)
    4
    >>> binary_search_by_recursion([0, 5, 7, 10, 15], 5, 0, 4)
    1
    >>> binary_search_by_recursion([0, 5, 7, 10, 15], 6, 0, 4)
    -1
    """
    if right < 0:
        right = len(sorted_collection) - 1
    if list(sorted_collection) != sorted(sorted_collection):
        raise ValueError("sorted_collection must be sorted in ascending order")
    if right < left:
        return -1

    midpoint = left + (right - left) // 2

    if sorted_collection[midpoint] == item:
        return midpoint
    elif sorted_collection[midpoint] > item:
        return binary_search_by_recursion(sorted_collection, item, left, midpoint - 1)
    else:
        return binary_search_by_recursion(sorted_collection, item, midpoint + 1, right)


def exponential_search(sorted_collection: list[int], item: int) -> int:
    """
    Pure implementation of an exponential search algorithm in Python.

    For more information, refer to:
    https://en.wikipedia.org/wiki/Exponential_search

    Be careful: the collection must be sorted in ascending order; otherwise,
    the result will be unpredictable.

    :param sorted_collection: some ascending sorted collection with comparable items
    :param item: item value to search
    :return: index of the found item or -1 if the item is not found

    The time complexity of this algorithm is O(log i) where i is the index of the item.

    Examples:
    >>> exponential_search([0, 5, 7, 10, 15], 0)
    0
    >>> exponential_search([0, 5, 7, 10, 15], 15)
    4
    >>> exponential_search([0, 5, 7, 10, 15], 5)
    1
    >>> exponential_search([0, 5, 7, 10, 15], 6)
    -1
    """
    if list(sorted_collection) != sorted(sorted_collection):
        raise ValueError("sorted_collection must be sorted in ascending order")

    if sorted_collection[0] == item:
        return 0

    bound = 1
    while bound < len(sorted_collection) and sorted_collection[bound] < item:
        bound *= 2

    left = bound // 2
    right = min(bound, len(sorted_collection) - 1)
    return binary_search_by_recursion(sorted_collection, item, left, right)


if __name__ == "__main__":
    import doctest

    doctest.testmod()

    # Manual testing
    user_input = input("Enter numbers separated by commas: ").strip()
    collection = sorted(int(item) for item in user_input.split(","))
    target = int(input("Enter a number to search for: "))
    result = exponential_search(sorted_collection=collection, item=target)
    if result == -1:
        print(f"{target} was not found in {collection}.")
    else:
        print(f"{target} was found at index {result} in {collection}.")