Merge branch 'TheAlgorithms:master' into master

2025-01-18 16:27:02 +00:00 · 2024-10-05 22:54:53 +05:30 · 2024-10-05 22:54:53 +05:30 · 400f4ea124
commit 400f4ea124
parent 36700a13ee 50aca04c67
9 changed files with 391 additions and 42 deletions
--- a/backtracking/word_break.py
+++ b/backtracking/word_break.py
@ -0,0 +1,71 @@
+"""
+Word Break Problem is a well-known problem in computer science.
+Given a string and a dictionary of words, the task is to determine if
+the string can be segmented into a sequence of one or more dictionary words.
+
+Wikipedia: https://en.wikipedia.org/wiki/Word_break_problem
+"""
+
+
+def backtrack(input_string: str, word_dict: set[str], start: int) -> bool:
+    """
+    Helper function that uses backtracking to determine if a valid
+    word segmentation is possible starting from index 'start'.
+
+    Parameters:
+    input_string (str): The input string to be segmented.
+    word_dict (set[str]): A set of valid dictionary words.
+    start (int): The starting index of the substring to be checked.
+
+    Returns:
+    bool: True if a valid segmentation is possible, otherwise False.
+
+    Example:
+    >>> backtrack("leetcode", {"leet", "code"}, 0)
+    True
+
+    >>> backtrack("applepenapple", {"apple", "pen"}, 0)
+    True
+
+    >>> backtrack("catsandog", {"cats", "dog", "sand", "and", "cat"}, 0)
+    False
+    """
+
+    # Base case: if the starting index has reached the end of the string
+    if start == len(input_string):
+        return True
+
+    # Try every candidate word input_string[start:end] that begins at 'start'
+    for end in range(start + 1, len(input_string) + 1):
+        if input_string[start:end] in word_dict and backtrack(
+            input_string, word_dict, end
+        ):
+            return True
+
+    return False
+
+
+def word_break(input_string: str, word_dict: set[str]) -> bool:
+    """
+    Determines if the input string can be segmented into a sequence of
+    valid dictionary words using backtracking.
+
+    Parameters:
+    input_string (str): The input string to segment.
+    word_dict (set[str]): The set of valid words.
+
+    Returns:
+    bool: True if the string can be segmented into valid words, otherwise False.
+
+    Example:
+    >>> word_break("leetcode", {"leet", "code"})
+    True
+
+    >>> word_break("applepenapple", {"apple", "pen"})
+    True
+
+    >>> word_break("catsandog", {"cats", "dog", "sand", "and", "cat"})
+    False
+    """
+
+    return backtrack(input_string, word_dict, 0)
--- a/data_structures/linked_list/has_loop.py
+++ b/data_structures/linked_list/has_loop.py
@ -14,11 +14,11 @@ class Node:

    def __iter__(self):
        node = self
-        visited = []
+        visited = set()
        while node:
            if node in visited:
                raise ContainsLoopError
-            visited.append(node)
+            visited.add(node)
            yield node.data
            node = node.next_node

--- a/data_structures/stacks/lexicographical_numbers.py
+++ b/data_structures/stacks/lexicographical_numbers.py
@ -0,0 +1,38 @@
+from collections.abc import Iterator
+
+
+def lexical_order(max_number: int) -> Iterator[int]:
+    """
+    Generate numbers in lexical order from 1 to max_number.
+
+    >>> " ".join(map(str, lexical_order(13)))
+    '1 10 11 12 13 2 3 4 5 6 7 8 9'
+    >>> list(lexical_order(1))
+    [1]
+    >>> " ".join(map(str, lexical_order(20)))
+    '1 10 11 12 13 14 15 16 17 18 19 2 20 3 4 5 6 7 8 9'
+    >>> " ".join(map(str, lexical_order(25)))
+    '1 10 11 12 13 14 15 16 17 18 19 2 20 21 22 23 24 25 3 4 5 6 7 8 9'
+    >>> list(lexical_order(12))
+    [1, 10, 11, 12, 2, 3, 4, 5, 6, 7, 8, 9]
+    """
+
+    stack = [1]
+
+    while stack:
+        num = stack.pop()
+        if num > max_number:
+            continue
+
+        yield num
+        if (num % 10) != 9:
+            stack.append(num + 1)
+
+        stack.append(num * 10)
+
+
+if __name__ == "__main__":
+    from doctest import testmod
+
+    testmod()
+    print(f"Numbers from 1 to 25 in lexical order: {list(lexical_order(25))}")
--- a/data_structures/stacks/next_greater_element.py
+++ b/data_structures/stacks/next_greater_element.py
@ -6,9 +6,20 @@ expect = [-5, 0, 5, 5.1, 11, 13, 21, -1, 4, -1, -10, -5, -1, 0, -1]

 def next_greatest_element_slow(arr: list[float]) -> list[float]:
    """
-    Get the Next Greatest Element (NGE) for all elements in a list.
-    Maximum element present after the current one which is also greater than the
-    current one.
+    Get the Next Greatest Element (NGE) for each element in the array
+    by checking all subsequent elements to find the next greater one.
+
+    This is a brute-force implementation, and it has a time complexity
+    of O(n^2), where n is the size of the array.
+
+    Args:
+        arr: List of numbers for which the NGE is calculated.
+
+    Returns:
+        List containing the next greatest elements. If no
+        greater element is found, -1 is placed in the result.
+
+    Example:
    >>> next_greatest_element_slow(arr) == expect
    True
    """
@ -28,9 +39,21 @@ def next_greatest_element_slow(arr: list[float]) -> list[float]:

 def next_greatest_element_fast(arr: list[float]) -> list[float]:
    """
-    Like next_greatest_element_slow() but changes the loops to use
-    enumerate() instead of range(len()) for the outer loop and
-    for in a slice of arr for the inner loop.
+    Find the Next Greatest Element (NGE) for each element in the array
+    using a more readable approach. This implementation utilizes
+    enumerate() for the outer loop and slicing for the inner loop.
+
+    While this improves readability over next_greatest_element_slow(),
+    it still has a time complexity of O(n^2).
+
+    Args:
+        arr: List of numbers for which the NGE is calculated.
+
+    Returns:
+        List containing the next greatest elements. If no
+        greater element is found, -1 is placed in the result.
+
+    Example:
    >>> next_greatest_element_fast(arr) == expect
    True
    """
@ -47,14 +70,23 @@ def next_greatest_element_fast(arr: list[float]) -> list[float]:

 def next_greatest_element(arr: list[float]) -> list[float]:
    """
-    Get the Next Greatest Element (NGE) for all elements in a list.
-    Maximum element present after the current one which is also greater than the
-    current one.
+    Efficient solution to find the Next Greatest Element (NGE) for all elements
+    using a stack. The time complexity is reduced to O(n), making it suitable
+    for larger arrays.

-    A naive way to solve this is to take two loops and check for the next bigger
-    number but that will make the time complexity as O(n^2). The better way to solve
-    this would be to use a stack to keep track of maximum number giving a linear time
-    solution.
+    The stack keeps track of elements for which the next greater element hasn't
+    been found yet. By iterating through the array in reverse (from the last
+    element to the first), the stack is used to efficiently determine the next
+    greatest element for each element.
+
+    Args:
+        arr: List of numbers for which the NGE is calculated.
+
+    Returns:
+        List containing the next greatest elements. If no
+        greater element is found, -1 is placed in the result.
+
+    Example:
    >>> next_greatest_element(arr) == expect
    True
    """
--- a/dynamic_programming/floyd_warshall.py
+++ b/dynamic_programming/floyd_warshall.py
@ -12,19 +12,58 @@ class Graph:
        ]  # dp[i][j] stores minimum distance from i to j

    def add_edge(self, u, v, w):
+        """
+        Adds a directed edge from node u
+        to node v with weight w.
+
+        >>> g = Graph(3)
+        >>> g.add_edge(0, 1, 5)
+        >>> g.dp[0][1]
+        5
+        """
        self.dp[u][v] = w

    def floyd_warshall(self):
+        """
+        Computes the shortest paths between all pairs of
+        nodes using the Floyd-Warshall algorithm.
+
+        >>> g = Graph(3)
+        >>> g.add_edge(0, 1, 1)
+        >>> g.add_edge(1, 2, 2)
+        >>> g.floyd_warshall()
+        >>> g.show_min(0, 2)
+        3
+        >>> g.show_min(2, 0)
+        inf
+        """
        for k in range(self.n):
            for i in range(self.n):
                for j in range(self.n):
                    self.dp[i][j] = min(self.dp[i][j], self.dp[i][k] + self.dp[k][j])

    def show_min(self, u, v):
+        """
+        Returns the minimum distance from node u to node v.
+
+        >>> g = Graph(3)
+        >>> g.add_edge(0, 1, 3)
+        >>> g.add_edge(1, 2, 4)
+        >>> g.floyd_warshall()
+        >>> g.show_min(0, 2)
+        7
+        >>> g.show_min(1, 0)
+        inf
+        """
        return self.dp[u][v]


 if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod()
+
+    # Example usage
    graph = Graph(5)
    graph.add_edge(0, 2, 9)
    graph.add_edge(0, 4, 10)
@ -38,5 +77,9 @@ if __name__ == "__main__":
    graph.add_edge(4, 2, 4)
    graph.add_edge(4, 3, 9)
    graph.floyd_warshall()
+    print(
        graph.show_min(1, 4)
+    )  # Should output the minimum distance from node 1 to node 4
+    print(
        graph.show_min(0, 3)
+    )  # Should output the minimum distance from node 0 to node 3
--- a/dynamic_programming/longest_common_subsequence.py
+++ b/dynamic_programming/longest_common_subsequence.py
@ -28,6 +28,24 @@ def longest_common_subsequence(x: str, y: str):
    (2, 'ph')
    >>> longest_common_subsequence("computer", "food")
    (1, 'o')
+    >>> longest_common_subsequence("", "abc")  # One string is empty
+    (0, '')
+    >>> longest_common_subsequence("abc", "")  # Other string is empty
+    (0, '')
+    >>> longest_common_subsequence("", "")  # Both strings are empty
+    (0, '')
+    >>> longest_common_subsequence("abc", "def")  # No common subsequence
+    (0, '')
+    >>> longest_common_subsequence("abc", "abc")  # Identical strings
+    (3, 'abc')
+    >>> longest_common_subsequence("a", "a")  # Single character match
+    (1, 'a')
+    >>> longest_common_subsequence("a", "b")  # Single character no match
+    (0, '')
+    >>> longest_common_subsequence("abcdef", "ace")  # Interleaved subsequence
+    (3, 'ace')
+    >>> longest_common_subsequence("ABCD", "ACBD")  # No repeated characters
+    (3, 'ABD')
    """
    # find the length of strings

--- a/graphs/kahns_algorithm_topo.py
+++ b/graphs/kahns_algorithm_topo.py
@ -1,36 +1,61 @@
-def topological_sort(graph):
+def topological_sort(graph: dict[int, list[int]]) -> list[int] | None:
    """
-    Kahn's Algorithm is used to find Topological ordering of Directed Acyclic Graph
-    using BFS
+    Perform topological sorting of a Directed Acyclic Graph (DAG)
+    using Kahn's Algorithm via Breadth-First Search (BFS).
+
+    Topological sorting is a linear ordering of vertices in a graph such that for
+    every directed edge u → v, vertex u comes before vertex v in the ordering.
+
+    Parameters:
+    graph: Adjacency list representing the directed graph where keys are
+           vertices, and values are lists of adjacent vertices.
+
+    Returns:
+    The topologically sorted order of vertices if the graph is a DAG.
+    Returns None if the graph contains a cycle.
+
+    Example:
+    >>> graph = {0: [1, 2], 1: [3], 2: [3], 3: [4, 5], 4: [], 5: []}
+    >>> topological_sort(graph)
+    [0, 1, 2, 3, 4, 5]
+
+    >>> graph_with_cycle = {0: [1], 1: [2], 2: [0]}
+    >>> topological_sort(graph_with_cycle)  # returns None, so nothing is echoed
    """
+
    indegree = [0] * len(graph)
    queue = []
-    topo = []
-    cnt = 0
+    topo_order = []
+    processed_vertices_count = 0

+    # Calculate the indegree of each vertex
    for values in graph.values():
        for i in values:
            indegree[i] += 1

+    # Add all vertices with 0 indegree to the queue
    for i in range(len(indegree)):
        if indegree[i] == 0:
            queue.append(i)

+    # Perform BFS
    while queue:
        vertex = queue.pop(0)
-        cnt += 1
-        topo.append(vertex)
-        for x in graph[vertex]:
-            indegree[x] -= 1
-            if indegree[x] == 0:
-                queue.append(x)
+        processed_vertices_count += 1
+        topo_order.append(vertex)

-    if cnt != len(graph):
-        print("Cycle exists")
-    else:
-        print(topo)
+        # Traverse neighbors
+        for neighbor in graph[vertex]:
+            indegree[neighbor] -= 1
+            if indegree[neighbor] == 0:
+                queue.append(neighbor)
+
+    if processed_vertices_count != len(graph):
+        return None  # no topological ordering exists due to cycle
+    return topo_order  # valid topological ordering


-# Adjacency List of Graph
-graph = {0: [1, 2], 1: [3], 2: [3], 3: [4, 5], 4: [], 5: []}
-topological_sort(graph)
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod()
--- a/maths/fibonacci.py
+++ b/maths/fibonacci.py
@ -7,6 +7,8 @@ the Binet's formula function because the Binet formula function  uses floats

 NOTE 2: the Binet's formula function is much more limited in the size of inputs
 that it can handle due to the size limitations of Python floats
+NOTE 3: the matrix function is the fastest and most memory efficient for large n,
+but its fixed-width NumPy integers silently overflow for n > 92 (64-bit ints)

 See benchmark numbers in __main__ for performance comparisons/
 https://en.wikipedia.org/wiki/Fibonacci_number for more information
@ -17,6 +19,9 @@ from collections.abc import Iterator
 from math import sqrt
 from time import time

+import numpy as np
+from numpy import ndarray
+

 def time_func(func, *args, **kwargs):
    """
@ -230,6 +235,88 @@ def fib_binet(n: int) -> list[int]:
    return [round(phi**i / sqrt_5) for i in range(n + 1)]


+def matrix_pow_np(m: ndarray, power: int) -> ndarray:
+    """
+    Raises a matrix to the power of 'power' using binary exponentiation.
+
+    Args:
+        m: Matrix as a numpy array.
+        power: The power to which the matrix is to be raised.
+
+    Returns:
+        The matrix raised to the power.
+
+    Raises:
+        ValueError: If power is negative.
+
+    >>> m = np.array([[1, 1], [1, 0]], dtype=int)
+    >>> matrix_pow_np(m, 0)  # Identity matrix when raised to the power of 0
+    array([[1, 0],
+           [0, 1]])
+
+    >>> matrix_pow_np(m, 1)  # Same matrix when raised to the power of 1
+    array([[1, 1],
+           [1, 0]])
+
+    >>> matrix_pow_np(m, 5)
+    array([[8, 5],
+           [5, 3]])
+
+    >>> matrix_pow_np(m, -1)
+    Traceback (most recent call last):
+        ...
+    ValueError: power is negative
+    """
+    result = np.array([[1, 0], [0, 1]], dtype=int)  # Identity Matrix
+    base = m
+    if power < 0:  # Negative power is not allowed
+        raise ValueError("power is negative")
+    while power:
+        if power % 2 == 1:
+            result = np.dot(result, base)
+        base = np.dot(base, base)
+        power //= 2
+    return result
+
+
+def fib_matrix_np(n: int) -> int:
+    """
+    Calculates the n-th Fibonacci number using matrix exponentiation.
+    Reference (fast algorithms: matrix exponentiation and fast doubling):
+    https://www.nayuki.io/page/fast-fibonacci-algorithms
+
+    Args:
+        n: Fibonacci sequence index
+
+    Returns:
+        The n-th Fibonacci number.
+
+    Raises:
+        ValueError: If n is negative.
+
+    >>> fib_matrix_np(0)
+    0
+    >>> fib_matrix_np(1)
+    1
+    >>> fib_matrix_np(5)
+    5
+    >>> fib_matrix_np(10)
+    55
+    >>> fib_matrix_np(-1)
+    Traceback (most recent call last):
+        ...
+    ValueError: n is negative
+    """
+    if n < 0:
+        raise ValueError("n is negative")
+    if n == 0:
+        return 0
+
+    m = np.array([[1, 1], [1, 0]], dtype=int)
+    result = matrix_pow_np(m, n - 1)
+    return int(result[0, 0])
+
+
 if __name__ == "__main__":
    from doctest import testmod

@ -242,3 +329,4 @@ if __name__ == "__main__":
    time_func(fib_memoization, num)  # 0.0100 ms
    time_func(fib_recursive_cached, num)  # 0.0153 ms
    time_func(fib_recursive, num)  # 257.0910 ms
+    time_func(fib_matrix_np, num)  # 0.0000 ms
--- a/strings/min_cost_string_conversion.py
+++ b/strings/min_cost_string_conversion.py
@ -17,11 +17,27 @@ def compute_transform_tables(
    delete_cost: int,
    insert_cost: int,
 ) -> tuple[list[list[int]], list[list[str]]]:
+    """
+    Builds the cost and operation tables for the most cost-efficient
+    sequence of edits that converts one string into another.
+
+    >>> costs, operations = compute_transform_tables("cat", "cut", 1, 2, 3, 3)
+    >>> costs[0][:4]
+    [0, 3, 6, 9]
+    >>> costs[2][:4]
+    [6, 4, 3, 6]
+    >>> operations[0][:4]
+    ['0', 'Ic', 'Iu', 'It']
+    >>> operations[3][:4]
+    ['Dt', 'Dt', 'Rtu', 'Ct']
+
+    >>> compute_transform_tables("", "", 1, 2, 3, 3)
+    ([[0]], [['0']])
+    """
    source_seq = list(source_string)
    destination_seq = list(destination_string)
    len_source_seq = len(source_seq)
    len_destination_seq = len(destination_seq)
-
    costs = [
        [0 for _ in range(len_destination_seq + 1)] for _ in range(len_source_seq + 1)
    ]
@ -31,33 +47,51 @@ def compute_transform_tables(

    for i in range(1, len_source_seq + 1):
        costs[i][0] = i * delete_cost
-        ops[i][0] = f"D{source_seq[i - 1]:c}"
+        ops[i][0] = f"D{source_seq[i - 1]}"

    for i in range(1, len_destination_seq + 1):
        costs[0][i] = i * insert_cost
-        ops[0][i] = f"I{destination_seq[i - 1]:c}"
+        ops[0][i] = f"I{destination_seq[i - 1]}"

    for i in range(1, len_source_seq + 1):
        for j in range(1, len_destination_seq + 1):
            if source_seq[i - 1] == destination_seq[j - 1]:
                costs[i][j] = costs[i - 1][j - 1] + copy_cost
-                ops[i][j] = f"C{source_seq[i - 1]:c}"
+                ops[i][j] = f"C{source_seq[i - 1]}"
            else:
                costs[i][j] = costs[i - 1][j - 1] + replace_cost
-                ops[i][j] = f"R{source_seq[i - 1]:c}" + str(destination_seq[j - 1])
+                ops[i][j] = f"R{source_seq[i - 1]}" + str(destination_seq[j - 1])

            if costs[i - 1][j] + delete_cost < costs[i][j]:
                costs[i][j] = costs[i - 1][j] + delete_cost
-                ops[i][j] = f"D{source_seq[i - 1]:c}"
+                ops[i][j] = f"D{source_seq[i - 1]}"

            if costs[i][j - 1] + insert_cost < costs[i][j]:
                costs[i][j] = costs[i][j - 1] + insert_cost
-                ops[i][j] = f"I{destination_seq[j - 1]:c}"
+                ops[i][j] = f"I{destination_seq[j - 1]}"

    return costs, ops


 def assemble_transformation(ops: list[list[str]], i: int, j: int) -> list[str]:
+    """
+    Assembles the transformations based on the ops table.
+
+    >>> ops = [['0', 'Ic', 'Iu', 'It'],
+    ...        ['Dc', 'Cc', 'Iu', 'It'],
+    ...        ['Da', 'Da', 'Rau', 'Rat'],
+    ...        ['Dt', 'Dt', 'Rtu', 'Ct']]
+    >>> x = len(ops) - 1
+    >>> y = len(ops[0]) - 1
+    >>> assemble_transformation(ops, x, y)
+    ['Cc', 'Rau', 'Ct']
+
+    >>> ops1 = [['0']]
+    >>> x1 = len(ops1) - 1
+    >>> y1 = len(ops1[0]) - 1
+    >>> assemble_transformation(ops1, x1, y1)
+    []
+    """
    if i == 0 and j == 0:
        return []
    elif ops[i][j][0] in {"C", "R"}: