mirror of
https://github.com/TheAlgorithms/Python.git
synced 2025-02-24 09:58:39 +00:00
Compare commits
4 Commits
c0892a0651
...
a2783c6597
Author | SHA1 | Date | |
---|---|---|---|
|
a2783c6597 | ||
|
61cfb43d2b | ||
|
3dc143f721 | ||
|
8102424950 |
@ -712,6 +712,7 @@
|
||||
* [Gauss Easter](other/gauss_easter.py)
|
||||
* [Graham Scan](other/graham_scan.py)
|
||||
* [Greedy](other/greedy.py)
|
||||
* [H Index](other/h_index.py)
|
||||
* [Least Recently Used](other/least_recently_used.py)
|
||||
* [Lfu Cache](other/lfu_cache.py)
|
||||
* [Linear Congruential Generator](other/linear_congruential_generator.py)
|
||||
|
@ -1,14 +1,55 @@
|
||||
"""
|
||||
Locally weighted linear regression, also called local regression, is a type of
|
||||
non-parametric linear regression that prioritizes data closest to a given
|
||||
prediction point. The algorithm estimates the vector of model coefficients β
|
||||
using weighted least squares regression:
|
||||
|
||||
β = (XᵀWX)⁻¹(XᵀWy),
|
||||
|
||||
where X is the design matrix, y is the response vector, and W is the diagonal
|
||||
weight matrix.
|
||||
|
||||
This implementation calculates wᵢ, the weight of the ith training sample, using
|
||||
the Gaussian weight:
|
||||
|
||||
wᵢ = exp(-‖xᵢ - x‖²/(2τ²)),
|
||||
|
||||
where xᵢ is the ith training sample, x is the prediction point, τ is the
|
||||
"bandwidth", and ‖x‖ is the Euclidean norm (also called the 2-norm or the L²
|
||||
norm). The bandwidth τ controls how quickly the weight of a training sample
|
||||
decreases as its distance from the prediction point increases. One can think of
|
||||
the Gaussian weight as a bell curve centered around the prediction point: a
|
||||
training sample is weighted lower if it's farther from the center, and τ
|
||||
controls the spread of the bell curve.
|
||||
|
||||
Other types of locally weighted regression such as locally estimated scatterplot
|
||||
smoothing (LOESS) typically use different weight functions.
|
||||
|
||||
References:
|
||||
- https://en.wikipedia.org/wiki/Local_regression
|
||||
- https://en.wikipedia.org/wiki/Weighted_least_squares
|
||||
- https://cs229.stanford.edu/notes2022fall/main_notes.pdf
|
||||
"""
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
|
||||
|
||||
def weighted_matrix(
|
||||
point: np.array, training_data_x: np.array, bandwidth: float
|
||||
) -> np.array:
|
||||
def weight_matrix(point: np.ndarray, x_train: np.ndarray, tau: float) -> np.ndarray:
|
||||
"""
|
||||
Calculate the weight for every point in the data set.
|
||||
point --> the x value at which we want to make predictions
|
||||
>>> weighted_matrix(
|
||||
Calculate the weight of every point in the training data around a given
|
||||
prediction point
|
||||
|
||||
Args:
|
||||
point: x-value at which the prediction is being made
|
||||
x_train: ndarray of x-values for training
|
||||
tau: bandwidth value, controls how quickly the weight of training values
|
||||
decreases as the distance from the prediction point increases
|
||||
|
||||
Returns:
|
||||
m x m weight matrix around the prediction point, where m is the size of
|
||||
the training set
|
||||
>>> weight_matrix(
|
||||
... np.array([1., 1.]),
|
||||
... np.array([[16.99, 10.34], [21.01,23.68], [24.59,25.69]]),
|
||||
... 0.6
|
||||
@ -17,25 +58,30 @@ def weighted_matrix(
|
||||
[0.00000000e+000, 0.00000000e+000, 0.00000000e+000],
|
||||
[0.00000000e+000, 0.00000000e+000, 0.00000000e+000]])
|
||||
"""
|
||||
m, _ = np.shape(training_data_x) # m is the number of training samples
|
||||
weights = np.eye(m) # Initializing weights as identity matrix
|
||||
|
||||
# calculating weights for all training examples [x(i)'s]
|
||||
m = len(x_train) # Number of training samples
|
||||
weights = np.eye(m) # Initialize weights as identity matrix
|
||||
for j in range(m):
|
||||
diff = point - training_data_x[j]
|
||||
weights[j, j] = np.exp(diff @ diff.T / (-2.0 * bandwidth**2))
|
||||
diff = point - x_train[j]
|
||||
weights[j, j] = np.exp(diff @ diff.T / (-2.0 * tau**2))
|
||||
|
||||
return weights
|
||||
|
||||
|
||||
def local_weight(
|
||||
point: np.array,
|
||||
training_data_x: np.array,
|
||||
training_data_y: np.array,
|
||||
bandwidth: float,
|
||||
) -> np.array:
|
||||
point: np.ndarray, x_train: np.ndarray, y_train: np.ndarray, tau: float
|
||||
) -> np.ndarray:
|
||||
"""
|
||||
Calculate the local weights using the weight_matrix function on training data.
|
||||
Return the weighted matrix.
|
||||
Calculate the local weights at a given prediction point using the weight
|
||||
matrix for that point
|
||||
|
||||
Args:
|
||||
point: x-value at which the prediction is being made
|
||||
x_train: ndarray of x-values for training
|
||||
y_train: ndarray of y-values for training
|
||||
tau: bandwidth value, controls how quickly the weight of training values
|
||||
decreases as the distance from the prediction point increases
|
||||
Returns:
|
||||
ndarray of local weights
|
||||
>>> local_weight(
|
||||
... np.array([1., 1.]),
|
||||
... np.array([[16.99, 10.34], [21.01,23.68], [24.59,25.69]]),
|
||||
@ -45,19 +91,28 @@ def local_weight(
|
||||
array([[0.00873174],
|
||||
[0.08272556]])
|
||||
"""
|
||||
weight = weighted_matrix(point, training_data_x, bandwidth)
|
||||
w = np.linalg.inv(training_data_x.T @ (weight @ training_data_x)) @ (
|
||||
training_data_x.T @ weight @ training_data_y.T
|
||||
weight_mat = weight_matrix(point, x_train, tau)
|
||||
weight = np.linalg.inv(x_train.T @ weight_mat @ x_train) @ (
|
||||
x_train.T @ weight_mat @ y_train.T
|
||||
)
|
||||
|
||||
return w
|
||||
return weight
|
||||
|
||||
|
||||
def local_weight_regression(
|
||||
training_data_x: np.array, training_data_y: np.array, bandwidth: float
|
||||
) -> np.array:
|
||||
x_train: np.ndarray, y_train: np.ndarray, tau: float
|
||||
) -> np.ndarray:
|
||||
"""
|
||||
Calculate predictions for each data point on axis
|
||||
Calculate predictions for each point in the training data
|
||||
|
||||
Args:
|
||||
x_train: ndarray of x-values for training
|
||||
y_train: ndarray of y-values for training
|
||||
tau: bandwidth value, controls how quickly the weight of training values
|
||||
decreases as the distance from the prediction point increases
|
||||
|
||||
Returns:
|
||||
ndarray of predictions
|
||||
>>> local_weight_regression(
|
||||
... np.array([[16.99, 10.34], [21.01, 23.68], [24.59, 25.69]]),
|
||||
... np.array([[1.01, 1.66, 3.5]]),
|
||||
@ -65,77 +120,57 @@ def local_weight_regression(
|
||||
... )
|
||||
array([1.07173261, 1.65970737, 3.50160179])
|
||||
"""
|
||||
m, _ = np.shape(training_data_x)
|
||||
ypred = np.zeros(m)
|
||||
y_pred = np.zeros(len(x_train)) # Initialize array of predictions
|
||||
for i, item in enumerate(x_train):
|
||||
y_pred[i] = item @ local_weight(item, x_train, y_train, tau)
|
||||
|
||||
for i, item in enumerate(training_data_x):
|
||||
ypred[i] = item @ local_weight(
|
||||
item, training_data_x, training_data_y, bandwidth
|
||||
)
|
||||
|
||||
return ypred
|
||||
return y_pred
|
||||
|
||||
|
||||
def load_data(
|
||||
dataset_name: str, cola_name: str, colb_name: str
|
||||
) -> tuple[np.array, np.array, np.array, np.array]:
|
||||
dataset_name: str, x_name: str, y_name: str
|
||||
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
|
||||
"""
|
||||
Load data from seaborn and split it into x and y points
|
||||
>>> pass # No doctests, function is for demo purposes only
|
||||
"""
|
||||
import seaborn as sns
|
||||
|
||||
data = sns.load_dataset(dataset_name)
|
||||
col_a = np.array(data[cola_name]) # total_bill
|
||||
col_b = np.array(data[colb_name]) # tip
|
||||
x_data = np.array(data[x_name])
|
||||
y_data = np.array(data[y_name])
|
||||
|
||||
mcol_a = col_a.copy()
|
||||
mcol_b = col_b.copy()
|
||||
one = np.ones(len(y_data))
|
||||
|
||||
one = np.ones(np.shape(mcol_b)[0], dtype=int)
|
||||
# pairing elements of one and x_data
|
||||
x_train = np.column_stack((one, x_data))
|
||||
|
||||
# pairing elements of one and mcol_a
|
||||
training_data_x = np.column_stack((one, mcol_a))
|
||||
|
||||
return training_data_x, mcol_b, col_a, col_b
|
||||
|
||||
|
||||
def get_preds(training_data_x: np.array, mcol_b: np.array, tau: float) -> np.array:
|
||||
"""
|
||||
Get predictions with minimum error for each training data
|
||||
>>> get_preds(
|
||||
... np.array([[16.99, 10.34], [21.01, 23.68], [24.59, 25.69]]),
|
||||
... np.array([[1.01, 1.66, 3.5]]),
|
||||
... 0.6
|
||||
... )
|
||||
array([1.07173261, 1.65970737, 3.50160179])
|
||||
"""
|
||||
ypred = local_weight_regression(training_data_x, mcol_b, tau)
|
||||
return ypred
|
||||
return x_train, x_data, y_data
|
||||
|
||||
|
||||
def plot_preds(
|
||||
training_data_x: np.array,
|
||||
predictions: np.array,
|
||||
col_x: np.array,
|
||||
col_y: np.array,
|
||||
cola_name: str,
|
||||
colb_name: str,
|
||||
) -> plt.plot:
|
||||
x_train: np.ndarray,
|
||||
preds: np.ndarray,
|
||||
x_data: np.ndarray,
|
||||
y_data: np.ndarray,
|
||||
x_name: str,
|
||||
y_name: str,
|
||||
) -> None:
|
||||
"""
|
||||
Plot predictions and display the graph
|
||||
>>> pass # No doctests, function is for demo purposes only
|
||||
"""
|
||||
xsort = training_data_x.copy()
|
||||
xsort.sort(axis=0)
|
||||
plt.scatter(col_x, col_y, color="blue")
|
||||
x_train_sorted = np.sort(x_train, axis=0)
|
||||
plt.scatter(x_data, y_data, color="blue")
|
||||
plt.plot(
|
||||
xsort[:, 1],
|
||||
predictions[training_data_x[:, 1].argsort(0)],
|
||||
x_train_sorted[:, 1],
|
||||
preds[x_train[:, 1].argsort(0)],
|
||||
color="yellow",
|
||||
linewidth=5,
|
||||
)
|
||||
plt.title("Local Weighted Regression")
|
||||
plt.xlabel(cola_name)
|
||||
plt.ylabel(colb_name)
|
||||
plt.xlabel(x_name)
|
||||
plt.ylabel(y_name)
|
||||
plt.show()
|
||||
|
||||
|
||||
@ -144,6 +179,7 @@ if __name__ == "__main__":
|
||||
|
||||
doctest.testmod()
|
||||
|
||||
training_data_x, mcol_b, col_a, col_b = load_data("tips", "total_bill", "tip")
|
||||
predictions = get_preds(training_data_x, mcol_b, 0.5)
|
||||
plot_preds(training_data_x, predictions, col_a, col_b, "total_bill", "tip")
|
||||
# Demo with a dataset from the seaborn module
|
||||
training_data_x, total_bill, tip = load_data("tips", "total_bill", "tip")
|
||||
predictions = local_weight_regression(training_data_x, tip, 5)
|
||||
plot_preds(training_data_x, predictions, total_bill, tip, "total_bill", "tip")
|
||||
|
42
maths/odd_sieve.py
Normal file
42
maths/odd_sieve.py
Normal file
@ -0,0 +1,42 @@
|
||||
from itertools import compress, repeat
|
||||
from math import ceil, sqrt
|
||||
|
||||
|
||||
def odd_sieve(num: int) -> list[int]:
    """
    Return the prime numbers strictly less than `num`.

    The primes are calculated using an odd-only implementation of the Sieve of
    Eratosthenes (see https://en.wikipedia.org/wiki/Sieve_of_Eratosthenes):
    even numbers are never stored, and 2 is prepended to the result.

    All bounds are computed with integer arithmetic only, so the result does
    not depend on floating-point rounding.

    Args:
        num: exclusive upper bound of the primes to return

    Returns:
        Ascending list of all primes p with p < num

    >>> odd_sieve(2)
    []
    >>> odd_sieve(3)
    [2]
    >>> odd_sieve(10)
    [2, 3, 5, 7]
    >>> odd_sieve(20)
    [2, 3, 5, 7, 11, 13, 17, 19]
    """

    if num <= 2:
        return []
    if num == 3:
        return [2]

    # sieve[k] stands for the odd number 2 * k + 3, covering the odd numbers
    # in the range [3, num - 1]
    sieve = bytearray(b"\x01") * ((num >> 1) - 1)

    for i in range(3, num, 2):
        i_squared = i * i
        if i_squared >= num:
            # Every composite below num has a prime factor whose square is
            # below num, so nothing is left to cross out
            break
        if sieve[(i >> 1) - 1]:
            # Cross out the odd multiples of i starting at i**2: a step of i
            # sieve slots equals a step of 2 * i in value
            multiples = len(range(i_squared, num, i << 1))
            sieve[(i_squared >> 1) - 1 :: i] = repeat(0, multiples)

    return [2] + list(compress(range(3, num, 2), sieve))


if __name__ == "__main__":
    import doctest

    doctest.testmod()
|
165
other/guess_the_number_search.py
Normal file
165
other/guess_the_number_search.py
Normal file
@ -0,0 +1,165 @@
|
||||
"""
|
||||
guess the number using lower,higher and the value to find or guess
|
||||
|
||||
solution works by dividing lower and higher of number guessed
|
||||
|
||||
for example, suppose lower is 10, higher is 1000 and the number to guess is 17
|
||||
|
||||
>>> guess_the_number(10, 1000, 17)
|
||||
started...
|
||||
guess the number : 17
|
||||
details : [505, 257, 133, 71, 40, 25, 17]
|
||||
|
||||
"""
|
||||
|
||||
|
||||
def temp_input_value(
    min_val: int = 10, max_val: int = 1000, option: bool = True
) -> int:
    """
    Temporary input values for tests

    Args:
        min_val: lower bound, returned when `option` is True
        max_val: upper bound, returned when `option` is False
        option: selects which of the two bounds is returned

    Returns:
        `min_val` if `option` is True, otherwise `max_val`

    Raises:
        AssertionError: if `min_val` or `max_val` is not an int, or `option`
            is not a bool
        ValueError: if `min_val` is greater than `max_val`

    >>> temp_input_value(option=True)
    10

    >>> temp_input_value(option=False)
    1000

    >>> temp_input_value(min_val=100, option=True)
    100

    >>> temp_input_value(min_val=100, max_val=50)
    Traceback (most recent call last):
        ...
    ValueError: Invalid value for min_val or max_val (min_value < max_value)

    >>> temp_input_value("ten","fifty",1)
    Traceback (most recent call last):
        ...
    AssertionError: Invalid type of value(s) specified to function!

    >>> temp_input_value(min_val=-100, max_val=500)
    -100

    >>> temp_input_value(min_val=-5100, max_val=-100)
    -5100
    """
    # Raised explicitly rather than via `assert` so that the check is not
    # stripped when Python runs with -O; the exception type stays the same
    if not (
        isinstance(min_val, int)
        and isinstance(max_val, int)
        and isinstance(option, bool)
    ):
        raise AssertionError("Invalid type of value(s) specified to function!")

    if min_val > max_val:
        raise ValueError("Invalid value for min_val or max_val (min_value < max_value)")
    return min_val if option else max_val
|
||||
|
||||
|
||||
def get_avg(number_1: int, number_2: int) -> int:
    """
    Return the whole-number midpoint of `number_1` and `number_2`, truncated
    toward zero.

    Integer inputs are averaged with exact integer arithmetic, so the result
    stays correct for values too large to be represented exactly as a float.
    Any other numeric input falls back to true division followed by `int()`.

    Args:
        number_1: first number
        number_2: second number

    Returns:
        (number_1 + number_2) / 2 with the fractional part discarded

    Raises:
        TypeError: if the two arguments cannot be added together

    >>> get_avg(10, 15)
    12

    >>> get_avg(20, 300)
    160

    >>> get_avg(-3, 0)
    -1

    >>> get_avg(2**53, 2**53 + 2) == 2**53 + 1
    True

    >>> get_avg("abcd", 300)
    Traceback (most recent call last):
        ...
    TypeError: can only concatenate str (not "int") to str

    >>> get_avg(10.5,50.25)
    30
    """
    total = number_1 + number_2
    if isinstance(total, int):
        # Floor-divide the magnitude and restore the sign: this truncates
        # toward zero like int(total / 2) but never goes through a float
        half = abs(total) // 2
        return half if total >= 0 else -half
    return int(total / 2)
|
||||
|
||||
|
||||
def guess_the_number(lower: int, higher: int, to_guess: int) -> None:
    """
    Guess `to_guess` by repeatedly taking the midpoint (via `get_avg`) of the
    current lower and upper bounds, then print the guessed number and the
    list of every midpoint that was tried.

    Args:
        lower: exclusive lower bound of the search range
        higher: exclusive upper bound of the search range
        to_guess: the number to find; must lie strictly between the bounds

    Raises:
        AssertionError: if any argument is not an int
        ValueError: if `lower` is greater than `higher`, or `to_guess` is not
            strictly between `lower` and `higher`

    >>> guess_the_number(10, 1000, 17)
    started...
    guess the number : 17
    details : [505, 257, 133, 71, 40, 25, 17]

    >>> guess_the_number(-10000, 10000, 7)
    started...
    guess the number : 7
    details : [0, 5000, 2500, 1250, 625, 312, 156, 78, 39, 19, 9, 4, 6, 7]

    >>> guess_the_number(10, 1000, "a")
    Traceback (most recent call last):
        ...
    AssertionError: argument values must be type of "int"

    >>> guess_the_number(10, 1000, 5)
    Traceback (most recent call last):
        ...
    ValueError: guess value must be within the range of lower and higher value

    >>> guess_the_number(10000, 100, 5)
    Traceback (most recent call last):
        ...
    ValueError: argument value for lower and higher must be(lower < higher)
    """
    # Raised explicitly rather than via `assert` so that the check is not
    # stripped when Python runs with -O; the exception type stays the same
    if not (
        isinstance(lower, int) and isinstance(higher, int) and isinstance(to_guess, int)
    ):
        raise AssertionError('argument values must be type of "int"')

    if lower > higher:
        raise ValueError("argument value for lower and higher must be(lower < higher)")

    if not lower < to_guess < higher:
        raise ValueError(
            "guess value must be within the range of lower and higher value"
        )

    def answer(number: int) -> str:
        """
        Compare `number` with `to_guess` and return "high", "low" or "same"
        """
        if number > to_guess:
            return "high"
        elif number < to_guess:
            return "low"
        else:
            return "same"

    print("started...")

    last_lowest = lower
    last_highest = higher

    last_numbers: list[int] = []

    while True:
        number = get_avg(last_lowest, last_highest)
        last_numbers.append(number)

        # Compare once per iteration and narrow the bound on the wrong side
        verdict = answer(number)
        if verdict == "low":
            last_lowest = number
        elif verdict == "high":
            last_highest = number
        else:
            break

    print(f"guess the number : {last_numbers[-1]}")
    print(f"details : {last_numbers}")
|
||||
|
||||
|
||||
def main() -> None:
    """
    Script entry point: read the lower bound, the upper bound and the number
    to guess from standard input (in that order) and run the search
    """
    prompts = (
        "Enter lower value : ",
        "Enter high value : ",
        "Enter value to guess : ",
    )
    # The generator is consumed lazily by the unpacking, so each prompt is
    # shown and converted before the next one is asked
    lower, higher, guess = (int(input(prompt).strip()) for prompt in prompts)
    guess_the_number(lower, higher, guess)


if __name__ == "__main__":
    main()
|
71
other/h_index.py
Normal file
71
other/h_index.py
Normal file
@ -0,0 +1,71 @@
|
||||
"""
|
||||
Task:
|
||||
Given an array of integers citations where citations[i] is the number of
|
||||
citations a researcher received for their ith paper, compute the
|
||||
researcher's h-index.
|
||||
|
||||
According to the definition of h-index on Wikipedia: A scientist has an
|
||||
index h if h of their n papers have at least h citations each, and the other
|
||||
n - h papers have no more than h citations each.
|
||||
|
||||
If there are several possible values for h, the maximum one is taken as the
|
||||
h-index.
|
||||
|
||||
H-Index link: https://en.wikipedia.org/wiki/H-index
|
||||
|
||||
Implementation notes:
|
||||
Use sorting of array
|
||||
|
||||
Leetcode link: https://leetcode.com/problems/h-index/description/
|
||||
|
||||
n = len(citations)
|
||||
Runtime Complexity: O(n * log(n))
|
||||
Space Complexity: O(n) (auxiliary memory used while sorting)
|
||||
|
||||
"""
|
||||
|
||||
|
||||
def h_index(citations: list[int]) -> int:
    """
    Return H-index of citations

    The input list is not modified: the citation counts are ranked on a
    sorted copy.

    Args:
        citations: number of citations of each paper; must be a list of
            non-negative integers

    Returns:
        The largest h such that h papers have at least h citations each

    Raises:
        ValueError: if `citations` is not a list, or contains an item that is
            not a non-negative integer

    >>> h_index([3, 0, 6, 1, 5])
    3
    >>> h_index([1, 3, 1])
    1
    >>> h_index([1, 2, 3])
    2
    >>> h_index([])
    0
    >>> papers = [3, 0, 6, 1, 5]
    >>> h_index(papers)
    3
    >>> papers
    [3, 0, 6, 1, 5]
    >>> h_index('test')
    Traceback (most recent call last):
        ...
    ValueError: The citations should be a list of non negative integers.
    >>> h_index([1,2,'3'])
    Traceback (most recent call last):
        ...
    ValueError: The citations should be a list of non negative integers.
    >>> h_index([1,2,-3])
    Traceback (most recent call last):
        ...
    ValueError: The citations should be a list of non negative integers.
    """

    # validate:
    if not isinstance(citations, list) or not all(
        isinstance(item, int) and item >= 0 for item in citations
    ):
        raise ValueError("The citations should be a list of non negative integers.")

    # Walk the counts from most to least cited; `rank` papers precede the
    # current one, so the first count that does not exceed `rank` fixes h
    for rank, count in enumerate(sorted(citations, reverse=True)):
        if count <= rank:
            return rank

    return len(citations)


if __name__ == "__main__":
    import doctest

    doctest.testmod()
|
Loading…
x
Reference in New Issue
Block a user