mirror of
https://github.com/TheAlgorithms/Python.git
synced 2025-01-19 00:37:02 +00:00
Merge branch 'master' of https://github.com/MaximSmolskiy/Python
This commit is contained in:
commit
b2e30e7e21
|
@ -16,20 +16,20 @@ repos:
|
|||
- id: auto-walrus
|
||||
|
||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
rev: v0.4.3
|
||||
rev: v0.4.7
|
||||
hooks:
|
||||
- id: ruff
|
||||
- id: ruff-format
|
||||
|
||||
- repo: https://github.com/codespell-project/codespell
|
||||
rev: v2.2.6
|
||||
rev: v2.3.0
|
||||
hooks:
|
||||
- id: codespell
|
||||
additional_dependencies:
|
||||
- tomli
|
||||
|
||||
- repo: https://github.com/tox-dev/pyproject-fmt
|
||||
rev: "1.8.0"
|
||||
rev: "2.1.3"
|
||||
hooks:
|
||||
- id: pyproject-fmt
|
||||
|
||||
|
@ -42,7 +42,7 @@ repos:
|
|||
pass_filenames: false
|
||||
|
||||
- repo: https://github.com/abravalheri/validate-pyproject
|
||||
rev: v0.16
|
||||
rev: v0.18
|
||||
hooks:
|
||||
- id: validate-pyproject
|
||||
|
||||
|
|
|
@ -661,7 +661,6 @@
|
|||
* [Manhattan Distance](maths/manhattan_distance.py)
|
||||
* [Matrix Exponentiation](maths/matrix_exponentiation.py)
|
||||
* [Max Sum Sliding Window](maths/max_sum_sliding_window.py)
|
||||
* [Median Of Two Arrays](maths/median_of_two_arrays.py)
|
||||
* [Minkowski Distance](maths/minkowski_distance.py)
|
||||
* [Mobius Function](maths/mobius_function.py)
|
||||
* [Modular Division](maths/modular_division.py)
|
||||
|
|
|
@ -23,6 +23,42 @@ def create_state_space_tree(
|
|||
Creates a state space tree to iterate through each branch using DFS.
|
||||
We know that each state has exactly len(sequence) - index children.
|
||||
It terminates when it reaches the end of the given sequence.
|
||||
|
||||
:param sequence: The input sequence for which permutations are generated.
|
||||
:param current_sequence: The current permutation being built.
|
||||
:param index: The current index in the sequence.
|
||||
:param index_used: list to track which elements are used in permutation.
|
||||
|
||||
Example 1:
|
||||
>>> sequence = [1, 2, 3]
|
||||
>>> current_sequence = []
|
||||
>>> index_used = [False, False, False]
|
||||
>>> create_state_space_tree(sequence, current_sequence, 0, index_used)
|
||||
[1, 2, 3]
|
||||
[1, 3, 2]
|
||||
[2, 1, 3]
|
||||
[2, 3, 1]
|
||||
[3, 1, 2]
|
||||
[3, 2, 1]
|
||||
|
||||
Example 2:
|
||||
>>> sequence = ["A", "B", "C"]
|
||||
>>> current_sequence = []
|
||||
>>> index_used = [False, False, False]
|
||||
>>> create_state_space_tree(sequence, current_sequence, 0, index_used)
|
||||
['A', 'B', 'C']
|
||||
['A', 'C', 'B']
|
||||
['B', 'A', 'C']
|
||||
['B', 'C', 'A']
|
||||
['C', 'A', 'B']
|
||||
['C', 'B', 'A']
|
||||
|
||||
Example 3:
|
||||
>>> sequence = [1]
|
||||
>>> current_sequence = []
|
||||
>>> index_used = [False]
|
||||
>>> create_state_space_tree(sequence, current_sequence, 0, index_used)
|
||||
[1]
|
||||
"""
|
||||
|
||||
if index == len(sequence):
|
||||
|
|
|
@ -22,6 +22,56 @@ def create_state_space_tree(
|
|||
Creates a state space tree to iterate through each branch using DFS.
|
||||
We know that each state has exactly two children.
|
||||
It terminates when it reaches the end of the given sequence.
|
||||
|
||||
:param sequence: The input sequence for which subsequences are generated.
|
||||
:param current_subsequence: The current subsequence being built.
|
||||
:param index: The current index in the sequence.
|
||||
|
||||
Example:
|
||||
>>> sequence = [3, 2, 1]
|
||||
>>> current_subsequence = []
|
||||
>>> create_state_space_tree(sequence, current_subsequence, 0)
|
||||
[]
|
||||
[1]
|
||||
[2]
|
||||
[2, 1]
|
||||
[3]
|
||||
[3, 1]
|
||||
[3, 2]
|
||||
[3, 2, 1]
|
||||
|
||||
>>> sequence = ["A", "B"]
|
||||
>>> current_subsequence = []
|
||||
>>> create_state_space_tree(sequence, current_subsequence, 0)
|
||||
[]
|
||||
['B']
|
||||
['A']
|
||||
['A', 'B']
|
||||
|
||||
>>> sequence = []
|
||||
>>> current_subsequence = []
|
||||
>>> create_state_space_tree(sequence, current_subsequence, 0)
|
||||
[]
|
||||
|
||||
>>> sequence = [1, 2, 3, 4]
|
||||
>>> current_subsequence = []
|
||||
>>> create_state_space_tree(sequence, current_subsequence, 0)
|
||||
[]
|
||||
[4]
|
||||
[3]
|
||||
[3, 4]
|
||||
[2]
|
||||
[2, 4]
|
||||
[2, 3]
|
||||
[2, 3, 4]
|
||||
[1]
|
||||
[1, 4]
|
||||
[1, 3]
|
||||
[1, 3, 4]
|
||||
[1, 2]
|
||||
[1, 2, 4]
|
||||
[1, 2, 3]
|
||||
[1, 2, 3, 4]
|
||||
"""
|
||||
|
||||
if index == len(sequence):
|
||||
|
@ -35,7 +85,7 @@ def create_state_space_tree(
|
|||
|
||||
|
||||
if __name__ == "__main__":
|
||||
seq: list[Any] = [3, 1, 2, 4]
|
||||
seq: list[Any] = [1, 2, 3]
|
||||
generate_all_subsequences(seq)
|
||||
|
||||
seq.clear()
|
||||
|
|
|
@ -26,7 +26,7 @@ def binary_and(a: int, b: int) -> str:
|
|||
>>> binary_and(0, 1.1)
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
TypeError: 'float' object cannot be interpreted as an integer
|
||||
ValueError: Unknown format code 'b' for object of type 'float'
|
||||
>>> binary_and("0", "1")
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
|
@ -35,8 +35,8 @@ def binary_and(a: int, b: int) -> str:
|
|||
if a < 0 or b < 0:
|
||||
raise ValueError("the value of both inputs must be positive")
|
||||
|
||||
a_binary = str(bin(a))[2:] # remove the leading "0b"
|
||||
b_binary = str(bin(b))[2:] # remove the leading "0b"
|
||||
a_binary = format(a, "b")
|
||||
b_binary = format(b, "b")
|
||||
|
||||
max_len = max(len(a_binary), len(b_binary))
|
||||
|
||||
|
|
|
@ -98,7 +98,7 @@ class SegmentTree:
|
|||
|
||||
def show_data(self):
|
||||
show_list = []
|
||||
for i in range(1, N + 1):
|
||||
for i in range(1, self.N + 1):
|
||||
show_list += [self.query(i, i)]
|
||||
print(show_list)
|
||||
|
||||
|
|
|
@ -2,6 +2,20 @@ def actual_power(a: int, b: int):
|
|||
"""
|
||||
Function using divide and conquer to calculate a^b.
|
||||
It only works for integer a,b.
|
||||
|
||||
:param a: The base of the power operation, an integer.
|
||||
:param b: The exponent of the power operation, a non-negative integer.
|
||||
:return: The result of a^b.
|
||||
|
||||
Examples:
|
||||
>>> actual_power(3, 2)
|
||||
9
|
||||
>>> actual_power(5, 3)
|
||||
125
|
||||
>>> actual_power(2, 5)
|
||||
32
|
||||
>>> actual_power(7, 0)
|
||||
1
|
||||
"""
|
||||
if b == 0:
|
||||
return 1
|
||||
|
@ -13,6 +27,10 @@ def actual_power(a: int, b: int):
|
|||
|
||||
def power(a: int, b: int) -> float:
|
||||
"""
|
||||
:param a: The base (integer).
|
||||
:param b: The exponent (integer).
|
||||
:return: The result of a^b, as a float for negative exponents.
|
||||
|
||||
>>> power(4,6)
|
||||
4096
|
||||
>>> power(2,3)
|
||||
|
|
|
@ -215,7 +215,7 @@ class PriorityQueue:
|
|||
[(5, 'A'), (15, 'B')]
|
||||
"""
|
||||
idx = self.pos[tup[1]]
|
||||
# assuming the new_d is atmost old_d
|
||||
# assuming the new_d is at most old_d
|
||||
self.array[idx] = (new_d, tup[1])
|
||||
while idx > 0 and self.array[self.par(idx)][0] > self.array[idx][0]:
|
||||
self.swap(idx, self.par(idx))
|
||||
|
|
|
@ -629,6 +629,40 @@ def smooth_l1_loss(y_true: np.ndarray, y_pred: np.ndarray, beta: float = 1.0) ->
|
|||
return np.mean(loss)
|
||||
|
||||
|
||||
def kullback_leibler_divergence(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """
    Calculate the Kullback-Leibler divergence (KL divergence) loss between true labels
    and predicted probabilities.

    KL divergence loss quantifies dissimilarity between true labels and predicted
    probabilities. It's often used in training generative models.

    KL = Σ(y_true * ln(y_true / y_pred))

    Terms whose true probability is exactly zero contribute nothing to the sum
    (the usual ``0 * ln(0) = 0`` convention), so one-hot style labels do not
    produce ``nan``.

    Reference: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence

    Parameters:
    - y_true: True class probabilities
    - y_pred: Predicted class probabilities

    Returns:
    - The KL divergence as a Python float.

    Raises:
    - ValueError: If the two arrays do not have the same length.

    >>> true_labels = np.array([0.2, 0.3, 0.5])
    >>> predicted_probs = np.array([0.3, 0.3, 0.4])
    >>> kullback_leibler_divergence(true_labels, predicted_probs)
    0.030478754035472025
    >>> true_labels = np.array([0.0, 1.0])
    >>> predicted_probs = np.array([0.5, 0.5])
    >>> kullback_leibler_divergence(true_labels, predicted_probs)
    0.6931471805599453
    >>> true_labels = np.array([0.2, 0.3, 0.5])
    >>> predicted_probs = np.array([0.3, 0.3, 0.4, 0.5])
    >>> kullback_leibler_divergence(true_labels, predicted_probs)
    Traceback (most recent call last):
        ...
    ValueError: Input arrays must have the same length.
    """
    if len(y_true) != len(y_pred):
        raise ValueError("Input arrays must have the same length.")

    # Where y_true is zero, substitute 1 for both operands so the term becomes
    # 1 * ln(1 / 1) == 0 instead of 0 * ln(0) == nan. Every non-zero entry is
    # evaluated exactly as y_true * ln(y_true / y_pred).
    nonzero = y_true != 0
    safe_true = np.where(nonzero, y_true, 1.0)
    safe_pred = np.where(nonzero, y_pred, 1.0)

    kl_loss = safe_true * np.log(safe_true / safe_pred)
    # Convert the NumPy scalar so the declared ``float`` return type holds.
    return float(np.sum(kl_loss))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import doctest
|
||||
|
||||
|
|
|
@ -1,11 +1,9 @@
|
|||
"""
|
||||
Implementation of sequential minimal optimization (SMO) for support vector machines
|
||||
(SVM).
|
||||
Sequential minimal optimization (SMO) for support vector machines (SVM)
|
||||
|
||||
Sequential minimal optimization (SMO) is an algorithm for solving the quadratic
|
||||
programming (QP) problem that arises during the training of support vector
|
||||
machines.
|
||||
It was invented by John Platt in 1998.
|
||||
Sequential minimal optimization (SMO) is an algorithm for solving the quadratic
|
||||
programming (QP) problem that arises during the training of SVMs. It was invented by
|
||||
John Platt in 1998.
|
||||
|
||||
Input:
|
||||
0: type: numpy.ndarray.
|
||||
|
@ -124,8 +122,7 @@ class SmoSVM:
|
|||
b_old = self._b
|
||||
self._b = b
|
||||
|
||||
# 4: update error value,here we only calculate those non-bound samples'
|
||||
# error
|
||||
# 4: update error, here we only calculate the error for non-bound samples
|
||||
self._unbound = [i for i in self._all_samples if self._is_unbound(i)]
|
||||
for s in self.unbound:
|
||||
if s in (i1, i2):
|
||||
|
@ -136,7 +133,7 @@ class SmoSVM:
|
|||
+ (self._b - b_old)
|
||||
)
|
||||
|
||||
# if i1 or i2 is non-bound,update there error value to zero
|
||||
# if i1 or i2 is non-bound, update their error value to zero
|
||||
if self._is_unbound(i1):
|
||||
self._error[i1] = 0
|
||||
if self._is_unbound(i2):
|
||||
|
@ -161,7 +158,7 @@ class SmoSVM:
|
|||
results.append(result)
|
||||
return np.array(results)
|
||||
|
||||
# Check if alpha violate KKT condition
|
||||
# Check if alpha violates the KKT condition
|
||||
def _check_obey_kkt(self, index):
|
||||
alphas = self.alphas
|
||||
tol = self._tol
|
||||
|
@ -172,20 +169,19 @@ class SmoSVM:
|
|||
|
||||
# Get value calculated from kernel function
|
||||
def _k(self, i1, i2):
|
||||
# for test samples,use Kernel function
|
||||
# for test samples, use kernel function
|
||||
if isinstance(i2, np.ndarray):
|
||||
return self.Kernel(self.samples[i1], i2)
|
||||
# for train samples,Kernel values have been saved in matrix
|
||||
# for training samples, kernel values have been saved in matrix
|
||||
else:
|
||||
return self._K_matrix[i1, i2]
|
||||
|
||||
# Get sample's error
|
||||
# Get error for sample
|
||||
def _e(self, index):
|
||||
"""
|
||||
Two cases:
|
||||
1:Sample[index] is non-bound,Fetch error from list: _error
|
||||
2:sample[index] is bound,Use predicted value deduct true value: g(xi) - yi
|
||||
|
||||
1: Sample[index] is non-bound, fetch error from list: _error
|
||||
2: sample[index] is bound, use predicted value minus true value: g(xi) - yi
|
||||
"""
|
||||
# get from error data
|
||||
if self._is_unbound(index):
|
||||
|
@ -196,7 +192,7 @@ class SmoSVM:
|
|||
yi = self.tags[index]
|
||||
return gx - yi
|
||||
|
||||
# Calculate Kernel matrix of all possible i1,i2 ,saving time
|
||||
# Calculate kernel matrix of all possible i1, i2, saving time
|
||||
def _calculate_k_matrix(self):
|
||||
k_matrix = np.zeros([self.length, self.length])
|
||||
for i in self._all_samples:
|
||||
|
@ -206,7 +202,7 @@ class SmoSVM:
|
|||
)
|
||||
return k_matrix
|
||||
|
||||
# Predict test sample's tag
|
||||
# Predict tag for test sample
|
||||
def _predict(self, sample):
|
||||
k = self._k
|
||||
predicted_value = (
|
||||
|
@ -222,30 +218,31 @@ class SmoSVM:
|
|||
|
||||
# Choose alpha1 and alpha2
|
||||
def _choose_alphas(self):
|
||||
locis = yield from self._choose_a1()
|
||||
if not locis:
|
||||
loci = yield from self._choose_a1()
|
||||
if not loci:
|
||||
return None
|
||||
return locis
|
||||
return loci
|
||||
|
||||
def _choose_a1(self):
|
||||
"""
|
||||
Choose first alpha ;steps:
|
||||
1:First loop over all sample
|
||||
2:Second loop over all non-bound samples till all non-bound samples does not
|
||||
voilate kkt condition.
|
||||
3:Repeat this two process endlessly,till all samples does not voilate kkt
|
||||
condition samples after first loop.
|
||||
Choose first alpha
|
||||
Steps:
|
||||
1: First loop over all samples
|
||||
2: Second loop over all non-bound samples until no non-bound samples violate
|
||||
the KKT condition.
|
||||
3: Repeat these two processes until no samples violate the KKT condition
|
||||
after the first loop.
|
||||
"""
|
||||
while True:
|
||||
all_not_obey = True
|
||||
# all sample
|
||||
print("scanning all sample!")
|
||||
print("Scanning all samples!")
|
||||
for i1 in [i for i in self._all_samples if self._check_obey_kkt(i)]:
|
||||
all_not_obey = False
|
||||
yield from self._choose_a2(i1)
|
||||
|
||||
# non-bound sample
|
||||
print("scanning non-bound sample!")
|
||||
print("Scanning non-bound samples!")
|
||||
while True:
|
||||
not_obey = True
|
||||
for i1 in [
|
||||
|
@ -256,20 +253,21 @@ class SmoSVM:
|
|||
not_obey = False
|
||||
yield from self._choose_a2(i1)
|
||||
if not_obey:
|
||||
print("all non-bound samples fit the KKT condition!")
|
||||
print("All non-bound samples satisfy the KKT condition!")
|
||||
break
|
||||
if all_not_obey:
|
||||
print("all samples fit the KKT condition! Optimization done!")
|
||||
print("All samples satisfy the KKT condition!")
|
||||
break
|
||||
return False
|
||||
|
||||
def _choose_a2(self, i1):
|
||||
"""
|
||||
Choose the second alpha by using heuristic algorithm ;steps:
|
||||
1: Choose alpha2 which gets the maximum step size (|E1 - E2|).
|
||||
2: Start in a random point,loop over all non-bound samples till alpha1 and
|
||||
Choose the second alpha using a heuristic algorithm
|
||||
Steps:
|
||||
1: Choose alpha2 that maximizes the step size (|E1 - E2|).
|
||||
2: Start in a random point, loop over all non-bound samples till alpha1 and
|
||||
alpha2 are optimized.
|
||||
3: Start in a random point,loop over all samples till alpha1 and alpha2 are
|
||||
3: Start in a random point, loop over all samples till alpha1 and alpha2 are
|
||||
optimized.
|
||||
"""
|
||||
self._unbound = [i for i in self._all_samples if self._is_unbound(i)]
|
||||
|
@ -306,7 +304,7 @@ class SmoSVM:
|
|||
if i1 == i2:
|
||||
return None, None
|
||||
|
||||
# calculate L and H which bound the new alpha2
|
||||
# calculate L and H which bound the new alpha2
|
||||
s = y1 * y2
|
||||
if s == -1:
|
||||
l, h = max(0.0, a2 - a1), min(self._c, self._c + a2 - a1) # noqa: E741
|
||||
|
@ -320,7 +318,7 @@ class SmoSVM:
|
|||
k22 = k(i2, i2)
|
||||
k12 = k(i1, i2)
|
||||
|
||||
# select the new alpha2 which could get the minimal objectives
|
||||
# select the new alpha2 which could achieve the minimal objectives
|
||||
if (eta := k11 + k22 - 2.0 * k12) > 0.0:
|
||||
a2_new_unc = a2 + (y2 * (e1 - e2)) / eta
|
||||
# a2_new has a boundary
|
||||
|
@ -335,7 +333,7 @@ class SmoSVM:
|
|||
l1 = a1 + s * (a2 - l)
|
||||
h1 = a1 + s * (a2 - h)
|
||||
|
||||
# way 1
|
||||
# Method 1
|
||||
f1 = y1 * (e1 + b) - a1 * k(i1, i1) - s * a2 * k(i1, i2)
|
||||
f2 = y2 * (e2 + b) - a2 * k(i2, i2) - s * a1 * k(i1, i2)
|
||||
ol = (
|
||||
|
@ -353,9 +351,8 @@ class SmoSVM:
|
|||
+ s * h * h1 * k(i1, i2)
|
||||
)
|
||||
"""
|
||||
# way 2
|
||||
Use objective function check which alpha2 new could get the minimal
|
||||
objectives
|
||||
Method 2: Use objective function to check which alpha2_new could achieve the
|
||||
minimal objectives
|
||||
"""
|
||||
if ol < (oh - self._eps):
|
||||
a2_new = l
|
||||
|
@ -375,7 +372,7 @@ class SmoSVM:
|
|||
|
||||
return a1_new, a2_new
|
||||
|
||||
# Normalise data using min_max way
|
||||
# Normalize data using min-max method
|
||||
def _norm(self, data):
|
||||
if self._init:
|
||||
self._min = np.min(data, axis=0)
|
||||
|
@ -424,7 +421,7 @@ class Kernel:
|
|||
|
||||
def _check(self):
|
||||
if self._kernel == self._rbf and self.gamma < 0:
|
||||
raise ValueError("gamma value must greater than 0")
|
||||
raise ValueError("gamma value must be non-negative")
|
||||
|
||||
def _get_kernel(self, kernel_name):
|
||||
maps = {"linear": self._linear, "poly": self._polynomial, "rbf": self._rbf}
|
||||
|
@ -444,26 +441,30 @@ def count_time(func):
|
|||
start_time = time.time()
|
||||
func(*args, **kwargs)
|
||||
end_time = time.time()
|
||||
print(f"smo algorithm cost {end_time - start_time} seconds")
|
||||
print(f"SMO algorithm cost {end_time - start_time} seconds")
|
||||
|
||||
return call_func
|
||||
|
||||
|
||||
@count_time
|
||||
def test_cancel_data():
|
||||
print("Hello!\nStart test svm by smo algorithm!")
|
||||
def test_cancer_data():
|
||||
print("Hello!\nStart test SVM using the SMO algorithm!")
|
||||
# 0: download dataset and load into pandas' dataframe
|
||||
if not os.path.exists(r"cancel_data.csv"):
|
||||
if not os.path.exists(r"cancer_data.csv"):
|
||||
request = urllib.request.Request( # noqa: S310
|
||||
CANCER_DATASET_URL,
|
||||
headers={"User-Agent": "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)"},
|
||||
)
|
||||
response = urllib.request.urlopen(request) # noqa: S310
|
||||
content = response.read().decode("utf-8")
|
||||
with open(r"cancel_data.csv", "w") as f:
|
||||
with open(r"cancer_data.csv", "w") as f:
|
||||
f.write(content)
|
||||
|
||||
data = pd.read_csv(r"cancel_data.csv", header=None)
|
||||
data = pd.read_csv(
|
||||
"cancer_data.csv",
|
||||
header=None,
|
||||
dtype={0: str}, # Assuming the first column contains string data
|
||||
)
|
||||
|
||||
# 1: pre-processing data
|
||||
del data[data.columns.tolist()[0]]
|
||||
|
@ -475,14 +476,14 @@ def test_cancel_data():
|
|||
train_data, test_data = samples[:328, :], samples[328:, :]
|
||||
test_tags, test_samples = test_data[:, 0], test_data[:, 1:]
|
||||
|
||||
# 3: choose kernel function,and set initial alphas to zero(optional)
|
||||
mykernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5)
|
||||
# 3: choose kernel function, and set initial alphas to zero (optional)
|
||||
my_kernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5)
|
||||
al = np.zeros(train_data.shape[0])
|
||||
|
||||
# 4: calculating best alphas using SMO algorithm and predict test_data samples
|
||||
mysvm = SmoSVM(
|
||||
train=train_data,
|
||||
kernel_func=mykernel,
|
||||
kernel_func=my_kernel,
|
||||
alpha_list=al,
|
||||
cost=0.4,
|
||||
b=0.0,
|
||||
|
@ -497,30 +498,30 @@ def test_cancel_data():
|
|||
for i in range(test_tags.shape[0]):
|
||||
if test_tags[i] == predict[i]:
|
||||
score += 1
|
||||
print(f"\nall: {test_num}\nright: {score}\nfalse: {test_num - score}")
|
||||
print(f"\nAll: {test_num}\nCorrect: {score}\nIncorrect: {test_num - score}")
|
||||
print(f"Rough Accuracy: {score / test_tags.shape[0]}")
|
||||
|
||||
|
||||
def test_demonstration():
|
||||
# change stdout
|
||||
print("\nStart plot,please wait!!!")
|
||||
print("\nStarting plot, please wait!")
|
||||
sys.stdout = open(os.devnull, "w")
|
||||
|
||||
ax1 = plt.subplot2grid((2, 2), (0, 0))
|
||||
ax2 = plt.subplot2grid((2, 2), (0, 1))
|
||||
ax3 = plt.subplot2grid((2, 2), (1, 0))
|
||||
ax4 = plt.subplot2grid((2, 2), (1, 1))
|
||||
ax1.set_title("linear svm,cost:0.1")
|
||||
ax1.set_title("Linear SVM, cost = 0.1")
|
||||
test_linear_kernel(ax1, cost=0.1)
|
||||
ax2.set_title("linear svm,cost:500")
|
||||
ax2.set_title("Linear SVM, cost = 500")
|
||||
test_linear_kernel(ax2, cost=500)
|
||||
ax3.set_title("rbf kernel svm,cost:0.1")
|
||||
ax3.set_title("RBF kernel SVM, cost = 0.1")
|
||||
test_rbf_kernel(ax3, cost=0.1)
|
||||
ax4.set_title("rbf kernel svm,cost:500")
|
||||
ax4.set_title("RBF kernel SVM, cost = 500")
|
||||
test_rbf_kernel(ax4, cost=500)
|
||||
|
||||
sys.stdout = sys.__stdout__
|
||||
print("Plot done!!!")
|
||||
print("Plot done!")
|
||||
|
||||
|
||||
def test_linear_kernel(ax, cost):
|
||||
|
@ -531,10 +532,10 @@ def test_linear_kernel(ax, cost):
|
|||
scaler = StandardScaler()
|
||||
train_x_scaled = scaler.fit_transform(train_x, train_y)
|
||||
train_data = np.hstack((train_y.reshape(500, 1), train_x_scaled))
|
||||
mykernel = Kernel(kernel="linear", degree=5, coef0=1, gamma=0.5)
|
||||
my_kernel = Kernel(kernel="linear", degree=5, coef0=1, gamma=0.5)
|
||||
mysvm = SmoSVM(
|
||||
train=train_data,
|
||||
kernel_func=mykernel,
|
||||
kernel_func=my_kernel,
|
||||
cost=cost,
|
||||
tolerance=0.001,
|
||||
auto_norm=False,
|
||||
|
@ -551,10 +552,10 @@ def test_rbf_kernel(ax, cost):
|
|||
scaler = StandardScaler()
|
||||
train_x_scaled = scaler.fit_transform(train_x, train_y)
|
||||
train_data = np.hstack((train_y.reshape(500, 1), train_x_scaled))
|
||||
mykernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5)
|
||||
my_kernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5)
|
||||
mysvm = SmoSVM(
|
||||
train=train_data,
|
||||
kernel_func=mykernel,
|
||||
kernel_func=my_kernel,
|
||||
cost=cost,
|
||||
tolerance=0.001,
|
||||
auto_norm=False,
|
||||
|
@ -567,11 +568,11 @@ def plot_partition_boundary(
|
|||
model, train_data, ax, resolution=100, colors=("b", "k", "r")
|
||||
):
|
||||
"""
|
||||
We can not get the optimum w of our kernel svm model which is different from linear
|
||||
svm. For this reason, we generate randomly distributed points with high desity and
|
||||
prediced values of these points are calculated by using our trained model. Then we
|
||||
could use this prediced values to draw contour map.
|
||||
And this contour map can represent svm's partition boundary.
|
||||
We cannot get the optimal w of our kernel SVM model, which is different from a
|
||||
linear SVM. For this reason, we generate randomly distributed points with high
|
||||
density, and predicted values of these points are calculated using our trained
|
||||
model. Then we can use these predicted values to draw a contour map; this contour
|
||||
map represents the SVM's partition boundary.
|
||||
"""
|
||||
train_data_x = train_data[:, 1]
|
||||
train_data_y = train_data[:, 2]
|
||||
|
@ -616,6 +617,6 @@ def plot_partition_boundary(
|
|||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_cancel_data()
|
||||
test_cancer_data()
|
||||
test_demonstration()
|
||||
plt.show()
|
||||
|
|
|
@ -1,33 +0,0 @@
|
|||
from __future__ import annotations
|
||||
|
||||
|
||||
def median_of_two_arrays(nums1: list[float], nums2: list[float]) -> float:
|
||||
"""
|
||||
>>> median_of_two_arrays([1, 2], [3])
|
||||
2
|
||||
>>> median_of_two_arrays([0, -1.1], [2.5, 1])
|
||||
0.5
|
||||
>>> median_of_two_arrays([], [2.5, 1])
|
||||
1.75
|
||||
>>> median_of_two_arrays([], [0])
|
||||
0
|
||||
>>> median_of_two_arrays([], [])
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
IndexError: list index out of range
|
||||
"""
|
||||
all_numbers = sorted(nums1 + nums2)
|
||||
div, mod = divmod(len(all_numbers), 2)
|
||||
if mod == 1:
|
||||
return all_numbers[div]
|
||||
else:
|
||||
return (all_numbers[div] + all_numbers[div - 1]) / 2
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import doctest
|
||||
|
||||
doctest.testmod()
|
||||
array_1 = [float(x) for x in input("Enter the elements of first array: ").split()]
|
||||
array_2 = [float(x) for x in input("Enter the elements of second array: ").split()]
|
||||
print(f"The median of two arrays is: {median_of_two_arrays(array_1, array_2)}")
|
143
physics/rainfall_intensity.py
Normal file
143
physics/rainfall_intensity.py
Normal file
|
@ -0,0 +1,143 @@
|
|||
"""
|
||||
Rainfall Intensity
|
||||
==================
|
||||
This module contains functions to calculate the intensity of
|
||||
a rainfall event for a given duration and return period.
|
||||
|
||||
This function uses the Sherman intensity-duration-frequency curve.
|
||||
|
||||
References
|
||||
----------
|
||||
- Aparicio, F. (1997): Fundamentos de Hidrología de Superficie.
|
||||
Balderas, México, Limusa. 303 p.
|
||||
- https://en.wikipedia.org/wiki/Intensity-duration-frequency_curve
|
||||
"""
|
||||
|
||||
|
||||
def rainfall_intensity(
    coefficient_k: float,
    coefficient_a: float,
    coefficient_b: float,
    coefficient_c: float,
    return_period: float,
    duration: float,
) -> float:
    """
    Calculate the intensity of a rainfall event for a given duration and return period.
    It's based on the Sherman intensity-duration-frequency curve:

    I = k * T^a / (D + b)^c

    where:
        I = Intensity of the rainfall event [mm/h]
        k, a, b, c = Coefficients obtained through statistical distribution adjustment
        T = Return period in years
        D = Rainfall event duration in minutes

    Parameters
    ----------
    coefficient_k : float
        Coefficient obtained through statistical distribution adjustment.
    coefficient_a : float
        Coefficient obtained through statistical distribution adjustment.
    coefficient_b : float
        Coefficient obtained through statistical distribution adjustment.
    coefficient_c : float
        Coefficient obtained through statistical distribution adjustment.
    return_period : float
        Return period in years.
    duration : float
        Rainfall event duration in minutes.

    Returns
    -------
    intensity : float
        Intensity of the rainfall event in mm/h.

    Raises
    ------
    ValueError
        If any of the parameters are not positive.

    Examples
    --------

    >>> rainfall_intensity(1000, 0.2, 11.6, 0.81, 10, 60)
    49.83339231138578

    >>> rainfall_intensity(1000, 0.2, 11.6, 0.81, 10, 30)
    77.36319588106228

    >>> rainfall_intensity(1000, 0.2, 11.6, 0.81, 5, 60)
    43.382487747633625

    >>> rainfall_intensity(0, 0.2, 11.6, 0.81, 10, 60)
    Traceback (most recent call last):
    ...
    ValueError: All parameters must be positive.

    >>> rainfall_intensity(1000, -0.2, 11.6, 0.81, 10, 60)
    Traceback (most recent call last):
    ...
    ValueError: All parameters must be positive.

    >>> rainfall_intensity(1000, 0.2, -11.6, 0.81, 10, 60)
    Traceback (most recent call last):
    ...
    ValueError: All parameters must be positive.

    >>> rainfall_intensity(1000, 0.2, 11.6, -0.81, 10, 60)
    Traceback (most recent call last):
    ...
    ValueError: All parameters must be positive.

    >>> rainfall_intensity(1000, 0, 11.6, 0.81, 10, 60)
    Traceback (most recent call last):
    ...
    ValueError: All parameters must be positive.

    >>> rainfall_intensity(1000, 0.2, 0, 0.81, 10, 60)
    Traceback (most recent call last):
    ...
    ValueError: All parameters must be positive.

    >>> rainfall_intensity(1000, 0.2, 11.6, 0, 10, 60)
    Traceback (most recent call last):
    ...
    ValueError: All parameters must be positive.

    >>> rainfall_intensity(1000, 0.2, 11.6, 0.81, 0, 60)
    Traceback (most recent call last):
    ...
    ValueError: All parameters must be positive.

    >>> rainfall_intensity(1000, 0.2, 11.6, 0.81, 10, 0)
    Traceback (most recent call last):
    ...
    ValueError: All parameters must be positive.

    """
    parameters = (
        coefficient_k,
        coefficient_a,
        coefficient_b,
        coefficient_c,
        return_period,
        duration,
    )
    # Every input must be strictly positive; zero is rejected as well.
    if any(value <= 0 for value in parameters):
        raise ValueError("All parameters must be positive.")

    # Sherman curve: I = k * T^a / (D + b)^c
    return (coefficient_k * (return_period**coefficient_a)) / (
        (duration + coefficient_b) ** coefficient_c
    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import doctest
|
||||
|
||||
doctest.testmod()
|
|
@ -58,7 +58,7 @@ def upf_len(num: int) -> int:
|
|||
|
||||
def equality(iterable: list) -> bool:
|
||||
"""
|
||||
Check equality of ALL elements in an interable.
|
||||
Check equality of ALL elements in an iterable
|
||||
>>> equality([1, 2, 3, 4])
|
||||
False
|
||||
>>> equality([2, 2, 2, 2])
|
||||
|
|
192
pyproject.toml
192
pyproject.toml
|
@ -1,61 +1,49 @@
|
|||
[tool.ruff]
|
||||
lint.ignore = [ # `ruff rule S101` for a description of that rule
|
||||
"B904", # Within an `except` clause, raise exceptions with `raise ... from err` -- FIX ME
|
||||
"B905", # `zip()` without an explicit `strict=` parameter -- FIX ME
|
||||
"EM101", # Exception must not use a string literal, assign to variable first
|
||||
"EXE001", # Shebang is present but file is not executable -- DO NOT FIX
|
||||
"G004", # Logging statement uses f-string
|
||||
"PLC1901", # `{}` can be simplified to `{}` as an empty string is falsey
|
||||
"PLW060", # Using global for `{name}` but no assignment is done -- DO NOT FIX
|
||||
"PLW2901", # PLW2901: Redefined loop variable -- FIX ME
|
||||
"PT011", # `pytest.raises(Exception)` is too broad, set the `match` parameter or use a more specific exception
|
||||
"PT018", # Assertion should be broken down into multiple parts
|
||||
"S101", # Use of `assert` detected -- DO NOT FIX
|
||||
"S311", # Standard pseudo-random generators are not suitable for cryptographic purposes -- FIX ME
|
||||
"SLF001", # Private member accessed: `_Iterator` -- FIX ME
|
||||
"UP038", # Use `X | Y` in `{}` call instead of `(X, Y)` -- DO NOT FIX
|
||||
]
|
||||
lint.select = [ # https://beta.ruff.rs/docs/rules
|
||||
"A", # flake8-builtins
|
||||
"ARG", # flake8-unused-arguments
|
||||
"ASYNC", # flake8-async
|
||||
"B", # flake8-bugbear
|
||||
"BLE", # flake8-blind-except
|
||||
"C4", # flake8-comprehensions
|
||||
"C90", # McCabe cyclomatic complexity
|
||||
"DJ", # flake8-django
|
||||
"DTZ", # flake8-datetimez
|
||||
"E", # pycodestyle
|
||||
"EM", # flake8-errmsg
|
||||
"EXE", # flake8-executable
|
||||
"F", # Pyflakes
|
||||
"FA", # flake8-future-annotations
|
||||
"FLY", # flynt
|
||||
"G", # flake8-logging-format
|
||||
"I", # isort
|
||||
"ICN", # flake8-import-conventions
|
||||
"INP", # flake8-no-pep420
|
||||
"INT", # flake8-gettext
|
||||
"ISC", # flake8-implicit-str-concat
|
||||
"N", # pep8-naming
|
||||
"NPY", # NumPy-specific rules
|
||||
"PD", # pandas-vet
|
||||
"PGH", # pygrep-hooks
|
||||
"PIE", # flake8-pie
|
||||
"PL", # Pylint
|
||||
"PT", # flake8-pytest-style
|
||||
"PYI", # flake8-pyi
|
||||
"RSE", # flake8-raise
|
||||
"RUF", # Ruff-specific rules
|
||||
"S", # flake8-bandit
|
||||
"SIM", # flake8-simplify
|
||||
"SLF", # flake8-self
|
||||
"T10", # flake8-debugger
|
||||
"TD", # flake8-todos
|
||||
"TID", # flake8-tidy-imports
|
||||
"UP", # pyupgrade
|
||||
"W", # pycodestyle
|
||||
"YTT", # flake8-2020
|
||||
target-version = "py312"
|
||||
|
||||
output-format = "full"
|
||||
lint.select = [
|
||||
# https://docs.astral.sh/ruff/rules
|
||||
"A", # flake8-builtins
|
||||
"ARG", # flake8-unused-arguments
|
||||
"ASYNC", # flake8-async
|
||||
"B", # flake8-bugbear
|
||||
"BLE", # flake8-blind-except
|
||||
"C4", # flake8-comprehensions
|
||||
"C90", # McCabe cyclomatic complexity
|
||||
"DJ", # flake8-django
|
||||
"DTZ", # flake8-datetimez
|
||||
"E", # pycodestyle
|
||||
"EM", # flake8-errmsg
|
||||
"EXE", # flake8-executable
|
||||
"F", # Pyflakes
|
||||
"FA", # flake8-future-annotations
|
||||
"FLY", # flynt
|
||||
"G", # flake8-logging-format
|
||||
"I", # isort
|
||||
"ICN", # flake8-import-conventions
|
||||
"INP", # flake8-no-pep420
|
||||
"INT", # flake8-gettext
|
||||
"ISC", # flake8-implicit-str-concat
|
||||
"N", # pep8-naming
|
||||
"NPY", # NumPy-specific rules
|
||||
"PD", # pandas-vet
|
||||
"PGH", # pygrep-hooks
|
||||
"PIE", # flake8-pie
|
||||
"PL", # Pylint
|
||||
"PT", # flake8-pytest-style
|
||||
"PYI", # flake8-pyi
|
||||
"RSE", # flake8-raise
|
||||
"RUF", # Ruff-specific rules
|
||||
"S", # flake8-bandit
|
||||
"SIM", # flake8-simplify
|
||||
"SLF", # flake8-self
|
||||
"T10", # flake8-debugger
|
||||
"TD", # flake8-todos
|
||||
"TID", # flake8-tidy-imports
|
||||
"UP", # pyupgrade
|
||||
"W", # pycodestyle
|
||||
"YTT", # flake8-2020
|
||||
# "ANN", # flake8-annotations # FIX ME?
|
||||
# "COM", # flake8-commas
|
||||
# "D", # pydocstyle -- FIX ME?
|
||||
|
@ -68,30 +56,64 @@ lint.select = [ # https://beta.ruff.rs/docs/rules
|
|||
# "TCH", # flake8-type-checking
|
||||
# "TRY", # tryceratops
|
||||
]
|
||||
output-format = "full"
|
||||
target-version = "py312"
|
||||
lint.ignore = [
|
||||
# `ruff rule S101` for a description of that rule
|
||||
"B904", # Within an `except` clause, raise exceptions with `raise ... from err` -- FIX ME
|
||||
"B905", # `zip()` without an explicit `strict=` parameter -- FIX ME
|
||||
"EM101", # Exception must not use a string literal, assign to variable first
|
||||
"EXE001", # Shebang is present but file is not executable -- DO NOT FIX
|
||||
"G004", # Logging statement uses f-string
|
||||
"PLC1901", # `{}` can be simplified to `{}` as an empty string is falsey
|
||||
"PLW060", # Using global for `{name}` but no assignment is done -- DO NOT FIX
|
||||
"PLW2901", # PLW2901: Redefined loop variable -- FIX ME
|
||||
"PT011", # `pytest.raises(Exception)` is too broad, set the `match` parameter or use a more specific exception
|
||||
"PT018", # Assertion should be broken down into multiple parts
|
||||
"S101", # Use of `assert` detected -- DO NOT FIX
|
||||
"S311", # Standard pseudo-random generators are not suitable for cryptographic purposes -- FIX ME
|
||||
"SLF001", # Private member accessed: `_Iterator` -- FIX ME
|
||||
"UP038", # Use `X | Y` in `{}` call instead of `(X, Y)` -- DO NOT FIX
|
||||
]
|
||||
|
||||
[tool.ruff.lint.mccabe] # DO NOT INCREASE THIS VALUE
|
||||
max-complexity = 17 # default: 10
|
||||
|
||||
[tool.ruff.lint.per-file-ignores]
|
||||
"arithmetic_analysis/newton_raphson.py" = ["PGH001"]
|
||||
"data_structures/binary_tree/binary_search_tree_recursive.py" = ["BLE001"]
|
||||
"data_structures/hashing/tests/test_hash_map.py" = ["BLE001"]
|
||||
"hashes/enigma_machine.py" = ["BLE001"]
|
||||
"machine_learning/sequential_minimum_optimization.py" = ["SIM115"]
|
||||
"matrix/sherman_morrison.py" = ["SIM103"]
|
||||
"other/l*u_cache.py" = ["RUF012"]
|
||||
"physics/newtons_second_law_of_motion.py" = ["BLE001"]
|
||||
"project_euler/problem_099/sol1.py" = ["SIM115"]
|
||||
"sorts/external_sort.py" = ["SIM115"]
|
||||
|
||||
[tool.ruff.lint.pylint] # DO NOT INCREASE THESE VALUES
|
||||
allow-magic-value-types = ["float", "int", "str"]
|
||||
max-args = 10 # default: 5
|
||||
max-branches = 20 # default: 12
|
||||
max-returns = 8 # default: 6
|
||||
max-statements = 88 # default: 50
|
||||
lint.per-file-ignores."arithmetic_analysis/newton_raphson.py" = [
|
||||
"PGH001",
|
||||
]
|
||||
lint.per-file-ignores."data_structures/binary_tree/binary_search_tree_recursive.py" = [
|
||||
"BLE001",
|
||||
]
|
||||
lint.per-file-ignores."data_structures/hashing/tests/test_hash_map.py" = [
|
||||
"BLE001",
|
||||
]
|
||||
lint.per-file-ignores."hashes/enigma_machine.py" = [
|
||||
"BLE001",
|
||||
]
|
||||
lint.per-file-ignores."machine_learning/sequential_minimum_optimization.py" = [
|
||||
"SIM115",
|
||||
]
|
||||
lint.per-file-ignores."matrix/sherman_morrison.py" = [
|
||||
"SIM103",
|
||||
]
|
||||
lint.per-file-ignores."other/l*u_cache.py" = [
|
||||
"RUF012",
|
||||
]
|
||||
lint.per-file-ignores."physics/newtons_second_law_of_motion.py" = [
|
||||
"BLE001",
|
||||
]
|
||||
lint.per-file-ignores."project_euler/problem_099/sol1.py" = [
|
||||
"SIM115",
|
||||
]
|
||||
lint.per-file-ignores."sorts/external_sort.py" = [
|
||||
"SIM115",
|
||||
]
|
||||
lint.mccabe.max-complexity = 17 # default: 10
|
||||
lint.pylint.allow-magic-value-types = [
|
||||
"float",
|
||||
"int",
|
||||
"str",
|
||||
]
|
||||
lint.pylint.max-args = 10 # default: 5
|
||||
lint.pylint.max-branches = 20 # default: 12
|
||||
lint.pylint.max-returns = 8 # default: 6
|
||||
lint.pylint.max-statements = 88 # default: 50
|
||||
|
||||
[tool.codespell]
|
||||
ignore-words-list = "3rt,ans,bitap,crate,damon,fo,followings,hist,iff,kwanza,manuel,mater,secant,som,sur,tim,toi,zar"
|
||||
|
@ -99,17 +121,17 @@ skip = "./.*,*.json,ciphers/prehistoric_men.txt,project_euler/problem_022/p022_n
|
|||
|
||||
[tool.pytest.ini_options]
|
||||
markers = [
|
||||
"mat_ops: mark a test as utilizing matrix operations.",
|
||||
"mat_ops: mark a test as utilizing matrix operations.",
|
||||
]
|
||||
addopts = [
|
||||
"--durations=10",
|
||||
"--doctest-modules",
|
||||
"--showlocals",
|
||||
"--durations=10",
|
||||
"--doctest-modules",
|
||||
"--showlocals",
|
||||
]
|
||||
|
||||
[tool.coverage.report]
|
||||
omit = [
|
||||
".env/*",
|
||||
"project_euler/*"
|
||||
"project_euler/*",
|
||||
]
|
||||
sort = "Cover"
|
||||
|
|
Loading…
Reference in New Issue
Block a user