mirror of https://github.com/TheAlgorithms/Python.git
synced 2025-02-25 10:28:39 +00:00

Compare commits: 10 commits (dd3e69e0a6 ... 64bbb725da)

| SHA1 |
|---|
| 64bbb725da |
| 5cac60ab90 |
| ea284221eb |
| 50d8ef8525 |
| 0050c3fad5 |
| 7dee330c83 |
| 945803f65d |
| 5f7819e1cd |
| 72c7b05caa |
| a207187ddb |
DIRECTORY.md

@ -710,6 +710,7 @@
   * [2 Hidden Layers Neural Network](neural_network/2_hidden_layers_neural_network.py)
   * Activation Functions
     * [Exponential Linear Unit](neural_network/activation_functions/exponential_linear_unit.py)
+    * [Leaky Rectified Linear Unit](neural_network/activation_functions/leaky_rectified_linear_unit.py)
   * [Back Propagation Neural Network](neural_network/back_propagation_neural_network.py)
   * [Convolution Neural Network](neural_network/convolution_neural_network.py)
   * [Perceptron](neural_network/perceptron.py)

@ -1212,6 +1213,7 @@
   * [Daily Horoscope](web_programming/daily_horoscope.py)
   * [Download Images From Google Query](web_programming/download_images_from_google_query.py)
   * [Emails From Url](web_programming/emails_from_url.py)
+  * [Fetch Anime And Play](web_programming/fetch_anime_and_play.py)
   * [Fetch Bbc News](web_programming/fetch_bbc_news.py)
   * [Fetch Github Info](web_programming/fetch_github_info.py)
   * [Fetch Jobs](web_programming/fetch_jobs.py)

@ -1220,6 +1222,7 @@
   * [Get Amazon Product Data](web_programming/get_amazon_product_data.py)
   * [Get Imdb Top 250 Movies Csv](web_programming/get_imdb_top_250_movies_csv.py)
   * [Get Imdbtop](web_programming/get_imdbtop.py)
+  * [Get Top Billionaires](web_programming/get_top_billionaires.py)
   * [Get Top Hn Posts](web_programming/get_top_hn_posts.py)
   * [Get User Tweets](web_programming/get_user_tweets.py)
   * [Giphy](web_programming/giphy.py)
linear_programming/simplex.py

@ -20,40 +20,60 @@ import numpy as np
 class Tableau:
     """Operate on simplex tableaus

-    >>> t = Tableau(np.array([[-1,-1,0,0,-1],[1,3,1,0,4],[3,1,0,1,4.]]), 2)
+    >>> Tableau(np.array([[-1,-1,0,0,1],[1,3,1,0,4],[3,1,0,1,4]]), 2, 2)
+    Traceback (most recent call last):
+    ...
+    TypeError: Tableau must have type float64
+
+    >>> Tableau(np.array([[-1,-1,0,0,-1],[1,3,1,0,4],[3,1,0,1,4.]]), 2, 2)
     Traceback (most recent call last):
     ...
     ValueError: RHS must be > 0
+
+    >>> Tableau(np.array([[-1,-1,0,0,1],[1,3,1,0,4],[3,1,0,1,4.]]), -2, 2)
+    Traceback (most recent call last):
+    ...
+    ValueError: number of (artificial) variables must be a natural number
     """

-    def __init__(self, tableau: np.ndarray, n_vars: int) -> None:
+    # Max iteration number to prevent cycling
+    maxiter = 100
+
+    def __init__(
+        self, tableau: np.ndarray, n_vars: int, n_artificial_vars: int
+    ) -> None:
         if tableau.dtype != "float64":
             raise TypeError("Tableau must have type float64")

         # Check if RHS is negative
-        if np.any(tableau[:, -1], where=tableau[:, -1] < 0):
+        if not (tableau[:, -1] >= 0).all():
             raise ValueError("RHS must be > 0")

+        if n_vars < 2 or n_artificial_vars < 0:
+            raise ValueError(
+                "number of (artificial) variables must be a natural number"
+            )
+
         self.tableau = tableau
-        self.n_rows, _ = tableau.shape
+        self.n_rows, n_cols = tableau.shape

         # Number of decision variables x1, x2, x3...
-        self.n_vars = n_vars
-
-        # Number of artificial variables to be minimised
-        self.n_art_vars = len(np.where(tableau[self.n_vars : -1] == -1)[0])
+        self.n_vars, self.n_artificial_vars = n_vars, n_artificial_vars

         # 2 if there are >= or == constraints (nonstandard), 1 otherwise (std)
-        self.n_stages = (self.n_art_vars > 0) + 1
+        self.n_stages = (self.n_artificial_vars > 0) + 1

         # Number of slack variables added to make inequalities into equalities
-        self.n_slack = self.n_rows - self.n_stages
+        self.n_slack = n_cols - self.n_vars - self.n_artificial_vars - 1

         # Objectives for each stage
         self.objectives = ["max"]

         # In two stage simplex, first minimise then maximise
-        if self.n_art_vars:
+        if self.n_artificial_vars:
             self.objectives.append("min")

-        self.col_titles = [""]
+        self.col_titles = self.generate_col_titles()

         # Index of current pivot row and column
         self.row_idx = None
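As an illustration of the new bookkeeping (this sketch is not part of the commit), the slack count now falls out of the tableau's width once the artificial-variable count is passed in explicitly, instead of being scanned out of the matrix:

```python
import numpy as np

# Two-stage example: 2 decision variables, 2 artificial variables,
# reusing the four-row tableau from the change_stage doctest below.
tableau = np.array(
    [
        [3, 3, -1, -1, 0, 0, 4],
        [2, 1, 0, 0, 0, 0, 0.0],
        [1, 2, -1, 0, 1, 0, 2],
        [2, 1, 0, -1, 0, 1, 2],
    ]
)
n_vars, n_artificial_vars = 2, 2
n_rows, n_cols = tableau.shape  # (4, 7)

# Columns: decision | slack | artificial | RHS, so slack is the remainder
n_slack = n_cols - n_vars - n_artificial_vars - 1
assert n_slack == 2

# Two stages whenever artificial variables are present
n_stages = (n_artificial_vars > 0) + 1
assert n_stages == 2
```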
@ -62,48 +82,39 @@ class Tableau:
         # Does objective row only contain (non)-negative values?
         self.stop_iter = False

-    @staticmethod
-    def generate_col_titles(*args: int) -> list[str]:
+    def generate_col_titles(self) -> list[str]:
         """Generate column titles for tableau of specific dimensions

-        >>> Tableau.generate_col_titles(2, 3, 1)
-        ['x1', 'x2', 's1', 's2', 's3', 'a1', 'RHS']
+        >>> Tableau(np.array([[-1,-1,0,0,1],[1,3,1,0,4],[3,1,0,1,4.]]),
+        ... 2, 0).generate_col_titles()
+        ['x1', 'x2', 's1', 's2', 'RHS']

-        >>> Tableau.generate_col_titles()
-        Traceback (most recent call last):
-        ...
-        ValueError: Must provide n_vars, n_slack, and n_art_vars
-        >>> Tableau.generate_col_titles(-2, 3, 1)
-        Traceback (most recent call last):
-        ...
-        ValueError: All arguments must be non-negative integers
+        >>> Tableau(np.array([[-1,-1,0,0,1],[1,3,1,0,4],[3,1,0,1,4.]]),
+        ... 2, 2).generate_col_titles()
+        ['x1', 'x2', 'RHS']
         """
-        if len(args) != 3:
-            raise ValueError("Must provide n_vars, n_slack, and n_art_vars")
-
-        if not all(x >= 0 and isinstance(x, int) for x in args):
-            raise ValueError("All arguments must be non-negative integers")
+        args = (self.n_vars, self.n_slack)

-        # decision | slack | artificial
-        string_starts = ["x", "s", "a"]
+        # decision | slack
+        string_starts = ["x", "s"]
         titles = []
-        for i in range(3):
+        for i in range(2):
             for j in range(args[i]):
                 titles.append(string_starts[i] + str(j + 1))
         titles.append("RHS")
         return titles

-    def find_pivot(self, tableau: np.ndarray) -> tuple[Any, Any]:
+    def find_pivot(self) -> tuple[Any, Any]:
         """Finds the pivot row and column.
-        >>> t = Tableau(np.array([[-2,1,0,0,0], [3,1,1,0,6], [1,2,0,1,7.]]), 2)
-        >>> t.find_pivot(t.tableau)
+        >>> Tableau(np.array([[-2,1,0,0,0], [3,1,1,0,6], [1,2,0,1,7.]]),
+        ... 2, 0).find_pivot()
         (1, 0)
         """
         objective = self.objectives[-1]

         # Find entries of highest magnitude in objective rows
         sign = (objective == "min") - (objective == "max")
-        col_idx = np.argmax(sign * tableau[0, : self.n_vars])
+        col_idx = np.argmax(sign * self.tableau[0, :-1])

         # Choice is only valid if below 0 for maximise, and above for minimise
         if sign * self.tableau[0, col_idx] <= 0:

@ -117,15 +128,15 @@ class Tableau:
         s = slice(self.n_stages, self.n_rows)

         # RHS
-        dividend = tableau[s, -1]
+        dividend = self.tableau[s, -1]

         # Elements of pivot column within slice
-        divisor = tableau[s, col_idx]
+        divisor = self.tableau[s, col_idx]

         # Array filled with nans
         nans = np.full(self.n_rows - self.n_stages, np.nan)

-        # If element in pivot column is greater than zeron_stages, return
+        # If element in pivot column is greater than zero, return
         # quotient or nan otherwise
         quotients = np.divide(dividend, divisor, out=nans, where=divisor > 0)
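The slice arithmetic above is the minimum ratio test. A standalone sketch (not part of the commit), using the tableau from the find_pivot doctest:

```python
import numpy as np

# Objective row on top (n_stages == 1), pivot column already chosen.
tableau = np.array([[-2.0, 1, 0, 0, 0],
                    [3, 1, 1, 0, 6],
                    [1, 2, 0, 1, 7]])
n_stages, col_idx = 1, 0

dividend = tableau[n_stages:, -1]        # RHS of the constraint rows
divisor = tableau[n_stages:, col_idx]    # pivot-column entries
nans = np.full(len(dividend), np.nan)

# Quotient where the divisor is positive, nan otherwise
quotients = np.divide(dividend, divisor, out=nans, where=divisor > 0)
row_idx = int(np.nanargmin(quotients)) + n_stages
print(row_idx)  # 1, matching the (1, 0) result in the doctest above
```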
@ -134,18 +145,18 @@ class Tableau:
         row_idx = np.nanargmin(quotients) + self.n_stages
         return row_idx, col_idx

-    def pivot(self, tableau: np.ndarray, row_idx: int, col_idx: int) -> np.ndarray:
+    def pivot(self, row_idx: int, col_idx: int) -> np.ndarray:
         """Pivots on value on the intersection of pivot row and column.

-        >>> t = Tableau(np.array([[-2,-3,0,0,0],[1,3,1,0,4],[3,1,0,1,4.]]), 2)
-        >>> t.pivot(t.tableau, 1, 0).tolist()
+        >>> Tableau(np.array([[-2,-3,0,0,0],[1,3,1,0,4],[3,1,0,1,4.]]),
+        ... 2, 2).pivot(1, 0).tolist()
         ... # doctest: +NORMALIZE_WHITESPACE
         [[0.0, 3.0, 2.0, 0.0, 8.0],
         [1.0, 3.0, 1.0, 0.0, 4.0],
         [0.0, -8.0, -3.0, 1.0, -8.0]]
         """
         # Avoid changes to original tableau
-        piv_row = tableau[row_idx].copy()
+        piv_row = self.tableau[row_idx].copy()

         piv_val = piv_row[col_idx]
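A standalone sketch (not part of the commit) of the row operations behind the pivot(1, 0) doctest above:

```python
import numpy as np

t = np.array([[-2.0, -3, 0, 0, 0],
              [1, 3, 1, 0, 4],
              [3, 1, 0, 1, 4]])
row_idx, col_idx = 1, 0

piv_row = t[row_idx].copy()
piv_row /= piv_row[col_idx]            # normalise the pivot value to 1

for idx, coeff in enumerate(t[:, col_idx]):
    t[idx] -= coeff * piv_row          # zero out the rest of the column
t[row_idx] = piv_row                   # restore the normalised pivot row

print(t.tolist())
# [[0.0, 3.0, 2.0, 0.0, 8.0], [1.0, 3.0, 1.0, 0.0, 4.0],
#  [0.0, -8.0, -3.0, 1.0, -8.0]]
```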
@ -153,48 +164,47 @@ class Tableau:
         piv_row *= 1 / piv_val

         # Variable in pivot column becomes basic, ie the only non-zero entry
-        for idx, coeff in enumerate(tableau[:, col_idx]):
-            tableau[idx] += -coeff * piv_row
-        tableau[row_idx] = piv_row
-        return tableau
+        for idx, coeff in enumerate(self.tableau[:, col_idx]):
+            self.tableau[idx] += -coeff * piv_row
+        self.tableau[row_idx] = piv_row
+        return self.tableau

-    def change_stage(self, tableau: np.ndarray) -> np.ndarray:
+    def change_stage(self) -> np.ndarray:
         """Exits first phase of the two-stage method by deleting artificial
         rows and columns, or completes the algorithm if exiting the standard
         case.

-        >>> t = Tableau(np.array([
+        >>> Tableau(np.array([
         ... [3, 3, -1, -1, 0, 0, 4],
         ... [2, 1, 0, 0, 0, 0, 0.],
         ... [1, 2, -1, 0, 1, 0, 2],
         ... [2, 1, 0, -1, 0, 1, 2]
-        ... ]), 2)
-        >>> t.change_stage(t.tableau).tolist()
+        ... ]), 2, 2).change_stage().tolist()
         ... # doctest: +NORMALIZE_WHITESPACE
-        [[2.0, 1.0, 0.0, 0.0, 0.0, 0.0],
-        [1.0, 2.0, -1.0, 0.0, 1.0, 2.0],
-        [2.0, 1.0, 0.0, -1.0, 0.0, 2.0]]
+        [[2.0, 1.0, 0.0, 0.0, 0.0],
+        [1.0, 2.0, -1.0, 0.0, 2.0],
+        [2.0, 1.0, 0.0, -1.0, 2.0]]
         """
         # Objective of original objective row remains
         self.objectives.pop()

         if not self.objectives:
-            return tableau
+            return self.tableau

         # Slice containing ids for artificial columns
-        s = slice(-self.n_art_vars - 1, -1)
+        s = slice(-self.n_artificial_vars - 1, -1)

         # Delete the artificial variable columns
-        tableau = np.delete(tableau, s, axis=1)
+        self.tableau = np.delete(self.tableau, s, axis=1)

         # Delete the objective row of the first stage
-        tableau = np.delete(tableau, 0, axis=0)
+        self.tableau = np.delete(self.tableau, 0, axis=0)

         self.n_stages = 1
         self.n_rows -= 1
-        self.n_art_vars = 0
+        self.n_artificial_vars = 0
         self.stop_iter = False
-        return tableau
+        return self.tableau

     def run_simplex(self) -> dict[Any, Any]:
         """Operate on tableau until objective function cannot be
@ -205,15 +215,29 @@ class Tableau:
         ST: x1 + 3x2 <= 4
             3x1 + x2 <= 4
         >>> Tableau(np.array([[-1,-1,0,0,0],[1,3,1,0,4],[3,1,0,1,4.]]),
-        ... 2).run_simplex()
+        ... 2, 0).run_simplex()
         {'P': 2.0, 'x1': 1.0, 'x2': 1.0}

+        # Standard linear program with 3 variables:
+        Max: 3x1 + x2 + 3x3
+        ST: 2x1 + x2 + x3 ≤ 2
+            x1 + 2x2 + 3x3 ≤ 5
+            2x1 + 2x2 + x3 ≤ 6
+        >>> Tableau(np.array([
+        ... [-3,-1,-3,0,0,0,0],
+        ... [2,1,1,1,0,0,2],
+        ... [1,2,3,0,1,0,5],
+        ... [2,2,1,0,0,1,6.]
+        ... ]),3,0).run_simplex() # doctest: +ELLIPSIS
+        {'P': 5.4, 'x1': 0.199..., 'x3': 1.6}
+

         # Optimal tableau input:
         >>> Tableau(np.array([
         ... [0, 0, 0.25, 0.25, 2],
         ... [0, 1, 0.375, -0.125, 1],
         ... [1, 0, -0.125, 0.375, 1]
-        ... ]), 2).run_simplex()
+        ... ]), 2, 0).run_simplex()
         {'P': 2.0, 'x1': 1.0, 'x2': 1.0}

         # Non-standard: >= constraints

@ -227,7 +251,7 @@ class Tableau:
         ... [1, 1, 1, 1, 0, 0, 0, 0, 40],
         ... [2, 1, -1, 0, -1, 0, 1, 0, 10],
         ... [0, -1, 1, 0, 0, -1, 0, 1, 10.]
-        ... ]), 3).run_simplex()
+        ... ]), 3, 2).run_simplex()
         {'P': 70.0, 'x1': 10.0, 'x2': 10.0, 'x3': 20.0}

         # Non standard: minimisation and equalities

@ -235,73 +259,76 @@ class Tableau:
         ST: 2x1 + x2 = 12
             6x1 + 5x2 = 40
         >>> Tableau(np.array([
-        ... [8, 6, 0, -1, 0, -1, 0, 0, 52],
-        ... [1, 1, 0, 0, 0, 0, 0, 0, 0],
-        ... [2, 1, 1, 0, 0, 0, 0, 0, 12],
-        ... [2, 1, 0, -1, 0, 0, 1, 0, 12],
-        ... [6, 5, 0, 0, 1, 0, 0, 0, 40],
-        ... [6, 5, 0, 0, 0, -1, 0, 1, 40.]
-        ... ]), 2).run_simplex()
+        ... [8, 6, 0, 0, 52],
+        ... [1, 1, 0, 0, 0],
+        ... [2, 1, 1, 0, 12],
+        ... [6, 5, 0, 1, 40.],
+        ... ]), 2, 2).run_simplex()
         {'P': 7.0, 'x1': 5.0, 'x2': 2.0}

+
+        # Pivot on slack variables
+        Max: 8x1 + 6x2
+        ST: x1 + 3x2 <= 33
+            4x1 + 2x2 <= 48
+            2x1 + 4x2 <= 48
+            x1 + x2 >= 10
+            x1 >= 2
+        >>> Tableau(np.array([
+        ... [2, 1, 0, 0, 0, -1, -1, 0, 0, 12.0],
+        ... [-8, -6, 0, 0, 0, 0, 0, 0, 0, 0.0],
+        ... [1, 3, 1, 0, 0, 0, 0, 0, 0, 33.0],
+        ... [4, 2, 0, 1, 0, 0, 0, 0, 0, 60.0],
+        ... [2, 4, 0, 0, 1, 0, 0, 0, 0, 48.0],
+        ... [1, 1, 0, 0, 0, -1, 0, 1, 0, 10.0],
+        ... [1, 0, 0, 0, 0, 0, -1, 0, 1, 2.0]
+        ... ]), 2, 2).run_simplex() # doctest: +ELLIPSIS
+        {'P': 132.0, 'x1': 12.000... 'x2': 5.999...}
         """
         # Stop simplex algorithm from cycling.
-        for _ in range(100):
+        for _ in range(Tableau.maxiter):
             # Completion of each stage removes an objective. If both stages
             # are complete, then no objectives are left
             if not self.objectives:
-                self.col_titles = self.generate_col_titles(
-                    self.n_vars, self.n_slack, self.n_art_vars
-                )
-
                 # Find the values of each variable at optimal solution
-                return self.interpret_tableau(self.tableau, self.col_titles)
+                return self.interpret_tableau()

-            row_idx, col_idx = self.find_pivot(self.tableau)
+            row_idx, col_idx = self.find_pivot()

             # If there are no more negative values in objective row
             if self.stop_iter:
                 # Delete artificial variable columns and rows. Update attributes
-                self.tableau = self.change_stage(self.tableau)
+                self.tableau = self.change_stage()
             else:
-                self.tableau = self.pivot(self.tableau, row_idx, col_idx)
+                self.tableau = self.pivot(row_idx, col_idx)
         return {}

-    def interpret_tableau(
-        self, tableau: np.ndarray, col_titles: list[str]
-    ) -> dict[str, float]:
+    def interpret_tableau(self) -> dict[str, float]:
         """Given the final tableau, add the corresponding values of the basic
         decision variables to the `output_dict`
-        >>> tableau = np.array([
+        >>> Tableau(np.array([
         ... [0,0,0.875,0.375,5],
         ... [0,1,0.375,-0.125,1],
         ... [1,0,-0.125,0.375,1]
-        ... ])
-        >>> t = Tableau(tableau, 2)
-        >>> t.interpret_tableau(tableau, ["x1", "x2", "s1", "s2", "RHS"])
+        ... ]),2, 0).interpret_tableau()
         {'P': 5.0, 'x1': 1.0, 'x2': 1.0}
         """
         # P = RHS of final tableau
-        output_dict = {"P": abs(tableau[0, -1])}
+        output_dict = {"P": abs(self.tableau[0, -1])}

         for i in range(self.n_vars):
-            # Gives ids of nonzero entries in the ith column
-            nonzero = np.nonzero(tableau[:, i])
+            # Gives indices of nonzero entries in the ith column
+            nonzero = np.nonzero(self.tableau[:, i])
             n_nonzero = len(nonzero[0])

-            # First entry in the nonzero ids
+            # First entry in the nonzero indices
             nonzero_rowidx = nonzero[0][0]
-            nonzero_val = tableau[nonzero_rowidx, i]
+            nonzero_val = self.tableau[nonzero_rowidx, i]

             # If there is only one nonzero value in column, which is one
-            if n_nonzero == nonzero_val == 1:
-                rhs_val = tableau[nonzero_rowidx, -1]
-                output_dict[col_titles[i]] = rhs_val
-
-        # Check for basic variables
-        for title in col_titles:
-            # Don't add RHS or slack variables to output dict
-            if title[0] not in "R-s-a":
-                output_dict.setdefault(title, 0)
+            if n_nonzero == 1 and nonzero_val == 1:
+                rhs_val = self.tableau[nonzero_rowidx, -1]
+                output_dict[self.col_titles[i]] = rhs_val
         return output_dict
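Taken together, the refactored class is driven entirely through the constructor and run_simplex. A usage sketch (not part of the commit), based on the first run_simplex doctest and assuming the Tableau class above is in scope:

```python
import numpy as np

# Maximise x1 + x2 subject to x1 + 3*x2 <= 4 and 3*x1 + x2 <= 4
t = Tableau(
    np.array([[-1, -1, 0, 0, 0], [1, 3, 1, 0, 4], [3, 1, 0, 1, 4.0]]),
    n_vars=2,
    n_artificial_vars=0,
)
print(t.run_simplex())  # {'P': 2.0, 'x1': 1.0, 'x2': 1.0}
```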
machine_learning/k_means_clust.py

@ -10,11 +10,11 @@ Inputs:
 - k , number of clusters to create.
 - initial_centroids , initial centroid values generated by utility function(mentioned
 in usage).
-- maxiter , maximum number of iterations to process.
-- heterogeneity , empty list that will be filled with hetrogeneity values if passed
+- maxiter , the maximum number of iterations to process.
+- heterogeneity, empty list that will be filled with heterogeneity values if passed
 to kmeans func.
 Usage:
-1. define 'k' value, 'X' features array and 'hetrogeneity' empty list
+1. define 'k' value, 'X' features array and 'heterogeneity' empty list
 2. create initial_centroids,
     initial_centroids = get_initial_centroids(
         X,

@ -31,8 +31,8 @@ Usage:
         record_heterogeneity=heterogeneity,
         verbose=True # whether to print logs in console or not.(default=False)
     )
-4. Plot the loss function, hetrogeneity values for every iteration saved in
-hetrogeneity list.
+4. Plot the loss function, heterogeneity values for every iteration saved in
+heterogeneity list.
     plot_heterogeneity(
         heterogeneity,
         k
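A sketch (not part of the commit) of those usage steps end to end; it assumes the module's get_initial_centroids, kmeans, and plot_heterogeneity are in scope, and a toy feature array stands in for real data:

```python
import numpy as np

X = np.random.default_rng(0).random((100, 2))  # toy feature array
k = 3
heterogeneity: list = []  # filled in when passed to kmeans

initial_centroids = get_initial_centroids(X, k, seed=0)
centroids, cluster_assignment = kmeans(
    X,
    k,
    initial_centroids,
    maxiter=500,
    record_heterogeneity=heterogeneity,  # history of the k-means loss
    verbose=True,
)
plot_heterogeneity(heterogeneity, k)
```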
@ -47,6 +47,7 @@ import pandas as pd
 from matplotlib import pyplot as plt
 from sklearn.metrics import pairwise_distances
+

 warnings.filterwarnings("ignore")

 TAG = "K-MEANS-CLUST/ "

@ -131,7 +132,7 @@ def kmeans(
     """This function runs k-means on given data and initial set of centroids.
     maxiter: maximum number of iterations to run.(default=500)
     record_heterogeneity: (optional) a list, to store the history of heterogeneity
-    as function of iterations
+    as function of iterations.
     if None, do not store the history.
     verbose: if True, print how many data points changed their cluster labels in
     each iteration"""

@ -198,10 +199,10 @@ def report_generator(
     df: pd.DataFrame, clustering_variables: np.ndarray, fill_missing_report=None
 ) -> pd.DataFrame:
     """
-    Function generates easy-erading clustering report. It takes 2 arguments as an input:
-    DataFrame - dataframe with predicted cluester column;
-    FillMissingReport - dictionary of rules how we are going to fill missing
-    values of for final report generate (not included in modeling);
+    Function generates an easy-reading clustering report. It takes 3 arguments as input:
+    DataFrame,predicted cluster column,
+    FillMissingReport - dictionary of rules on how we are going to fill in missing
+    values of for final report generate (not included in modelling);
     in order to run the function following libraries must be imported:
     import pandas as pd
     import numpy as np

@ -306,10 +307,10 @@ def report_generator(
     a.columns = report.columns # rename columns to match report
     report = report.drop(
         report[report.Type == "count"].index
-    ) # drop count values except cluster size
+    ) # drop count values except for cluster size
     report = pd.concat(
         [report, a, clustersize, clusterproportion], axis=0
-    ) # concat report with clustert size and nan values
+    ) # concat report with cluster size and nan values
     report["Mark"] = report["Features"].isin(clustering_variables)
     cols = report.columns.tolist()
     cols = cols[0:2] + cols[-1:] + cols[2:-1]
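A hypothetical report_generator call matching the patched docstring (not part of the commit); the toy column names are invented, and the predicted cluster column is assumed to be named "cluster":

```python
import numpy as np
import pandas as pd

# Toy frame: two features plus a predicted cluster label per row.
df = pd.DataFrame(
    {
        "feature_a": [1.0, 2.0, 3.0, 4.0],
        "feature_b": [0.5, 0.1, 0.9, 0.7],
        "cluster": [0, 0, 1, 1],
    }
)

# None means no fill-missing rules for columns outside the model.
report = report_generator(
    df,
    clustering_variables=np.array(["feature_a", "feature_b"]),
    fill_missing_report=None,
)
print(report.head())
```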
sorts/bucket_sort.py

@ -30,7 +30,7 @@ Source: https://en.wikipedia.org/wiki/Bucket_sort
 from __future__ import annotations


-def bucket_sort(my_list: list) -> list:
+def bucket_sort(my_list: list, bucket_count: int = 10) -> list:
     """
     >>> data = [-1, 2, -5, 0]
     >>> bucket_sort(data) == sorted(data)

@ -43,21 +43,27 @@ def bucket_sort(my_list: list) -> list:
     True
     >>> bucket_sort([]) == sorted([])
     True
+    >>> data = [-1e10, 1e10]
+    >>> bucket_sort(data) == sorted(data)
+    True
     >>> import random
     >>> collection = random.sample(range(-50, 50), 50)
     >>> bucket_sort(collection) == sorted(collection)
     True
     """
-    if len(my_list) == 0:
+
+    if len(my_list) == 0 or bucket_count <= 0:
         return []
+
     min_value, max_value = min(my_list), max(my_list)
-    bucket_count = int(max_value - min_value) + 1
+    bucket_size = (max_value - min_value) / bucket_count
     buckets: list[list] = [[] for _ in range(bucket_count)]

-    for i in my_list:
-        buckets[int(i - min_value)].append(i)
+    for val in my_list:
+        index = min(int((val - min_value) / bucket_size), bucket_count - 1)
+        buckets[index].append(val)

-    return [v for bucket in buckets for v in sorted(bucket)]
+    return [val for bucket in buckets for val in sorted(bucket)]


 if __name__ == "__main__":
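The min(..., bucket_count - 1) clamp in the new indexing exists because the maximum element maps exactly onto bucket_count, one past the last bucket. A sketch (not part of the commit):

```python
min_value, max_value, bucket_count = 0, 10, 10
bucket_size = (max_value - min_value) / bucket_count  # 1.0

val = max_value
raw_index = int((val - min_value) / bucket_size)      # 10 == bucket_count
index = min(raw_index, bucket_count - 1)              # clamped to 9
print(raw_index, index)  # 10 9
```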
web_programming/fetch_anime_and_play.py

@ -1,7 +1,5 @@
-from xml.dom import NotFoundErr
-
 import requests
-from bs4 import BeautifulSoup, NavigableString
+from bs4 import BeautifulSoup, NavigableString, Tag
 from fake_useragent import UserAgent

 BASE_URL = "https://ww1.gogoanime2.org"

@ -41,25 +39,23 @@ def search_scraper(anime_name: str) -> list:

     # get list of anime
     anime_ul = soup.find("ul", {"class": "items"})
+    if anime_ul is None or isinstance(anime_ul, NavigableString):
+        msg = f"Could not find and anime with name {anime_name}"
+        raise ValueError(msg)
     anime_li = anime_ul.children

     # for each anime, insert to list. the name and url.
     anime_list = []
     for anime in anime_li:
-        if not isinstance(anime, NavigableString):
-            try:
-                anime_url, anime_title = (
-                    anime.find("a")["href"],
-                    anime.find("a")["title"],
-                )
-                anime_list.append(
-                    {
-                        "title": anime_title,
-                        "url": anime_url,
-                    }
-                )
-            except (NotFoundErr, KeyError):
-                pass
+        if isinstance(anime, Tag):
+            anime_url = anime.find("a")
+            if anime_url is None or isinstance(anime_url, NavigableString):
+                continue
+            anime_title = anime.find("a")
+            if anime_title is None or isinstance(anime_title, NavigableString):
+                continue
+
+            anime_list.append({"title": anime_title["title"], "url": anime_url["href"]})

     return anime_list

@ -93,22 +89,24 @@ def search_anime_episode_list(episode_endpoint: str) -> list:

     # With this id. get the episode list.
     episode_page_ul = soup.find("ul", {"id": "episode_related"})
+    if episode_page_ul is None or isinstance(episode_page_ul, NavigableString):
+        msg = f"Could not find any anime eposiodes with name {anime_name}"
+        raise ValueError(msg)
     episode_page_li = episode_page_ul.children

     episode_list = []
     for episode in episode_page_li:
-        try:
-            if not isinstance(episode, NavigableString):
-                episode_list.append(
-                    {
-                        "title": episode.find("div", {"class": "name"}).text.replace(
-                            " ", ""
-                        ),
-                        "url": episode.find("a")["href"],
-                    }
-                )
-        except (KeyError, NotFoundErr):
-            pass
+        if isinstance(episode, Tag):
+            url = episode.find("a")
+            if url is None or isinstance(url, NavigableString):
+                continue
+            title = episode.find("div", {"class": "name"})
+            if title is None or isinstance(title, NavigableString):
+                continue
+
+            episode_list.append(
+                {"title": title.text.replace(" ", ""), "url": url["href"]}
+            )

     return episode_list

@ -140,11 +138,16 @@ def get_anime_episode(episode_endpoint: str) -> list:

     soup = BeautifulSoup(response.text, "html.parser")

-    try:
-        episode_url = soup.find("iframe", {"id": "playerframe"})["src"]
-        download_url = episode_url.replace("/embed/", "/playlist/") + ".m3u8"
-    except (KeyError, NotFoundErr) as e:
-        raise e
+    url = soup.find("iframe", {"id": "playerframe"})
+    if url is None or isinstance(url, NavigableString):
+        msg = f"Could not find url and download url from {episode_endpoint}"
+        raise RuntimeError(msg)
+
+    episode_url = url["src"]
+    if not isinstance(episode_url, str):
+        msg = f"Could not find url and download url from {episode_endpoint}"
+        raise RuntimeError(msg)
+    download_url = episode_url.replace("/embed/", "/playlist/") + ".m3u8"

     return [f"{BASE_URL}{episode_url}", f"{BASE_URL}{download_url}"]
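The rewrite drops the misused xml.dom.NotFoundErr and replaces try/except with explicit type narrowing: bs4's find() returns a Tag, a NavigableString, or None, so each result is checked before subscripting. The pattern in isolation (not part of the commit), on static HTML:

```python
from bs4 import BeautifulSoup, NavigableString, Tag

html = '<ul class="items"><li><a href="/x" title="X">X</a></li></ul>'
soup = BeautifulSoup(html, "html.parser")

anime_ul = soup.find("ul", {"class": "items"})
if anime_ul is None or isinstance(anime_ul, NavigableString):
    raise ValueError("could not find the anime list")

for item in anime_ul.children:
    if isinstance(item, Tag):  # skips whitespace NavigableStrings
        link = item.find("a")
        if link is None or isinstance(link, NavigableString):
            continue
        print(link["title"], link["href"])  # X /x
```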
web_programming/get_top_billionaires.py

@ -3,7 +3,7 @@ CAUTION: You may get a json.decoding error.
 This works for some of us but fails for others.
 """

-from datetime import datetime
+from datetime import UTC, datetime, timedelta

 import requests
 from rich import box

@ -20,18 +20,31 @@ API_URL = (
 )


-def calculate_age(unix_date: int) -> str:
+def calculate_age(unix_date: float) -> str:
     """Calculates age from given unix time format.

     Returns:
         Age as string

-    >>> calculate_age(-657244800000)
-    '73'
-    >>> calculate_age(46915200000)
-    '51'
+    >>> from datetime import datetime, UTC
+    >>> years_since_create = datetime.now(tz=UTC).year - 2022
+    >>> int(calculate_age(-657244800000)) - years_since_create
+    73
+    >>> int(calculate_age(46915200000)) - years_since_create
+    51
     """
-    birthdate = datetime.fromtimestamp(unix_date / 1000).date()
+    # Convert date from milliseconds to seconds
+    unix_date /= 1000
+
+    if unix_date < 0:
+        # Handle timestamp before epoch
+        epoch = datetime.fromtimestamp(0, tz=UTC)
+        seconds_since_epoch = (datetime.now(tz=UTC) - epoch).seconds
+        birthdate = (
+            epoch - timedelta(seconds=abs(unix_date) - seconds_since_epoch)
+        ).date()
+    else:
+        birthdate = datetime.fromtimestamp(unix_date, tz=UTC).date()
     return str(
         TODAY.year
         - birthdate.year
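The doctest change in get_top_billionaires.py exists because a fixed birth timestamp yields an age that grows every calendar year; the new tests therefore subtract the years elapsed since they were written in 2022. A sketch of that invariant (not part of the commit; the 1949 birth year is approximated from the -657244800000 ms timestamp):

```python
from datetime import UTC, datetime  # datetime.UTC needs Python 3.11+

birth_year = 1949                   # assumption: roughly -657244800000 ms
age_when_written = 2022 - birth_year          # 73, the old hard-coded '73'
age_now = datetime.now(tz=UTC).year - birth_year
years_since_create = datetime.now(tz=UTC).year - 2022

# The difference stays constant no matter what year the test runs in.
assert age_now - years_since_create == age_when_written
```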