Merge pull request #2 from thor-harsh/thor-harsh-patch-2

Thor harsh patch 2
This commit is contained in:
thor-harsh 2023-08-18 18:44:48 +05:30 committed by GitHub
commit 0050c3fad5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 213 additions and 160 deletions

View File

@ -710,6 +710,7 @@
* [2 Hidden Layers Neural Network](neural_network/2_hidden_layers_neural_network.py) * [2 Hidden Layers Neural Network](neural_network/2_hidden_layers_neural_network.py)
* Activation Functions * Activation Functions
* [Exponential Linear Unit](neural_network/activation_functions/exponential_linear_unit.py) * [Exponential Linear Unit](neural_network/activation_functions/exponential_linear_unit.py)
* [Leaky Rectified Linear Unit](neural_network/activation_functions/leaky_rectified_linear_unit.py)
* [Back Propagation Neural Network](neural_network/back_propagation_neural_network.py) * [Back Propagation Neural Network](neural_network/back_propagation_neural_network.py)
* [Convolution Neural Network](neural_network/convolution_neural_network.py) * [Convolution Neural Network](neural_network/convolution_neural_network.py)
* [Perceptron](neural_network/perceptron.py) * [Perceptron](neural_network/perceptron.py)
@ -1212,6 +1213,7 @@
* [Daily Horoscope](web_programming/daily_horoscope.py) * [Daily Horoscope](web_programming/daily_horoscope.py)
* [Download Images From Google Query](web_programming/download_images_from_google_query.py) * [Download Images From Google Query](web_programming/download_images_from_google_query.py)
* [Emails From Url](web_programming/emails_from_url.py) * [Emails From Url](web_programming/emails_from_url.py)
* [Fetch Anime And Play](web_programming/fetch_anime_and_play.py)
* [Fetch Bbc News](web_programming/fetch_bbc_news.py) * [Fetch Bbc News](web_programming/fetch_bbc_news.py)
* [Fetch Github Info](web_programming/fetch_github_info.py) * [Fetch Github Info](web_programming/fetch_github_info.py)
* [Fetch Jobs](web_programming/fetch_jobs.py) * [Fetch Jobs](web_programming/fetch_jobs.py)
@ -1220,6 +1222,7 @@
* [Get Amazon Product Data](web_programming/get_amazon_product_data.py) * [Get Amazon Product Data](web_programming/get_amazon_product_data.py)
* [Get Imdb Top 250 Movies Csv](web_programming/get_imdb_top_250_movies_csv.py) * [Get Imdb Top 250 Movies Csv](web_programming/get_imdb_top_250_movies_csv.py)
* [Get Imdbtop](web_programming/get_imdbtop.py) * [Get Imdbtop](web_programming/get_imdbtop.py)
* [Get Top Billionaires](web_programming/get_top_billionaires.py)
* [Get Top Hn Posts](web_programming/get_top_hn_posts.py) * [Get Top Hn Posts](web_programming/get_top_hn_posts.py)
* [Get User Tweets](web_programming/get_user_tweets.py) * [Get User Tweets](web_programming/get_user_tweets.py)
* [Giphy](web_programming/giphy.py) * [Giphy](web_programming/giphy.py)

View File

@ -20,40 +20,60 @@ import numpy as np
class Tableau: class Tableau:
"""Operate on simplex tableaus """Operate on simplex tableaus
>>> t = Tableau(np.array([[-1,-1,0,0,-1],[1,3,1,0,4],[3,1,0,1,4.]]), 2) >>> Tableau(np.array([[-1,-1,0,0,1],[1,3,1,0,4],[3,1,0,1,4]]), 2, 2)
Traceback (most recent call last):
...
TypeError: Tableau must have type float64
>>> Tableau(np.array([[-1,-1,0,0,-1],[1,3,1,0,4],[3,1,0,1,4.]]), 2, 2)
Traceback (most recent call last): Traceback (most recent call last):
... ...
ValueError: RHS must be > 0 ValueError: RHS must be > 0
>>> Tableau(np.array([[-1,-1,0,0,1],[1,3,1,0,4],[3,1,0,1,4.]]), -2, 2)
Traceback (most recent call last):
...
ValueError: number of (artificial) variables must be a natural number
""" """
def __init__(self, tableau: np.ndarray, n_vars: int) -> None: # Max iteration number to prevent cycling
maxiter = 100
def __init__(
self, tableau: np.ndarray, n_vars: int, n_artificial_vars: int
) -> None:
if tableau.dtype != "float64":
raise TypeError("Tableau must have type float64")
# Check if RHS is negative # Check if RHS is negative
if np.any(tableau[:, -1], where=tableau[:, -1] < 0): if not (tableau[:, -1] >= 0).all():
raise ValueError("RHS must be > 0") raise ValueError("RHS must be > 0")
if n_vars < 2 or n_artificial_vars < 0:
raise ValueError(
"number of (artificial) variables must be a natural number"
)
self.tableau = tableau self.tableau = tableau
self.n_rows, _ = tableau.shape self.n_rows, n_cols = tableau.shape
# Number of decision variables x1, x2, x3... # Number of decision variables x1, x2, x3...
self.n_vars = n_vars self.n_vars, self.n_artificial_vars = n_vars, n_artificial_vars
# Number of artificial variables to be minimised
self.n_art_vars = len(np.where(tableau[self.n_vars : -1] == -1)[0])
# 2 if there are >= or == constraints (nonstandard), 1 otherwise (std) # 2 if there are >= or == constraints (nonstandard), 1 otherwise (std)
self.n_stages = (self.n_art_vars > 0) + 1 self.n_stages = (self.n_artificial_vars > 0) + 1
# Number of slack variables added to make inequalities into equalities # Number of slack variables added to make inequalities into equalities
self.n_slack = self.n_rows - self.n_stages self.n_slack = n_cols - self.n_vars - self.n_artificial_vars - 1
# Objectives for each stage # Objectives for each stage
self.objectives = ["max"] self.objectives = ["max"]
# In two stage simplex, first minimise then maximise # In two stage simplex, first minimise then maximise
if self.n_art_vars: if self.n_artificial_vars:
self.objectives.append("min") self.objectives.append("min")
self.col_titles = [""] self.col_titles = self.generate_col_titles()
# Index of current pivot row and column # Index of current pivot row and column
self.row_idx = None self.row_idx = None
@ -62,48 +82,39 @@ class Tableau:
# Does objective row only contain (non)-negative values? # Does objective row only contain (non)-negative values?
self.stop_iter = False self.stop_iter = False
@staticmethod def generate_col_titles(self) -> list[str]:
def generate_col_titles(*args: int) -> list[str]:
"""Generate column titles for tableau of specific dimensions """Generate column titles for tableau of specific dimensions
>>> Tableau.generate_col_titles(2, 3, 1) >>> Tableau(np.array([[-1,-1,0,0,1],[1,3,1,0,4],[3,1,0,1,4.]]),
['x1', 'x2', 's1', 's2', 's3', 'a1', 'RHS'] ... 2, 0).generate_col_titles()
['x1', 'x2', 's1', 's2', 'RHS']
>>> Tableau.generate_col_titles() >>> Tableau(np.array([[-1,-1,0,0,1],[1,3,1,0,4],[3,1,0,1,4.]]),
Traceback (most recent call last): ... 2, 2).generate_col_titles()
... ['x1', 'x2', 'RHS']
ValueError: Must provide n_vars, n_slack, and n_art_vars
>>> Tableau.generate_col_titles(-2, 3, 1)
Traceback (most recent call last):
...
ValueError: All arguments must be non-negative integers
""" """
if len(args) != 3: args = (self.n_vars, self.n_slack)
raise ValueError("Must provide n_vars, n_slack, and n_art_vars")
if not all(x >= 0 and isinstance(x, int) for x in args): # decision | slack
raise ValueError("All arguments must be non-negative integers") string_starts = ["x", "s"]
# decision | slack | artificial
string_starts = ["x", "s", "a"]
titles = [] titles = []
for i in range(3): for i in range(2):
for j in range(args[i]): for j in range(args[i]):
titles.append(string_starts[i] + str(j + 1)) titles.append(string_starts[i] + str(j + 1))
titles.append("RHS") titles.append("RHS")
return titles return titles
def find_pivot(self, tableau: np.ndarray) -> tuple[Any, Any]: def find_pivot(self) -> tuple[Any, Any]:
"""Finds the pivot row and column. """Finds the pivot row and column.
>>> t = Tableau(np.array([[-2,1,0,0,0], [3,1,1,0,6], [1,2,0,1,7.]]), 2) >>> Tableau(np.array([[-2,1,0,0,0], [3,1,1,0,6], [1,2,0,1,7.]]),
>>> t.find_pivot(t.tableau) ... 2, 0).find_pivot()
(1, 0) (1, 0)
""" """
objective = self.objectives[-1] objective = self.objectives[-1]
# Find entries of highest magnitude in objective rows # Find entries of highest magnitude in objective rows
sign = (objective == "min") - (objective == "max") sign = (objective == "min") - (objective == "max")
col_idx = np.argmax(sign * tableau[0, : self.n_vars]) col_idx = np.argmax(sign * self.tableau[0, :-1])
# Choice is only valid if below 0 for maximise, and above for minimise # Choice is only valid if below 0 for maximise, and above for minimise
if sign * self.tableau[0, col_idx] <= 0: if sign * self.tableau[0, col_idx] <= 0:
@ -117,15 +128,15 @@ class Tableau:
s = slice(self.n_stages, self.n_rows) s = slice(self.n_stages, self.n_rows)
# RHS # RHS
dividend = tableau[s, -1] dividend = self.tableau[s, -1]
# Elements of pivot column within slice # Elements of pivot column within slice
divisor = tableau[s, col_idx] divisor = self.tableau[s, col_idx]
# Array filled with nans # Array filled with nans
nans = np.full(self.n_rows - self.n_stages, np.nan) nans = np.full(self.n_rows - self.n_stages, np.nan)
# If element in pivot column is greater than zeron_stages, return # If element in pivot column is greater than zero, return
# quotient or nan otherwise # quotient or nan otherwise
quotients = np.divide(dividend, divisor, out=nans, where=divisor > 0) quotients = np.divide(dividend, divisor, out=nans, where=divisor > 0)
@ -134,18 +145,18 @@ class Tableau:
row_idx = np.nanargmin(quotients) + self.n_stages row_idx = np.nanargmin(quotients) + self.n_stages
return row_idx, col_idx return row_idx, col_idx
def pivot(self, tableau: np.ndarray, row_idx: int, col_idx: int) -> np.ndarray: def pivot(self, row_idx: int, col_idx: int) -> np.ndarray:
"""Pivots on value on the intersection of pivot row and column. """Pivots on value on the intersection of pivot row and column.
>>> t = Tableau(np.array([[-2,-3,0,0,0],[1,3,1,0,4],[3,1,0,1,4.]]), 2) >>> Tableau(np.array([[-2,-3,0,0,0],[1,3,1,0,4],[3,1,0,1,4.]]),
>>> t.pivot(t.tableau, 1, 0).tolist() ... 2, 2).pivot(1, 0).tolist()
... # doctest: +NORMALIZE_WHITESPACE ... # doctest: +NORMALIZE_WHITESPACE
[[0.0, 3.0, 2.0, 0.0, 8.0], [[0.0, 3.0, 2.0, 0.0, 8.0],
[1.0, 3.0, 1.0, 0.0, 4.0], [1.0, 3.0, 1.0, 0.0, 4.0],
[0.0, -8.0, -3.0, 1.0, -8.0]] [0.0, -8.0, -3.0, 1.0, -8.0]]
""" """
# Avoid changes to original tableau # Avoid changes to original tableau
piv_row = tableau[row_idx].copy() piv_row = self.tableau[row_idx].copy()
piv_val = piv_row[col_idx] piv_val = piv_row[col_idx]
@ -153,48 +164,47 @@ class Tableau:
piv_row *= 1 / piv_val piv_row *= 1 / piv_val
# Variable in pivot column becomes basic, ie the only non-zero entry # Variable in pivot column becomes basic, ie the only non-zero entry
for idx, coeff in enumerate(tableau[:, col_idx]): for idx, coeff in enumerate(self.tableau[:, col_idx]):
tableau[idx] += -coeff * piv_row self.tableau[idx] += -coeff * piv_row
tableau[row_idx] = piv_row self.tableau[row_idx] = piv_row
return tableau return self.tableau
def change_stage(self, tableau: np.ndarray) -> np.ndarray: def change_stage(self) -> np.ndarray:
"""Exits first phase of the two-stage method by deleting artificial """Exits first phase of the two-stage method by deleting artificial
rows and columns, or completes the algorithm if exiting the standard rows and columns, or completes the algorithm if exiting the standard
case. case.
>>> t = Tableau(np.array([ >>> Tableau(np.array([
... [3, 3, -1, -1, 0, 0, 4], ... [3, 3, -1, -1, 0, 0, 4],
... [2, 1, 0, 0, 0, 0, 0.], ... [2, 1, 0, 0, 0, 0, 0.],
... [1, 2, -1, 0, 1, 0, 2], ... [1, 2, -1, 0, 1, 0, 2],
... [2, 1, 0, -1, 0, 1, 2] ... [2, 1, 0, -1, 0, 1, 2]
... ]), 2) ... ]), 2, 2).change_stage().tolist()
>>> t.change_stage(t.tableau).tolist()
... # doctest: +NORMALIZE_WHITESPACE ... # doctest: +NORMALIZE_WHITESPACE
[[2.0, 1.0, 0.0, 0.0, 0.0, 0.0], [[2.0, 1.0, 0.0, 0.0, 0.0],
[1.0, 2.0, -1.0, 0.0, 1.0, 2.0], [1.0, 2.0, -1.0, 0.0, 2.0],
[2.0, 1.0, 0.0, -1.0, 0.0, 2.0]] [2.0, 1.0, 0.0, -1.0, 2.0]]
""" """
# Objective of original objective row remains # Objective of original objective row remains
self.objectives.pop() self.objectives.pop()
if not self.objectives: if not self.objectives:
return tableau return self.tableau
# Slice containing ids for artificial columns # Slice containing ids for artificial columns
s = slice(-self.n_art_vars - 1, -1) s = slice(-self.n_artificial_vars - 1, -1)
# Delete the artificial variable columns # Delete the artificial variable columns
tableau = np.delete(tableau, s, axis=1) self.tableau = np.delete(self.tableau, s, axis=1)
# Delete the objective row of the first stage # Delete the objective row of the first stage
tableau = np.delete(tableau, 0, axis=0) self.tableau = np.delete(self.tableau, 0, axis=0)
self.n_stages = 1 self.n_stages = 1
self.n_rows -= 1 self.n_rows -= 1
self.n_art_vars = 0 self.n_artificial_vars = 0
self.stop_iter = False self.stop_iter = False
return tableau return self.tableau
def run_simplex(self) -> dict[Any, Any]: def run_simplex(self) -> dict[Any, Any]:
"""Operate on tableau until objective function cannot be """Operate on tableau until objective function cannot be
@ -205,15 +215,29 @@ class Tableau:
ST: x1 + 3x2 <= 4 ST: x1 + 3x2 <= 4
3x1 + x2 <= 4 3x1 + x2 <= 4
>>> Tableau(np.array([[-1,-1,0,0,0],[1,3,1,0,4],[3,1,0,1,4.]]), >>> Tableau(np.array([[-1,-1,0,0,0],[1,3,1,0,4],[3,1,0,1,4.]]),
... 2).run_simplex() ... 2, 0).run_simplex()
{'P': 2.0, 'x1': 1.0, 'x2': 1.0} {'P': 2.0, 'x1': 1.0, 'x2': 1.0}
# Standard linear program with 3 variables:
Max: 3x1 + x2 + 3x3
ST: 2x1 + x2 + x3 <= 2
x1 + 2x2 + 3x3 <= 5
2x1 + 2x2 + x3 <= 6
>>> Tableau(np.array([
... [-3,-1,-3,0,0,0,0],
... [2,1,1,1,0,0,2],
... [1,2,3,0,1,0,5],
... [2,2,1,0,0,1,6.]
... ]),3,0).run_simplex() # doctest: +ELLIPSIS
{'P': 5.4, 'x1': 0.199..., 'x3': 1.6}
# Optimal tableau input: # Optimal tableau input:
>>> Tableau(np.array([ >>> Tableau(np.array([
... [0, 0, 0.25, 0.25, 2], ... [0, 0, 0.25, 0.25, 2],
... [0, 1, 0.375, -0.125, 1], ... [0, 1, 0.375, -0.125, 1],
... [1, 0, -0.125, 0.375, 1] ... [1, 0, -0.125, 0.375, 1]
... ]), 2).run_simplex() ... ]), 2, 0).run_simplex()
{'P': 2.0, 'x1': 1.0, 'x2': 1.0} {'P': 2.0, 'x1': 1.0, 'x2': 1.0}
# Non-standard: >= constraints # Non-standard: >= constraints
@ -227,7 +251,7 @@ class Tableau:
... [1, 1, 1, 1, 0, 0, 0, 0, 40], ... [1, 1, 1, 1, 0, 0, 0, 0, 40],
... [2, 1, -1, 0, -1, 0, 1, 0, 10], ... [2, 1, -1, 0, -1, 0, 1, 0, 10],
... [0, -1, 1, 0, 0, -1, 0, 1, 10.] ... [0, -1, 1, 0, 0, -1, 0, 1, 10.]
... ]), 3).run_simplex() ... ]), 3, 2).run_simplex()
{'P': 70.0, 'x1': 10.0, 'x2': 10.0, 'x3': 20.0} {'P': 70.0, 'x1': 10.0, 'x2': 10.0, 'x3': 20.0}
# Non standard: minimisation and equalities # Non standard: minimisation and equalities
@ -235,73 +259,76 @@ class Tableau:
ST: 2x1 + x2 = 12 ST: 2x1 + x2 = 12
6x1 + 5x2 = 40 6x1 + 5x2 = 40
>>> Tableau(np.array([ >>> Tableau(np.array([
... [8, 6, 0, -1, 0, -1, 0, 0, 52], ... [8, 6, 0, 0, 52],
... [1, 1, 0, 0, 0, 0, 0, 0, 0], ... [1, 1, 0, 0, 0],
... [2, 1, 1, 0, 0, 0, 0, 0, 12], ... [2, 1, 1, 0, 12],
... [2, 1, 0, -1, 0, 0, 1, 0, 12], ... [6, 5, 0, 1, 40.],
... [6, 5, 0, 0, 1, 0, 0, 0, 40], ... ]), 2, 2).run_simplex()
... [6, 5, 0, 0, 0, -1, 0, 1, 40.]
... ]), 2).run_simplex()
{'P': 7.0, 'x1': 5.0, 'x2': 2.0} {'P': 7.0, 'x1': 5.0, 'x2': 2.0}
# Pivot on slack variables
Max: 8x1 + 6x2
ST: x1 + 3x2 <= 33
4x1 + 2x2 <= 60
2x1 + 4x2 <= 48
x1 + x2 >= 10
x1 >= 2
>>> Tableau(np.array([
... [2, 1, 0, 0, 0, -1, -1, 0, 0, 12.0],
... [-8, -6, 0, 0, 0, 0, 0, 0, 0, 0.0],
... [1, 3, 1, 0, 0, 0, 0, 0, 0, 33.0],
... [4, 2, 0, 1, 0, 0, 0, 0, 0, 60.0],
... [2, 4, 0, 0, 1, 0, 0, 0, 0, 48.0],
... [1, 1, 0, 0, 0, -1, 0, 1, 0, 10.0],
... [1, 0, 0, 0, 0, 0, -1, 0, 1, 2.0]
... ]), 2, 2).run_simplex() # doctest: +ELLIPSIS
{'P': 132.0, 'x1': 12.000... 'x2': 5.999...}
""" """
# Stop simplex algorithm from cycling. # Stop simplex algorithm from cycling.
for _ in range(100): for _ in range(Tableau.maxiter):
# Completion of each stage removes an objective. If both stages # Completion of each stage removes an objective. If both stages
# are complete, then no objectives are left # are complete, then no objectives are left
if not self.objectives: if not self.objectives:
self.col_titles = self.generate_col_titles(
self.n_vars, self.n_slack, self.n_art_vars
)
# Find the values of each variable at optimal solution # Find the values of each variable at optimal solution
return self.interpret_tableau(self.tableau, self.col_titles) return self.interpret_tableau()
row_idx, col_idx = self.find_pivot(self.tableau) row_idx, col_idx = self.find_pivot()
# If there are no more negative values in objective row # If there are no more negative values in objective row
if self.stop_iter: if self.stop_iter:
# Delete artificial variable columns and rows. Update attributes # Delete artificial variable columns and rows. Update attributes
self.tableau = self.change_stage(self.tableau) self.tableau = self.change_stage()
else: else:
self.tableau = self.pivot(self.tableau, row_idx, col_idx) self.tableau = self.pivot(row_idx, col_idx)
return {} return {}
def interpret_tableau( def interpret_tableau(self) -> dict[str, float]:
self, tableau: np.ndarray, col_titles: list[str]
) -> dict[str, float]:
"""Given the final tableau, add the corresponding values of the basic """Given the final tableau, add the corresponding values of the basic
decision variables to the `output_dict` decision variables to the `output_dict`
>>> tableau = np.array([ >>> Tableau(np.array([
... [0,0,0.875,0.375,5], ... [0,0,0.875,0.375,5],
... [0,1,0.375,-0.125,1], ... [0,1,0.375,-0.125,1],
... [1,0,-0.125,0.375,1] ... [1,0,-0.125,0.375,1]
... ]) ... ]),2, 0).interpret_tableau()
>>> t = Tableau(tableau, 2)
>>> t.interpret_tableau(tableau, ["x1", "x2", "s1", "s2", "RHS"])
{'P': 5.0, 'x1': 1.0, 'x2': 1.0} {'P': 5.0, 'x1': 1.0, 'x2': 1.0}
""" """
# P = RHS of final tableau # P = RHS of final tableau
output_dict = {"P": abs(tableau[0, -1])} output_dict = {"P": abs(self.tableau[0, -1])}
for i in range(self.n_vars): for i in range(self.n_vars):
# Gives ids of nonzero entries in the ith column # Gives indices of nonzero entries in the ith column
nonzero = np.nonzero(tableau[:, i]) nonzero = np.nonzero(self.tableau[:, i])
n_nonzero = len(nonzero[0]) n_nonzero = len(nonzero[0])
# First entry in the nonzero ids # First entry in the nonzero indices
nonzero_rowidx = nonzero[0][0] nonzero_rowidx = nonzero[0][0]
nonzero_val = tableau[nonzero_rowidx, i] nonzero_val = self.tableau[nonzero_rowidx, i]
# If there is only one nonzero value in column, which is one # If there is only one nonzero value in column, which is one
if n_nonzero == nonzero_val == 1: if n_nonzero == 1 and nonzero_val == 1:
rhs_val = tableau[nonzero_rowidx, -1] rhs_val = self.tableau[nonzero_rowidx, -1]
output_dict[col_titles[i]] = rhs_val output_dict[self.col_titles[i]] = rhs_val
# Check for basic variables
for title in col_titles:
# Don't add RHS or slack variables to output dict
if title[0] not in "R-s-a":
output_dict.setdefault(title, 0)
return output_dict return output_dict

View File

@ -10,11 +10,11 @@ Inputs:
- k , number of clusters to create. - k , number of clusters to create.
- initial_centroids , initial centroid values generated by utility function(mentioned - initial_centroids , initial centroid values generated by utility function(mentioned
in usage). in usage).
- maxiter , maximum number of iterations to process. - maxiter , the maximum number of iterations to process.
- heterogeneity , empty list that will be filled with hetrogeneity values if passed - heterogeneity, empty list that will be filled with heterogeneity values if passed
to kmeans func. to kmeans func.
Usage: Usage:
1. define 'k' value, 'X' features array and 'hetrogeneity' empty list 1. define 'k' value, 'X' features array and 'heterogeneity' empty list
2. create initial_centroids, 2. create initial_centroids,
initial_centroids = get_initial_centroids( initial_centroids = get_initial_centroids(
X, X,
@ -31,8 +31,8 @@ Usage:
record_heterogeneity=heterogeneity, record_heterogeneity=heterogeneity,
verbose=True # whether to print logs in console or not.(default=False) verbose=True # whether to print logs in console or not.(default=False)
) )
4. Plot the loss function, hetrogeneity values for every iteration saved in 4. Plot the loss function, heterogeneity values for every iteration saved in
hetrogeneity list. heterogeneity list.
plot_heterogeneity( plot_heterogeneity(
heterogeneity, heterogeneity,
k k
@ -46,6 +46,7 @@ import numpy as np
import pandas as pd import pandas as pd
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
from sklearn.metrics import pairwise_distances from sklearn.metrics import pairwise_distances
import doctest
warnings.filterwarnings("ignore") warnings.filterwarnings("ignore")
@ -198,10 +199,10 @@ def report_generator(
df: pd.DataFrame, clustering_variables: np.ndarray, fill_missing_report=None df: pd.DataFrame, clustering_variables: np.ndarray, fill_missing_report=None
) -> pd.DataFrame: ) -> pd.DataFrame:
""" """
Function generates easy-erading clustering report. It takes 2 arguments as an input: Function generates an easy-reading clustering report. It takes 3 arguments as input:
DataFrame - dataframe with predicted cluester column; DataFrame,predicted cluster column,
FillMissingReport - dictionary of rules how we are going to fill missing FillMissingReport - dictionary of rules on how we are going to fill in missing
values of for final report generate (not included in modeling); values of for final report generate (not included in modelling);
in order to run the function following libraries must be imported: in order to run the function following libraries must be imported:
import pandas as pd import pandas as pd
import numpy as np import numpy as np
@ -306,10 +307,10 @@ def report_generator(
a.columns = report.columns # rename columns to match report a.columns = report.columns # rename columns to match report
report = report.drop( report = report.drop(
report[report.Type == "count"].index report[report.Type == "count"].index
) # drop count values except cluster size ) # drop count values except for cluster size
report = pd.concat( report = pd.concat(
[report, a, clustersize, clusterproportion], axis=0 [report, a, cluster size, clusterproportion], axis=0
) # concat report with clustert size and nan values ) # concat report with cluster size and nan values
report["Mark"] = report["Features"].isin(clustering_variables) report["Mark"] = report["Features"].isin(clustering_variables)
cols = report.columns.tolist() cols = report.columns.tolist()
cols = cols[0:2] + cols[-1:] + cols[2:-1] cols = cols[0:2] + cols[-1:] + cols[2:-1]
@ -343,6 +344,6 @@ def report_generator(
if __name__ == "__main__": if __name__ == "__main__":
import doctest
doctest.testmod() doctest.testmod()

View File

@ -30,7 +30,7 @@ Source: https://en.wikipedia.org/wiki/Bucket_sort
from __future__ import annotations from __future__ import annotations
def bucket_sort(my_list: list) -> list: def bucket_sort(my_list: list, bucket_count: int = 10) -> list:
""" """
>>> data = [-1, 2, -5, 0] >>> data = [-1, 2, -5, 0]
>>> bucket_sort(data) == sorted(data) >>> bucket_sort(data) == sorted(data)
@ -43,21 +43,27 @@ def bucket_sort(my_list: list) -> list:
True True
>>> bucket_sort([]) == sorted([]) >>> bucket_sort([]) == sorted([])
True True
>>> data = [-1e10, 1e10]
>>> bucket_sort(data) == sorted(data)
True
>>> import random >>> import random
>>> collection = random.sample(range(-50, 50), 50) >>> collection = random.sample(range(-50, 50), 50)
>>> bucket_sort(collection) == sorted(collection) >>> bucket_sort(collection) == sorted(collection)
True True
""" """
if len(my_list) == 0:
if len(my_list) == 0 or bucket_count <= 0:
return [] return []
min_value, max_value = min(my_list), max(my_list) min_value, max_value = min(my_list), max(my_list)
bucket_count = int(max_value - min_value) + 1 bucket_size = (max_value - min_value) / bucket_count
buckets: list[list] = [[] for _ in range(bucket_count)] buckets: list[list] = [[] for _ in range(bucket_count)]
for i in my_list: for val in my_list:
buckets[int(i - min_value)].append(i) index = min(int((val - min_value) / bucket_size), bucket_count - 1)
buckets[index].append(val)
return [v for bucket in buckets for v in sorted(bucket)] return [val for bucket in buckets for val in sorted(bucket)]
if __name__ == "__main__": if __name__ == "__main__":

View File

@ -1,7 +1,5 @@
from xml.dom import NotFoundErr
import requests import requests
from bs4 import BeautifulSoup, NavigableString from bs4 import BeautifulSoup, NavigableString, Tag
from fake_useragent import UserAgent from fake_useragent import UserAgent
BASE_URL = "https://ww1.gogoanime2.org" BASE_URL = "https://ww1.gogoanime2.org"
@ -41,25 +39,23 @@ def search_scraper(anime_name: str) -> list:
# get list of anime # get list of anime
anime_ul = soup.find("ul", {"class": "items"}) anime_ul = soup.find("ul", {"class": "items"})
if anime_ul is None or isinstance(anime_ul, NavigableString):
msg = f"Could not find and anime with name {anime_name}"
raise ValueError(msg)
anime_li = anime_ul.children anime_li = anime_ul.children
# for each anime, insert to list. the name and url. # for each anime, insert to list. the name and url.
anime_list = [] anime_list = []
for anime in anime_li: for anime in anime_li:
if not isinstance(anime, NavigableString): if isinstance(anime, Tag):
try: anime_url = anime.find("a")
anime_url, anime_title = ( if anime_url is None or isinstance(anime_url, NavigableString):
anime.find("a")["href"], continue
anime.find("a")["title"], anime_title = anime.find("a")
) if anime_title is None or isinstance(anime_title, NavigableString):
anime_list.append( continue
{
"title": anime_title, anime_list.append({"title": anime_title["title"], "url": anime_url["href"]})
"url": anime_url,
}
)
except (NotFoundErr, KeyError):
pass
return anime_list return anime_list
@ -93,22 +89,24 @@ def search_anime_episode_list(episode_endpoint: str) -> list:
# With this id. get the episode list. # With this id. get the episode list.
episode_page_ul = soup.find("ul", {"id": "episode_related"}) episode_page_ul = soup.find("ul", {"id": "episode_related"})
if episode_page_ul is None or isinstance(episode_page_ul, NavigableString):
msg = f"Could not find any anime eposiodes with name {anime_name}"
raise ValueError(msg)
episode_page_li = episode_page_ul.children episode_page_li = episode_page_ul.children
episode_list = [] episode_list = []
for episode in episode_page_li: for episode in episode_page_li:
try: if isinstance(episode, Tag):
if not isinstance(episode, NavigableString): url = episode.find("a")
episode_list.append( if url is None or isinstance(url, NavigableString):
{ continue
"title": episode.find("div", {"class": "name"}).text.replace( title = episode.find("div", {"class": "name"})
" ", "" if title is None or isinstance(title, NavigableString):
), continue
"url": episode.find("a")["href"],
} episode_list.append(
) {"title": title.text.replace(" ", ""), "url": url["href"]}
except (KeyError, NotFoundErr): )
pass
return episode_list return episode_list
@ -140,11 +138,16 @@ def get_anime_episode(episode_endpoint: str) -> list:
soup = BeautifulSoup(response.text, "html.parser") soup = BeautifulSoup(response.text, "html.parser")
try: url = soup.find("iframe", {"id": "playerframe"})
episode_url = soup.find("iframe", {"id": "playerframe"})["src"] if url is None or isinstance(url, NavigableString):
download_url = episode_url.replace("/embed/", "/playlist/") + ".m3u8" msg = f"Could not find url and download url from {episode_endpoint}"
except (KeyError, NotFoundErr) as e: raise RuntimeError(msg)
raise e
episode_url = url["src"]
if not isinstance(episode_url, str):
msg = f"Could not find url and download url from {episode_endpoint}"
raise RuntimeError(msg)
download_url = episode_url.replace("/embed/", "/playlist/") + ".m3u8"
return [f"{BASE_URL}{episode_url}", f"{BASE_URL}{download_url}"] return [f"{BASE_URL}{episode_url}", f"{BASE_URL}{download_url}"]

View File

@ -3,7 +3,7 @@ CAUTION: You may get a json.decoding error.
This works for some of us but fails for others. This works for some of us but fails for others.
""" """
from datetime import datetime from datetime import UTC, datetime, timedelta
import requests import requests
from rich import box from rich import box
@ -20,18 +20,31 @@ API_URL = (
) )
def calculate_age(unix_date: int) -> str: def calculate_age(unix_date: float) -> str:
"""Calculates age from given unix time format. """Calculates age from given unix time format.
Returns: Returns:
Age as string Age as string
>>> calculate_age(-657244800000) >>> from datetime import datetime, UTC
'73' >>> years_since_create = datetime.now(tz=UTC).year - 2022
>>> calculate_age(46915200000) >>> int(calculate_age(-657244800000)) - years_since_create
'51' 73
>>> int(calculate_age(46915200000)) - years_since_create
51
""" """
birthdate = datetime.fromtimestamp(unix_date / 1000).date() # Convert date from milliseconds to seconds
unix_date /= 1000
if unix_date < 0:
# Handle timestamp before epoch
epoch = datetime.fromtimestamp(0, tz=UTC)
seconds_since_epoch = (datetime.now(tz=UTC) - epoch).seconds
birthdate = (
epoch - timedelta(seconds=abs(unix_date) - seconds_since_epoch)
).date()
else:
birthdate = datetime.fromtimestamp(unix_date, tz=UTC).date()
return str( return str(
TODAY.year TODAY.year
- birthdate.year - birthdate.year