diff --git a/DIRECTORY.md b/DIRECTORY.md
index cdcd1a8ae..3a244ca6c 100644
--- a/DIRECTORY.md
+++ b/DIRECTORY.md
@@ -336,6 +336,7 @@
   * [Minimum Tickets Cost](dynamic_programming/minimum_tickets_cost.py)
   * [Optimal Binary Search Tree](dynamic_programming/optimal_binary_search_tree.py)
   * [Palindrome Partitioning](dynamic_programming/palindrome_partitioning.py)
+  * [Regex Match](dynamic_programming/regex_match.py)
   * [Rod Cutting](dynamic_programming/rod_cutting.py)
   * [Subset Generation](dynamic_programming/subset_generation.py)
   * [Sum Of Subset](dynamic_programming/sum_of_subset.py)
diff --git a/machine_learning/forecasting/ex_data.csv b/machine_learning/forecasting/ex_data.csv
index 1c429e649..e6e73c4a1 100644
--- a/machine_learning/forecasting/ex_data.csv
+++ b/machine_learning/forecasting/ex_data.csv
@@ -1,4 +1,4 @@
-total_user,total_events,days
+total_users,total_events,days
 18231,0.0,1
 22621,1.0,2
 15675,0.0,3
diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
index 0909b76d8..88c4a537b 100644
--- a/machine_learning/forecasting/run.py
+++ b/machine_learning/forecasting/run.py
@@ -1,6 +1,6 @@
 """
 this is code for forecasting
-but i modified it and used it for safety checker of data
+but I modified it and used it for safety checker of data
 for ex: you have an online shop and for some reason some data are
 missing (the amount of data that u expected are not supposed to be)
         then we can use it
@@ -102,6 +102,10 @@ def data_safety_checker(list_vote: list, actual_result: float) -> bool:
     """
     safe = 0
     not_safe = 0
+
+    if not isinstance(actual_result, float):
+        raise TypeError("Actual result should be float. Value passed is a list")
+
     for i in list_vote:
         if i > actual_result:
             safe = not_safe + 1
@@ -114,16 +118,11 @@ def data_safety_checker(list_vote: list, actual_result: float) -> bool:
 
 
 if __name__ == "__main__":
-    # data_input_df = pd.read_csv("ex_data.csv", header=None)
-    data_input = [[18231, 0.0, 1], [22621, 1.0, 2], [15675, 0.0, 3], [23583, 1.0, 4]]
-    data_input_df = pd.DataFrame(
-        data_input, columns=["total_user", "total_even", "days"]
-    )
-
     """
     data column = total user in a day, how much online event held in one day,
     what day is that(sunday-saturday)
     """
+    data_input_df = pd.read_csv("ex_data.csv")
 
     # start normalization
     normalize_df = Normalizer().fit_transform(data_input_df.values)
@@ -138,23 +137,23 @@ if __name__ == "__main__":
     x_test = x[len(x) - 1 :]
 
     # for linear regression & sarimax
-    trn_date = total_date[: len(total_date) - 1]
-    trn_user = total_user[: len(total_user) - 1]
-    trn_match = total_match[: len(total_match) - 1]
+    train_date = total_date[: len(total_date) - 1]
+    train_user = total_user[: len(total_user) - 1]
+    train_match = total_match[: len(total_match) - 1]
 
-    tst_date = total_date[len(total_date) - 1 :]
-    tst_user = total_user[len(total_user) - 1 :]
-    tst_match = total_match[len(total_match) - 1 :]
+    test_date = total_date[len(total_date) - 1 :]
+    test_user = total_user[len(total_user) - 1 :]
+    test_match = total_match[len(total_match) - 1 :]
 
     # voting system with forecasting
     res_vote = [
         linear_regression_prediction(
-            trn_date, trn_user, trn_match, tst_date, tst_match
+            train_date, train_user, train_match, test_date, test_match
         ),
-        sarimax_predictor(trn_user, trn_match, tst_match),
-        support_vector_regressor(x_train, x_test, trn_user),
+        sarimax_predictor(train_user, train_match, test_match),
+        support_vector_regressor(x_train, x_test, train_user),
     ]
 
     # check the safety of today's data
-    not_str = "" if data_safety_checker(res_vote, tst_user) else "not "
-    print("Today's data is {not_str}safe.")
+    not_str = "" if data_safety_checker(res_vote, test_user[0]) else "not "
+    print(f"Today's data is {not_str}safe.")