From 7446e695716eaf0f75f51a1c4805f8d8d698e653 Mon Sep 17 00:00:00 2001 From: Abdoulaye Balde <51192943+abdoulayegk@users.noreply.github.com> Date: Sat, 26 Sep 2020 14:58:29 +0000 Subject: [PATCH] Gradient Boosting Regressor (#2298) * Stock market prediction using greadient boosting * To reverse a string using stack * To reverse string using stack * Predict Stock Prices Python & Machine Learning * Gradient boosting regressor on boston dataset * Gradient boosting regressor implementation * Gradient boosting regressor * Gradient boosting regressor * Gradient boosting regressor * Removing files * GradientBoostingRegressor example * Demo Gradient Boosting * Demo Gradient boosting * demo of gradient boosting * gradient boosting demo * Fix spelling mistake * Fix formatting Co-authored-by: John Law --- .../gradient_boosting_regressor.py | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 machine_learning/gradient_boosting_regressor.py diff --git a/machine_learning/gradient_boosting_regressor.py b/machine_learning/gradient_boosting_regressor.py new file mode 100644 index 000000000..045aa056e --- /dev/null +++ b/machine_learning/gradient_boosting_regressor.py @@ -0,0 +1,70 @@ +"""Implementation of GradientBoostingRegressor in sklearn using the + boston dataset which is very popular for regression problem to + predict house price. +""" + +import pandas as pd +import matplotlib.pyplot as plt +from sklearn.datasets import load_boston +from sklearn.metrics import mean_squared_error, r2_score +from sklearn.ensemble import GradientBoostingRegressor +from sklearn.model_selection import train_test_split + + +def main(): + + # loading the dataset from the sklearn + df = load_boston() + print(df.keys()) + # now let construct a data frame + df_boston = pd.DataFrame(df.data, columns=df.feature_names) + # let add the target to the dataframe + df_boston["Price"] = df.target + # print the first five rows using the head function + print(df_boston.head()) + # Summary statistics + print(df_boston.describe().T) + # Feature selection + + X = df_boston.iloc[:, :-1] + y = df_boston.iloc[:, -1] # target variable + # split the data with 75% train and 25% test sets. + X_train, X_test, y_train, y_test = train_test_split( + X, y, random_state=0, test_size=0.25 + ) + + model = GradientBoostingRegressor( + n_estimators=500, max_depth=5, min_samples_split=4, learning_rate=0.01 + ) + # training the model + model.fit(X_train, y_train) + # to see how good the model fit the data + training_score = model.score(X_train, y_train).round(3) + test_score = model.score(X_test, y_test).round(3) + print("Training score of GradientBoosting is :", training_score) + print( + "The test score of GradientBoosting is :", + test_score + ) + # Let us evaluation the model by finding the errors + y_pred = model.predict(X_test) + + # The mean squared error + print("Mean squared error: %.2f" % mean_squared_error(y_test, y_pred)) + # Explained variance score: 1 is perfect prediction + print("Test Variance score: %.2f" % r2_score(y_test, y_pred)) + + # So let's run the model against the test data + fig, ax = plt.subplots() + ax.scatter(y_test, y_pred, edgecolors=(0, 0, 0)) + ax.plot([y_test.min(), y_test.max()], + [y_test.min(), y_test.max()], "k--", lw=4) + ax.set_xlabel("Actual") + ax.set_ylabel("Predicted") + ax.set_title("Truth vs Predicted") + # this show function will display the plotting + plt.show() + + +if __name__ == "__main__": + main()