From 7446e695716eaf0f75f51a1c4805f8d8d698e653 Mon Sep 17 00:00:00 2001
From: Abdoulaye Balde <51192943+abdoulayegk@users.noreply.github.com>
Date: Sat, 26 Sep 2020 14:58:29 +0000
Subject: [PATCH] Gradient Boosting Regressor (#2298)

* Stock market prediction using gradient boosting

* To reverse a string using stack

* To reverse string using stack

* Predict Stock Prices Python & Machine Learning

* Gradient boosting regressor on boston dataset

* Gradient boosting regressor implementation

* Gradient boosting regressor

* Gradient boosting regressor

* Gradient boosting regressor

* Removing files

* GradientBoostingRegressor example

* Demo Gradient Boosting

* Demo Gradient boosting

* demo of gradient boosting

* gradient boosting demo

* Fix spelling mistake

* Fix formatting

Co-authored-by: John Law <johnlaw.po@gmail.com>
---
 .../gradient_boosting_regressor.py            | 70 +++++++++++++++++++
 1 file changed, 70 insertions(+)
 create mode 100644 machine_learning/gradient_boosting_regressor.py

diff --git a/machine_learning/gradient_boosting_regressor.py b/machine_learning/gradient_boosting_regressor.py
new file mode 100644
index 000000000..045aa056e
--- /dev/null
+++ b/machine_learning/gradient_boosting_regressor.py
@@ -0,0 +1,70 @@
+"""Demonstration of scikit-learn's GradientBoostingRegressor on the
+   Boston housing dataset, a popular benchmark for regression problems,
+   to predict house prices.
+"""
+
+import pandas as pd
+import matplotlib.pyplot as plt
+from sklearn.datasets import load_boston
+from sklearn.metrics import mean_squared_error, r2_score
+from sklearn.ensemble import GradientBoostingRegressor
+from sklearn.model_selection import train_test_split
+
+
+def main():
+    """Train and evaluate a GradientBoostingRegressor on the Boston dataset."""
+    # Load the Boston housing dataset bundled with scikit-learn
+    df = load_boston()
+    print(df.keys())
+    # Build a DataFrame with one column per feature
+    df_boston = pd.DataFrame(df.data, columns=df.feature_names)
+    # Append the target as the last column
+    df_boston["Price"] = df.target
+    # Print the first five rows
+    print(df_boston.head())
+    # Summary statistics, one row per column
+    print(df_boston.describe().T)
+    # Separate features from target
+
+    X = df_boston.iloc[:, :-1]
+    y = df_boston.iloc[:, -1]  # target variable
+    # Hold out 25% of the rows for testing; the seed keeps the split repeatable
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, random_state=0, test_size=0.25
+    )
+
+    model = GradientBoostingRegressor(
+        n_estimators=500, max_depth=5, min_samples_split=4, learning_rate=0.01
+    )
+    # Train the model
+    model.fit(X_train, y_train)
+    # Built-in round() works for both numpy and plain Python floats
+    training_score = round(model.score(X_train, y_train), 3)
+    test_score = round(model.score(X_test, y_test), 3)
+    print("Training score of GradientBoosting is :", training_score)
+    print(
+        "The test score of GradientBoosting is :",
+        test_score
+    )
+    # Evaluate the model on the held-out test set
+    y_pred = model.predict(X_test)
+
+    # The mean squared error
+    print("Mean squared error: %.2f" % mean_squared_error(y_test, y_pred))
+    # r2_score is the coefficient of determination: 1 is perfect prediction
+    print("Test Variance score: %.2f" % r2_score(y_test, y_pred))
+
+    # Plot predictions against actual values; the dashed diagonal is y = x
+    fig, ax = plt.subplots()
+    ax.scatter(y_test, y_pred, edgecolors=(0, 0, 0))
+    ax.plot([y_test.min(), y_test.max()],
+            [y_test.min(), y_test.max()], "k--", lw=4)
+    ax.set_xlabel("Actual")
+    ax.set_ylabel("Predicted")
+    ax.set_title("Truth vs Predicted")
+    # Display the figure
+    plt.show()
+
+
+if __name__ == "__main__":  # run the demo only when executed as a script
+    main()