Python/machine_learning/gradient_boosting_regressor.py

"""Implementation of GradientBoostingRegressor in sklearn using the
   boston dataset which is very popular for regression problem to
   predict house price.
"""

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_boston
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split


def main() -> None:
    """Train a GradientBoostingRegressor on the Boston data and report results.

    The function loads the dataset, prints an overview of it, fits the model
    on a 75%/25% train/test split, prints the train/test scores together with
    the mean squared error and R^2 on the test set, and finally shows a
    scatter plot of the actual versus the predicted target values.

    NOTE: ``load_boston`` was deprecated in scikit-learn 1.0 and removed in
    1.2, so this script needs an older scikit-learn release to run as-is.
    """
    # Load the dataset shipped with scikit-learn and list its keys.
    df = load_boston()
    print(df.keys())

    # Build a data frame from the feature matrix and append the target
    # as a "Price" column.
    df_boston = pd.DataFrame(df.data, columns=df.feature_names)
    df_boston["Price"] = df.target
    # First five rows, then the (transposed) summary statistics.
    print(df_boston.head())
    print(df_boston.describe().T)

    # "Price" was appended last: every other column is a feature and the
    # last column is the target variable.
    X = df_boston.iloc[:, :-1]
    y = df_boston.iloc[:, -1]
    # Split the data into 75% train and 25% test sets.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, random_state=0, test_size=0.25
    )

    model = GradientBoostingRegressor(
        n_estimators=500, max_depth=5, min_samples_split=4, learning_rate=0.01
    )
    model.fit(X_train, y_train)

    # See how well the model fits the data. The built-in round() is used
    # rather than the ``.round`` method so this works whether ``score``
    # returns a plain Python float or a NumPy scalar.
    training_score = round(model.score(X_train, y_train), 3)
    test_score = round(model.score(X_test, y_test), 3)
    print("Training score of GradientBoosting is :", training_score)
    print(
        "The test score of GradientBoosting is :",
        test_score
    )

    # Evaluate the model through its errors on the held-out test set.
    y_pred = model.predict(X_test)

    print("Mean squared error: %.2f" % mean_squared_error(y_test, y_pred))
    # r2_score is the coefficient of determination (1 is a perfect
    # prediction); the printed label is kept unchanged.
    print("Test Variance score: %.2f" % r2_score(y_test, y_pred))

    # Plot predictions against the true values; the dashed diagonal spans
    # the range of the actual targets and marks where predicted == actual.
    lowest, highest = y_test.min(), y_test.max()
    _, ax = plt.subplots()
    ax.scatter(y_test, y_pred, edgecolors=(0, 0, 0))
    ax.plot([lowest, highest], [lowest, highest], "k--", lw=4)
    ax.set_xlabel("Actual")
    ax.set_ylabel("Predicted")
    ax.set_title("Truth vs Predicted")
    # Display the figure.
    plt.show()


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Gradient Boosting Regressor (#2298) * Stock market prediction using greadient boosting * To reverse a string using stack * To reverse string using stack * Predict Stock Prices Python & Machine Learning * Gradient boosting regressor on boston dataset * Gradient boosting regressor implementation * Gradient boosting regressor * Gradient boosting regressor * Gradient boosting regressor * Removing files * GradientBoostingRegressor example * Demo Gradient Boosting * Demo Gradient boosting * demo of gradient boosting * gradient boosting demo * Fix spelling mistake * Fix formatting Co-authored-by: John Law <johnlaw.po@gmail.com> 2020-09-26 14:58:29 +00:00			`"""Implementation of GradientBoostingRegressor in sklearn using the`
			`boston dataset which is very popular for regression problem to`
			`predict house price.`
			`"""`

			`import pandas as pd`
			`import matplotlib.pyplot as plt`
			`from sklearn.datasets import load_boston`
			`from sklearn.metrics import mean_squared_error, r2_score`
			`from sklearn.ensemble import GradientBoostingRegressor`
			`from sklearn.model_selection import train_test_split`


			`def main():`

			`# loading the dataset from the sklearn`
			`df = load_boston()`
			`print(df.keys())`
			`# now let construct a data frame`
			`df_boston = pd.DataFrame(df.data, columns=df.feature_names)`
			`# let add the target to the dataframe`
			`df_boston["Price"] = df.target`
			`# print the first five rows using the head function`
			`print(df_boston.head())`
			`# Summary statistics`
			`print(df_boston.describe().T)`
			`# Feature selection`

			`X = df_boston.iloc[:, :-1]`
			`y = df_boston.iloc[:, -1] # target variable`
			`# split the data with 75% train and 25% test sets.`
			`X_train, X_test, y_train, y_test = train_test_split(`
			`X, y, random_state=0, test_size=0.25`
			`)`

			`model = GradientBoostingRegressor(`
			`n_estimators=500, max_depth=5, min_samples_split=4, learning_rate=0.01`
			`)`
			`# training the model`
			`model.fit(X_train, y_train)`
			`# to see how good the model fit the data`
			`training_score = model.score(X_train, y_train).round(3)`
			`test_score = model.score(X_test, y_test).round(3)`
			`print("Training score of GradientBoosting is :", training_score)`
			`print(`
			`"The test score of GradientBoosting is :",`
			`test_score`
			`)`
			`# Let us evaluation the model by finding the errors`
			`y_pred = model.predict(X_test)`

			`# The mean squared error`
			`print("Mean squared error: %.2f" % mean_squared_error(y_test, y_pred))`
			`# Explained variance score: 1 is perfect prediction`
			`print("Test Variance score: %.2f" % r2_score(y_test, y_pred))`

			`# So let's run the model against the test data`
			`fig, ax = plt.subplots()`
			`ax.scatter(y_test, y_pred, edgecolors=(0, 0, 0))`
			`ax.plot([y_test.min(), y_test.max()],`
			`[y_test.min(), y_test.max()], "k--", lw=4)`
			`ax.set_xlabel("Actual")`
			`ax.set_ylabel("Predicted")`
			`ax.set_title("Truth vs Predicted")`
			`# this show function will display the plotting`
			`plt.show()`


			`if __name__ == "__main__":`
			`main()`