mirror of
https://github.com/TheAlgorithms/Python.git
synced 2025-01-30 22:23:42 +00:00
Gradient Boosting Regressor (#2298)
* Stock market prediction using greadient boosting * To reverse a string using stack * To reverse string using stack * Predict Stock Prices Python & Machine Learning * Gradient boosting regressor on boston dataset * Gradient boosting regressor implementation * Gradient boosting regressor * Gradient boosting regressor * Gradient boosting regressor * Removing files * GradientBoostingRegressor example * Demo Gradient Boosting * Demo Gradient boosting * demo of gradient boosting * gradient boosting demo * Fix spelling mistake * Fix formatting Co-authored-by: John Law <johnlaw.po@gmail.com>
This commit is contained in:
parent
7f48bb8c95
commit
7446e69571
70
machine_learning/gradient_boosting_regressor.py
Normal file
70
machine_learning/gradient_boosting_regressor.py
Normal file
|
@ -0,0 +1,70 @@
|
|||
"""Implementation of GradientBoostingRegressor in sklearn using the
|
||||
boston dataset which is very popular for regression problem to
|
||||
predict house price.
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
from sklearn.datasets import load_boston
|
||||
from sklearn.metrics import mean_squared_error, r2_score
|
||||
from sklearn.ensemble import GradientBoostingRegressor
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
|
||||
def main():
|
||||
|
||||
# loading the dataset from the sklearn
|
||||
df = load_boston()
|
||||
print(df.keys())
|
||||
# now let construct a data frame
|
||||
df_boston = pd.DataFrame(df.data, columns=df.feature_names)
|
||||
# let add the target to the dataframe
|
||||
df_boston["Price"] = df.target
|
||||
# print the first five rows using the head function
|
||||
print(df_boston.head())
|
||||
# Summary statistics
|
||||
print(df_boston.describe().T)
|
||||
# Feature selection
|
||||
|
||||
X = df_boston.iloc[:, :-1]
|
||||
y = df_boston.iloc[:, -1] # target variable
|
||||
# split the data with 75% train and 25% test sets.
|
||||
X_train, X_test, y_train, y_test = train_test_split(
|
||||
X, y, random_state=0, test_size=0.25
|
||||
)
|
||||
|
||||
model = GradientBoostingRegressor(
|
||||
n_estimators=500, max_depth=5, min_samples_split=4, learning_rate=0.01
|
||||
)
|
||||
# training the model
|
||||
model.fit(X_train, y_train)
|
||||
# to see how good the model fit the data
|
||||
training_score = model.score(X_train, y_train).round(3)
|
||||
test_score = model.score(X_test, y_test).round(3)
|
||||
print("Training score of GradientBoosting is :", training_score)
|
||||
print(
|
||||
"The test score of GradientBoosting is :",
|
||||
test_score
|
||||
)
|
||||
# Let us evaluation the model by finding the errors
|
||||
y_pred = model.predict(X_test)
|
||||
|
||||
# The mean squared error
|
||||
print("Mean squared error: %.2f" % mean_squared_error(y_test, y_pred))
|
||||
# Explained variance score: 1 is perfect prediction
|
||||
print("Test Variance score: %.2f" % r2_score(y_test, y_pred))
|
||||
|
||||
# So let's run the model against the test data
|
||||
fig, ax = plt.subplots()
|
||||
ax.scatter(y_test, y_pred, edgecolors=(0, 0, 0))
|
||||
ax.plot([y_test.min(), y_test.max()],
|
||||
[y_test.min(), y_test.max()], "k--", lw=4)
|
||||
ax.set_xlabel("Actual")
|
||||
ax.set_ylabel("Predicted")
|
||||
ax.set_title("Truth vs Predicted")
|
||||
# this show function will display the plotting
|
||||
plt.show()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
Loading…
Reference in New Issue
Block a user