mirror of
https://github.com/TheAlgorithms/Python.git
synced 2024-11-24 13:31:07 +00:00
71 lines
2.3 KiB
Python
71 lines
2.3 KiB
Python
|
"""Implementation of GradientBoostingRegressor in sklearn using the
|
||
|
boston dataset which is very popular for regression problem to
|
||
|
predict house price.
|
||
|
"""
|
||
|
|
||
|
import pandas as pd
|
||
|
import matplotlib.pyplot as plt
|
||
|
from sklearn.datasets import load_boston
|
||
|
from sklearn.metrics import mean_squared_error, r2_score
|
||
|
from sklearn.ensemble import GradientBoostingRegressor
|
||
|
from sklearn.model_selection import train_test_split
|
||
|
|
||
|
|
||
|
def main():
|
||
|
|
||
|
# loading the dataset from the sklearn
|
||
|
df = load_boston()
|
||
|
print(df.keys())
|
||
|
# now let construct a data frame
|
||
|
df_boston = pd.DataFrame(df.data, columns=df.feature_names)
|
||
|
# let add the target to the dataframe
|
||
|
df_boston["Price"] = df.target
|
||
|
# print the first five rows using the head function
|
||
|
print(df_boston.head())
|
||
|
# Summary statistics
|
||
|
print(df_boston.describe().T)
|
||
|
# Feature selection
|
||
|
|
||
|
X = df_boston.iloc[:, :-1]
|
||
|
y = df_boston.iloc[:, -1] # target variable
|
||
|
# split the data with 75% train and 25% test sets.
|
||
|
X_train, X_test, y_train, y_test = train_test_split(
|
||
|
X, y, random_state=0, test_size=0.25
|
||
|
)
|
||
|
|
||
|
model = GradientBoostingRegressor(
|
||
|
n_estimators=500, max_depth=5, min_samples_split=4, learning_rate=0.01
|
||
|
)
|
||
|
# training the model
|
||
|
model.fit(X_train, y_train)
|
||
|
# to see how good the model fit the data
|
||
|
training_score = model.score(X_train, y_train).round(3)
|
||
|
test_score = model.score(X_test, y_test).round(3)
|
||
|
print("Training score of GradientBoosting is :", training_score)
|
||
|
print(
|
||
|
"The test score of GradientBoosting is :",
|
||
|
test_score
|
||
|
)
|
||
|
# Let us evaluation the model by finding the errors
|
||
|
y_pred = model.predict(X_test)
|
||
|
|
||
|
# The mean squared error
|
||
|
print("Mean squared error: %.2f" % mean_squared_error(y_test, y_pred))
|
||
|
# Explained variance score: 1 is perfect prediction
|
||
|
print("Test Variance score: %.2f" % r2_score(y_test, y_pred))
|
||
|
|
||
|
# So let's run the model against the test data
|
||
|
fig, ax = plt.subplots()
|
||
|
ax.scatter(y_test, y_pred, edgecolors=(0, 0, 0))
|
||
|
ax.plot([y_test.min(), y_test.max()],
|
||
|
[y_test.min(), y_test.max()], "k--", lw=4)
|
||
|
ax.set_xlabel("Actual")
|
||
|
ax.set_ylabel("Predicted")
|
||
|
ax.set_title("Truth vs Predicted")
|
||
|
# this show function will display the plotting
|
||
|
plt.show()
|
||
|
|
||
|
|
||
|
if __name__ == "__main__":
|
||
|
main()
|