mirror of
https://github.com/TheAlgorithms/Python.git
synced 2025-01-18 00:07:00 +00:00
Added Gradient Boosting Classifier (#10944)
* Added Gradient Boosting Classifier * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update gradient_boosting_classifier.py * Update gradient_boosting_classifier.py * Update gradient_boosting_classifier.py * Update gradient_boosting_classifier.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
parent
5df16f11eb
commit
a0e80a74c8
118
machine_learning/gradient_boosting_classifier.py
Normal file
118
machine_learning/gradient_boosting_classifier.py
Normal file
|
@ -0,0 +1,118 @@
|
|||
import numpy as np
|
||||
from sklearn.datasets import load_iris
|
||||
from sklearn.metrics import accuracy_score
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.tree import DecisionTreeRegressor
|
||||
|
||||
|
||||
class GradientBoostingClassifier:
|
||||
def __init__(self, n_estimators: int = 100, learning_rate: float = 0.1) -> None:
|
||||
"""
|
||||
Initialize a GradientBoostingClassifier.
|
||||
|
||||
Parameters:
|
||||
- n_estimators (int): The number of weak learners to train.
|
||||
- learning_rate (float): The learning rate for updating the model.
|
||||
|
||||
Attributes:
|
||||
- n_estimators (int): The number of weak learners.
|
||||
- learning_rate (float): The learning rate.
|
||||
- models (list): A list to store the trained weak learners.
|
||||
"""
|
||||
self.n_estimators = n_estimators
|
||||
self.learning_rate = learning_rate
|
||||
self.models: list[tuple[DecisionTreeRegressor, float]] = []
|
||||
|
||||
def fit(self, features: np.ndarray, target: np.ndarray) -> None:
|
||||
"""
|
||||
Fit the GradientBoostingClassifier to the training data.
|
||||
|
||||
Parameters:
|
||||
- features (np.ndarray): The training features.
|
||||
- target (np.ndarray): The target values.
|
||||
|
||||
Returns:
|
||||
None
|
||||
|
||||
>>> import numpy as np
|
||||
>>> from sklearn.datasets import load_iris
|
||||
>>> clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1)
|
||||
>>> iris = load_iris()
|
||||
>>> X, y = iris.data, iris.target
|
||||
>>> clf.fit(X, y)
|
||||
>>> # Check if the model is trained
|
||||
>>> len(clf.models) == 100
|
||||
True
|
||||
"""
|
||||
for _ in range(self.n_estimators):
|
||||
# Calculate the pseudo-residuals
|
||||
residuals = -self.gradient(target, self.predict(features))
|
||||
# Fit a weak learner (e.g., decision tree) to the residuals
|
||||
model = DecisionTreeRegressor(max_depth=1)
|
||||
model.fit(features, residuals)
|
||||
# Update the model by adding the weak learner with a learning rate
|
||||
self.models.append((model, self.learning_rate))
|
||||
|
||||
def predict(self, features: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Make predictions on input data.
|
||||
|
||||
Parameters:
|
||||
- features (np.ndarray): The input data for making predictions.
|
||||
|
||||
Returns:
|
||||
- np.ndarray: An array of binary predictions (-1 or 1).
|
||||
|
||||
>>> import numpy as np
|
||||
>>> from sklearn.datasets import load_iris
|
||||
>>> clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1)
|
||||
>>> iris = load_iris()
|
||||
>>> X, y = iris.data, iris.target
|
||||
>>> clf.fit(X, y)
|
||||
>>> y_pred = clf.predict(X)
|
||||
>>> # Check if the predictions have the correct shape
|
||||
>>> y_pred.shape == y.shape
|
||||
True
|
||||
"""
|
||||
# Initialize predictions with zeros
|
||||
predictions = np.zeros(features.shape[0])
|
||||
for model, learning_rate in self.models:
|
||||
predictions += learning_rate * model.predict(features)
|
||||
return np.sign(predictions) # Convert to binary predictions (-1 or 1)
|
||||
|
||||
def gradient(self, target: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Calculate the negative gradient (pseudo-residuals) for logistic loss.
|
||||
|
||||
Parameters:
|
||||
- target (np.ndarray): The target values.
|
||||
- y_pred (np.ndarray): The predicted values.
|
||||
|
||||
Returns:
|
||||
- np.ndarray: An array of pseudo-residuals.
|
||||
|
||||
>>> import numpy as np
|
||||
>>> clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1)
|
||||
>>> target = np.array([0, 1, 0, 1])
|
||||
>>> y_pred = np.array([0.2, 0.8, 0.3, 0.7])
|
||||
>>> residuals = clf.gradient(target, y_pred)
|
||||
>>> # Check if residuals have the correct shape
|
||||
>>> residuals.shape == target.shape
|
||||
True
|
||||
"""
|
||||
return -target / (1 + np.exp(target * y_pred))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
iris = load_iris()
|
||||
X, y = iris.data, iris.target
|
||||
X_train, X_test, y_train, y_test = train_test_split(
|
||||
X, y, test_size=0.2, random_state=42
|
||||
)
|
||||
|
||||
clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1)
|
||||
clf.fit(X_train, y_train)
|
||||
|
||||
y_pred = clf.predict(X_test)
|
||||
accuracy = accuracy_score(y_test, y_pred)
|
||||
print(f"Accuracy: {accuracy:.2f}")
|
Loading…
Reference in New Issue
Block a user