Python/machine_learning/xgboost_classifier.py
Christian Clauss c909da9b08
pre-commit: Upgrade psf/black for stable style 2023 ()
* pre-commit: Upgrade psf/black for stable style 2023

Updating https://github.com/psf/black ... updating 22.12.0 -> 23.1.0 for their `2023 stable style`.
* https://github.com/psf/black/blob/main/CHANGES.md#2310

> This is the first [psf/black] release of 2023, and following our stability policy, it comes with a number of improvements to our stable style…

Also, add https://github.com/tox-dev/pyproject-fmt and https://github.com/abravalheri/validate-pyproject to pre-commit.

I only modified `.pre-commit-config.yaml` and all other files were modified by pre-commit.ci and psf/black.

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2023-02-01 18:44:54 +05:30

82 lines
2.6 KiB
Python

# XGBoost Classifier Example
import numpy as np
from matplotlib import pyplot as plt
from sklearn.datasets import load_iris
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
def data_handling(data: dict) -> tuple:
# Split dataset into features and target
# data is features
"""
>>> data_handling(({'data':'[5.1, 3.5, 1.4, 0.2]','target':([0])}))
('[5.1, 3.5, 1.4, 0.2]', [0])
>>> data_handling(
... {'data': '[4.9, 3.0, 1.4, 0.2], [4.7, 3.2, 1.3, 0.2]', 'target': ([0, 0])}
... )
('[4.9, 3.0, 1.4, 0.2], [4.7, 3.2, 1.3, 0.2]', [0, 0])
"""
return (data["data"], data["target"])
def xgboost(features: np.ndarray, target: np.ndarray) -> XGBClassifier:
"""
# THIS TEST IS BROKEN!! >>> xgboost(np.array([[5.1, 3.6, 1.4, 0.2]]), np.array([0]))
XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,
colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
early_stopping_rounds=None, enable_categorical=False,
eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',
importance_type=None, interaction_constraints='',
learning_rate=0.300000012, max_bin=256, max_cat_to_onehot=4,
max_delta_step=0, max_depth=6, max_leaves=0, min_child_weight=1,
missing=nan, monotone_constraints='()', n_estimators=100,
n_jobs=0, num_parallel_tree=1, predictor='auto', random_state=0,
reg_alpha=0, reg_lambda=1, ...)
"""
classifier = XGBClassifier()
classifier.fit(features, target)
return classifier
def main() -> None:
"""
>>> main()
Url for the algorithm:
https://xgboost.readthedocs.io/en/stable/
Iris type dataset is used to demonstrate algorithm.
"""
# Load Iris dataset
iris = load_iris()
features, targets = data_handling(iris)
x_train, x_test, y_train, y_test = train_test_split(
features, targets, test_size=0.25
)
names = iris["target_names"]
# Create an XGBoost Classifier from the training data
xgboost_classifier = xgboost(x_train, y_train)
# Display the confusion matrix of the classifier with both training and test sets
ConfusionMatrixDisplay.from_estimator(
xgboost_classifier,
x_test,
y_test,
display_labels=names,
cmap="Blues",
normalize="true",
)
plt.title("Normalized Confusion Matrix - IRIS Dataset")
plt.show()
if __name__ == "__main__":
import doctest
doctest.testmod(verbose=True)
main()