# XGBoost Classifier Example import numpy as np from matplotlib import pyplot as plt from sklearn.datasets import load_iris from sklearn.metrics import ConfusionMatrixDisplay from sklearn.model_selection import train_test_split from xgboost import XGBClassifier def data_handling(data: dict) -> tuple: # Split dataset into features and target # data is features """ >>> data_handling(({'data':'[5.1, 3.5, 1.4, 0.2]','target':([0])})) ('[5.1, 3.5, 1.4, 0.2]', [0]) >>> data_handling( ... {'data': '[4.9, 3.0, 1.4, 0.2], [4.7, 3.2, 1.3, 0.2]', 'target': ([0, 0])} ... ) ('[4.9, 3.0, 1.4, 0.2], [4.7, 3.2, 1.3, 0.2]', [0, 0]) """ return (data["data"], data["target"]) def xgboost(features: np.ndarray, target: np.ndarray) -> XGBClassifier: """ # THIS TEST IS BROKEN!! >>> xgboost(np.array([[5.1, 3.6, 1.4, 0.2]]), np.array([0])) XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None, colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1, early_stopping_rounds=None, enable_categorical=False, eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise', importance_type=None, interaction_constraints='', learning_rate=0.300000012, max_bin=256, max_cat_to_onehot=4, max_delta_step=0, max_depth=6, max_leaves=0, min_child_weight=1, missing=nan, monotone_constraints='()', n_estimators=100, n_jobs=0, num_parallel_tree=1, predictor='auto', random_state=0, reg_alpha=0, reg_lambda=1, ...) """ classifier = XGBClassifier() classifier.fit(features, target) return classifier def main() -> None: """ >>> main() Url for the algorithm: https://xgboost.readthedocs.io/en/stable/ Iris type dataset is used to demonstrate algorithm. """ # Load Iris dataset iris = load_iris() features, targets = data_handling(iris) x_train, x_test, y_train, y_test = train_test_split( features, targets, test_size=0.25 ) names = iris["target_names"] # Create an XGBoost Classifier from the training data xgboost_classifier = xgboost(x_train, y_train) # Display the confusion matrix of the classifier with both training and test sets ConfusionMatrixDisplay.from_estimator( xgboost_classifier, x_test, y_test, display_labels=names, cmap="Blues", normalize="true", ) plt.title("Normalized Confusion Matrix - IRIS Dataset") plt.show() if __name__ == "__main__": import doctest doctest.testmod(verbose=True) main()