Python/machine_learning/xgboost_regressor.py

# XGBoost Regressor Example
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor


def data_handling(data: dict) -> tuple:
    # Split dataset into features and target.  Data is features.
    """
    >>> data_handling((
    ...  {'data':'[ 8.3252 41. 6.9841269 1.02380952  322. 2.55555556   37.88 -122.23 ]'
    ...  ,'target':([4.526])}))
    ('[ 8.3252 41. 6.9841269 1.02380952  322. 2.55555556   37.88 -122.23 ]', [4.526])
    """
    return (data["data"], data["target"])


def xgboost(
    features: np.ndarray, target: np.ndarray, test_features: np.ndarray
) -> np.ndarray:
    """
    >>> xgboost(np.array([[ 2.3571 ,   52. , 6.00813008, 1.06775068,
    ...    907. , 2.45799458,   40.58 , -124.26]]),np.array([1.114]),
    ... np.array([[1.97840000e+00,  3.70000000e+01,  4.98858447e+00,  1.03881279e+00,
    ...    1.14300000e+03,  2.60958904e+00,  3.67800000e+01, -1.19780000e+02]]))
    array([[1.1139996]], dtype=float32)
    """
    xgb = XGBRegressor(verbosity=0, random_state=42)
    xgb.fit(features, target)
    # Predict target for test data
    predictions = xgb.predict(test_features)
    predictions = predictions.reshape(len(predictions), 1)
    return predictions


def main() -> None:
    """
    >>> main()
    Mean Absolute Error : 0.30957163379906033
    Mean Square Error  : 0.22611560196662744

    The URL for this algorithm
    https://xgboost.readthedocs.io/en/stable/
    California house price dataset is used to demonstrate the algorithm.
    """
    # Load California house price dataset
    california = fetch_california_housing()
    data, target = data_handling(california)
    x_train, x_test, y_train, y_test = train_test_split(
        data, target, test_size=0.25, random_state=1
    )
    predictions = xgboost(x_train, y_train, x_test)
    # Error printing
    print(f"Mean Absolute Error : {mean_absolute_error(y_test, predictions)}")
    print(f"Mean Square Error  : {mean_squared_error(y_test, predictions)}")


if __name__ == "__main__":
    import doctest

    doctest.testmod(verbose=True)
    main()
XGB Regressor (#7107) * Fixes: #{6551} * Fixes: #{6551} * Update xgboostclassifier.py * Delete xgboostclassifier.py * Update xgboostregressor.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fixes: #{6551} * Fixes : {#6551} * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fixes: {#6551] * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update xgboostregressor.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update xgboostregressor.py * Update xgboostregressor.py * Fixes: { #6551} * Update xgboostregressor.py * Fixes: { #6551} * Fixes: { #6551} * Update and rename xgboostregressor.py to xgboost_regressor.py Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Christian Clauss <cclauss@me.com> 2022-10-23 11:47:19 +00:00			`# XGBoost Regressor Example`
			`import numpy as np`
			`from sklearn.datasets import fetch_california_housing`
			`from sklearn.metrics import mean_absolute_error, mean_squared_error`
			`from sklearn.model_selection import train_test_split`
			`from xgboost import XGBRegressor`


			`def data_handling(data: dict) -> tuple:`
			`# Split dataset into features and target. Data is features.`
			`"""`
			`>>> data_handling((`
			`... {'data':'[ 8.3252 41. 6.9841269 1.02380952 322. 2.55555556 37.88 -122.23 ]'`
			`... ,'target':([4.526])}))`
			`('[ 8.3252 41. 6.9841269 1.02380952 322. 2.55555556 37.88 -122.23 ]', [4.526])`
			`"""`
			`return (data["data"], data["target"])`


			`def xgboost(`
			`features: np.ndarray, target: np.ndarray, test_features: np.ndarray`
			`) -> np.ndarray:`
			`"""`
			`>>> xgboost(np.array([[ 2.3571 , 52. , 6.00813008, 1.06775068,`
			`... 907. , 2.45799458, 40.58 , -124.26]]),np.array([1.114]),`
			`... np.array([[1.97840000e+00, 3.70000000e+01, 4.98858447e+00, 1.03881279e+00,`
			`... 1.14300000e+03, 2.60958904e+00, 3.67800000e+01, -1.19780000e+02]]))`
			`array([[1.1139996]], dtype=float32)`
			`"""`
			`xgb = XGBRegressor(verbosity=0, random_state=42)`
			`xgb.fit(features, target)`
			`# Predict target for test data`
			`predictions = xgb.predict(test_features)`
			`predictions = predictions.reshape(len(predictions), 1)`
			`return predictions`


			`def main() -> None:`
			`"""`
			`>>> main()`
			`Mean Absolute Error : 0.30957163379906033`
			`Mean Square Error : 0.22611560196662744`

			`The URL for this algorithm`
			`https://xgboost.readthedocs.io/en/stable/`
			`California house price dataset is used to demonstrate the algorithm.`
			`"""`
			`# Load California house price dataset`
			`california = fetch_california_housing()`
			`data, target = data_handling(california)`
			`x_train, x_test, y_train, y_test = train_test_split(`
			`data, target, test_size=0.25, random_state=1`
			`)`
			`predictions = xgboost(x_train, y_train, x_test)`
			`# Error printing`
			`print(f"Mean Absolute Error : {mean_absolute_error(y_test, predictions)}")`
			`print(f"Mean Square Error : {mean_squared_error(y_test, predictions)}")`


			`if __name__ == "__main__":`
			`import doctest`

			`doctest.testmod(verbose=True)`
			`main()`