Python/machine_learning/local_weighted_learning/local_weighted_learning.py

# Required imports to run this file
import matplotlib.pyplot as plt
import numpy as np


# weighted matrix
def weighted_matrix(point: np.mat, training_data_x: np.mat, bandwidth: float) -> np.mat:
    """
    Calculate the weight for every point in the
    data set. It takes training_point , query_point, and tau
    Here Tau is not a fixed value it can be varied depends on output.
    tau --> bandwidth
    xmat -->Training data
    point --> the x where we want to make predictions
    >>> weighted_matrix(np.array([1., 1.]),np.mat([[16.99, 10.34], [21.01,23.68],
    ...                    [24.59,25.69]]), 0.6)
    matrix([[1.43807972e-207, 0.00000000e+000, 0.00000000e+000],
            [0.00000000e+000, 0.00000000e+000, 0.00000000e+000],
            [0.00000000e+000, 0.00000000e+000, 0.00000000e+000]])
    """
    # m is the number of training samples
    m, n = np.shape(training_data_x)
    # Initializing weights as identity matrix
    weights = np.mat(np.eye(m))
    # calculating weights for all training examples [x(i)'s]
    for j in range(m):
        diff = point - training_data_x[j]
        weights[j, j] = np.exp(diff * diff.T / (-2.0 * bandwidth ** 2))
    return weights


def local_weight(
    point: np.mat, training_data_x: np.mat, training_data_y: np.mat, bandwidth: float
) -> np.mat:
    """
    Calculate the local weights using the weight_matrix function on training data.
    Return the weighted matrix.
    >>> local_weight(np.array([1., 1.]),np.mat([[16.99, 10.34], [21.01,23.68],
    ...                 [24.59,25.69]]),np.mat([[1.01, 1.66, 3.5]]), 0.6)
    matrix([[0.00873174],
            [0.08272556]])
    """
    weight = weighted_matrix(point, training_data_x, bandwidth)
    W = (training_data_x.T * (weight * training_data_x)).I * (
        training_data_x.T * weight * training_data_y.T
    )

    return W


def local_weight_regression(
    training_data_x: np.mat, training_data_y: np.mat, bandwidth: float
) -> np.mat:
    """
    Calculate predictions for each data point on axis.
    >>> local_weight_regression(np.mat([[16.99, 10.34], [21.01,23.68],
    ...                            [24.59,25.69]]),np.mat([[1.01, 1.66, 3.5]]), 0.6)
    array([1.07173261, 1.65970737, 3.50160179])
    """
    m, n = np.shape(training_data_x)
    ypred = np.zeros(m)

    for i, item in enumerate(training_data_x):
        ypred[i] = item * local_weight(
            item, training_data_x, training_data_y, bandwidth
        )

    return ypred


def load_data(dataset_name: str, cola_name: str, colb_name: str) -> np.mat:
    """
    Function used for loading data from the seaborn splitting into x and y points
    >>> pass # this function has no doctest
    """
    import seaborn as sns

    data = sns.load_dataset(dataset_name)
    col_a = np.array(data[cola_name])  # total_bill
    col_b = np.array(data[colb_name])  # tip

    mcol_a = np.mat(col_a)
    mcol_b = np.mat(col_b)

    m = np.shape(mcol_b)[1]
    one = np.ones((1, m), dtype=int)

    # horizontal stacking
    training_data_x = np.hstack((one.T, mcol_a.T))

    return training_data_x, mcol_b, col_a, col_b


def get_preds(training_data_x: np.mat, mcol_b: np.mat, tau: float) -> np.ndarray:
    """
    Get predictions with minimum error for each training data
    >>> get_preds(np.mat([[16.99, 10.34], [21.01,23.68],
    ...                     [24.59,25.69]]),np.mat([[1.01, 1.66, 3.5]]), 0.6)
    array([1.07173261, 1.65970737, 3.50160179])
    """
    ypred = local_weight_regression(training_data_x, mcol_b, tau)
    return ypred


def plot_preds(
    training_data_x: np.mat,
    predictions: np.ndarray,
    col_x: np.ndarray,
    col_y: np.ndarray,
    cola_name: str,
    colb_name: str,
) -> plt.plot:
    """
    This function used to plot predictions and display the graph
    >>> pass #this function has no doctest
    """
    xsort = training_data_x.copy()
    xsort.sort(axis=0)
    plt.scatter(col_x, col_y, color="blue")
    plt.plot(
        xsort[:, 1],
        predictions[training_data_x[:, 1].argsort(0)],
        color="yellow",
        linewidth=5,
    )
    plt.title("Local Weighted Regression")
    plt.xlabel(cola_name)
    plt.ylabel(colb_name)
    plt.show()


if __name__ == "__main__":
    training_data_x, mcol_b, col_a, col_b = load_data("tips", "total_bill", "tip")
    predictions = get_preds(training_data_x, mcol_b, 0.5)
    plot_preds(training_data_x, predictions, col_a, col_b, "total_bill", "tip")
Local Weighted Learning (#5615) * Local Weighted Learning Added * Delete LWL directory * Local Weighted Learning Added * local weighted learning added * Delete LWL directory * Delete local_weighted_learning.py * rephrased code added * local weight learning updated * local weight learning updated * Updated dir * updated codespell * import modification * Doctests added * doctests updated * lcl updated * doctests updated * doctest values updated 2021-10-31 11:27:50 +00:00			`# Required imports to run this file`
			`import matplotlib.pyplot as plt`
			`import numpy as np`


			`# weighted matrix`
			`def weighted_matrix(point: np.mat, training_data_x: np.mat, bandwidth: float) -> np.mat:`
			`"""`
			`Calculate the weight for every point in the`
			`data set. It takes training_point , query_point, and tau`
			`Here Tau is not a fixed value it can be varied depends on output.`
			`tau --> bandwidth`
			`xmat -->Training data`
			`point --> the x where we want to make predictions`
			`>>> weighted_matrix(np.array([1., 1.]),np.mat([[16.99, 10.34], [21.01,23.68],`
			`... [24.59,25.69]]), 0.6)`
			`matrix([[1.43807972e-207, 0.00000000e+000, 0.00000000e+000],`
			`[0.00000000e+000, 0.00000000e+000, 0.00000000e+000],`
			`[0.00000000e+000, 0.00000000e+000, 0.00000000e+000]])`
			`"""`
			`# m is the number of training samples`
			`m, n = np.shape(training_data_x)`
			`# Initializing weights as identity matrix`
			`weights = np.mat(np.eye(m))`
			`# calculating weights for all training examples [x(i)'s]`
			`for j in range(m):`
			`diff = point - training_data_x[j]`
			`weights[j, j] = np.exp(diff * diff.T / (-2.0 * bandwidth ** 2))`
			`return weights`


			`def local_weight(`
			`point: np.mat, training_data_x: np.mat, training_data_y: np.mat, bandwidth: float`
			`) -> np.mat:`
			`"""`
			`Calculate the local weights using the weight_matrix function on training data.`
			`Return the weighted matrix.`
			`>>> local_weight(np.array([1., 1.]),np.mat([[16.99, 10.34], [21.01,23.68],`
			`... [24.59,25.69]]),np.mat([[1.01, 1.66, 3.5]]), 0.6)`
			`matrix([[0.00873174],`
			`[0.08272556]])`
			`"""`
			`weight = weighted_matrix(point, training_data_x, bandwidth)`
			`W = (training_data_x.T * (weight * training_data_x)).I * (`
			`training_data_x.T * weight * training_data_y.T`
			`)`

			`return W`


			`def local_weight_regression(`
			`training_data_x: np.mat, training_data_y: np.mat, bandwidth: float`
			`) -> np.mat:`
			`"""`
			`Calculate predictions for each data point on axis.`
			`>>> local_weight_regression(np.mat([[16.99, 10.34], [21.01,23.68],`
			`... [24.59,25.69]]),np.mat([[1.01, 1.66, 3.5]]), 0.6)`
			`array([1.07173261, 1.65970737, 3.50160179])`
			`"""`
			`m, n = np.shape(training_data_x)`
			`ypred = np.zeros(m)`

			`for i, item in enumerate(training_data_x):`
			`ypred[i] = item * local_weight(`
			`item, training_data_x, training_data_y, bandwidth`
			`)`

			`return ypred`


			`def load_data(dataset_name: str, cola_name: str, colb_name: str) -> np.mat:`
			`"""`
			`Function used for loading data from the seaborn splitting into x and y points`
			`>>> pass # this function has no doctest`
			`"""`
			`import seaborn as sns`

			`data = sns.load_dataset(dataset_name)`
			`col_a = np.array(data[cola_name]) # total_bill`
			`col_b = np.array(data[colb_name]) # tip`

			`mcol_a = np.mat(col_a)`
			`mcol_b = np.mat(col_b)`

			`m = np.shape(mcol_b)[1]`
			`one = np.ones((1, m), dtype=int)`

			`# horizontal stacking`
			`training_data_x = np.hstack((one.T, mcol_a.T))`

			`return training_data_x, mcol_b, col_a, col_b`


			`def get_preds(training_data_x: np.mat, mcol_b: np.mat, tau: float) -> np.ndarray:`
			`"""`
			`Get predictions with minimum error for each training data`
			`>>> get_preds(np.mat([[16.99, 10.34], [21.01,23.68],`
			`... [24.59,25.69]]),np.mat([[1.01, 1.66, 3.5]]), 0.6)`
			`array([1.07173261, 1.65970737, 3.50160179])`
			`"""`
			`ypred = local_weight_regression(training_data_x, mcol_b, tau)`
			`return ypred`


			`def plot_preds(`
			`training_data_x: np.mat,`
			`predictions: np.ndarray,`
			`col_x: np.ndarray,`
			`col_y: np.ndarray,`
			`cola_name: str,`
			`colb_name: str,`
			`) -> plt.plot:`
			`"""`
			`This function used to plot predictions and display the graph`
			`>>> pass #this function has no doctest`
			`"""`
			`xsort = training_data_x.copy()`
			`xsort.sort(axis=0)`
			`plt.scatter(col_x, col_y, color="blue")`
			`plt.plot(`
			`xsort[:, 1],`
			`predictions[training_data_x[:, 1].argsort(0)],`
			`color="yellow",`
			`linewidth=5,`
			`)`
			`plt.title("Local Weighted Regression")`
			`plt.xlabel(cola_name)`
			`plt.ylabel(colb_name)`
			`plt.show()`


			`if __name__ == "__main__":`
			`training_data_x, mcol_b, col_a, col_b = load_data("tips", "total_bill", "tip")`
			`predictions = get_preds(training_data_x, mcol_b, 0.5)`
			`plot_preds(training_data_x, predictions, col_a, col_b, "total_bill", "tip")`