mirror of
https://github.com/TheAlgorithms/Python.git
synced 2025-02-07 10:00:55 +00:00
Fixed imports
This commit is contained in:
parent
40c39a81f6
commit
f6404ccb10
|
@ -9,17 +9,16 @@ https://en.wikipedia.org/wiki/Naive_Bayes_classifier
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import doctest
|
import doctest
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from numpy.typing import ArrayLike
|
import numpy.typing as npt
|
||||||
from scipy import sparse
|
from scipy import sparse
|
||||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
|
||||||
from sklearn.datasets import fetch_20newsgroups
|
from sklearn.datasets import fetch_20newsgroups
|
||||||
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||||
from sklearn.metrics import accuracy_score
|
from sklearn.metrics import accuracy_score
|
||||||
|
|
||||||
|
|
||||||
|
def group_indices_by_target(targets: npt.ArrayLike) -> dict:
|
||||||
|
|
||||||
def group_indices_by_target(targets: ArrayLike) -> dict:
|
|
||||||
"""
|
"""
|
||||||
Associates to each target label the indices of the examples with that label
|
Associates to each target label the indices of the examples with that label
|
||||||
|
|
||||||
|
@ -49,24 +48,24 @@ def group_indices_by_target(targets: ArrayLike) -> dict:
|
||||||
|
|
||||||
|
|
||||||
class MultinomialNBClassifier:
|
class MultinomialNBClassifier:
|
||||||
def __init__(self, alpha=1):
|
def __init__(self, alpha: int = 1):
|
||||||
self.classes = None
|
self.classes = None
|
||||||
self.features_probs = None
|
self.features_probs = None
|
||||||
self.priors = None
|
self.priors = None
|
||||||
self.alpha = alpha
|
self.alpha = alpha
|
||||||
|
|
||||||
def fit(self, data: sparse.csr_matrix, y: ArrayLike) -> None:
|
def fit(self, data: sparse.csr_matrix, targets: npt.ArrayLike) -> None:
|
||||||
"""
|
"""
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
data : scipy.sparse.csr_matrix of shape (n_samples, n_features)
|
data : scipy.sparse.csr_matrix of shape (n_samples, n_features)
|
||||||
Multinomial training examples
|
Multinomial training examples
|
||||||
|
|
||||||
y : array-like of shape (n_samples,)
|
targets : array-like of shape (n_samples,)
|
||||||
Target labels
|
Target labels
|
||||||
"""
|
"""
|
||||||
n_examples, n_features = data.shape
|
n_examples, n_features = data.shape
|
||||||
grouped_indices = group_indices_by_target(y)
|
grouped_indices = group_indices_by_target(targets)
|
||||||
self.classes = list(grouped_indices.keys())
|
self.classes = list(grouped_indices.keys())
|
||||||
self.priors = np.zeros(shape=len(self.classes))
|
self.priors = np.zeros(shape=len(self.classes))
|
||||||
self.features_probs = np.zeros(shape=(len(self.classes), n_features))
|
self.features_probs = np.zeros(shape=(len(self.classes), n_features))
|
||||||
|
@ -76,15 +75,13 @@ class MultinomialNBClassifier:
|
||||||
prior_class_i = data_class_i.shape[0] / n_examples
|
prior_class_i = data_class_i.shape[0] / n_examples
|
||||||
self.priors[i] = prior_class_i
|
self.priors[i] = prior_class_i
|
||||||
tot_features_count = data_class_i.sum() # count of all features in class_i
|
tot_features_count = data_class_i.sum() # count of all features in class_i
|
||||||
features_count = np.array(data_class_i.sum(axis=0))[
|
features_count = np.array(data_class_i.sum(axis=0))[0]
|
||||||
0
|
|
||||||
] # count of each feature x_j in class_i
|
|
||||||
for j, n_j in enumerate(features_count):
|
for j, n_j in enumerate(features_count):
|
||||||
self.features_probs[i][j] = (self.alpha + n_j) / (
|
self.features_probs[i][j] = (self.alpha + n_j) / (
|
||||||
tot_features_count + self.alpha * n_features
|
tot_features_count + self.alpha * n_features
|
||||||
)
|
)
|
||||||
|
|
||||||
def predict(self, data: sparse.csr_matrix) -> np.array:
|
def predict(self, data: sparse.csr_matrix) -> np.ndarray:
|
||||||
"""
|
"""
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
|
@ -123,9 +120,6 @@ class MultinomialNBClassifier:
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
|
def main() -> None:
|
||||||
"""
|
|
||||||
Performs the text classification on the twenty_newsgroup dataset from sklearn
|
|
||||||
"""
|
|
||||||
newsgroups_train = fetch_20newsgroups(subset="train")
|
newsgroups_train = fetch_20newsgroups(subset="train")
|
||||||
newsgroups_test = fetch_20newsgroups(subset="test")
|
newsgroups_test = fetch_20newsgroups(subset="test")
|
||||||
x_train = newsgroups_train["data"]
|
x_train = newsgroups_train["data"]
|
||||||
|
|
Loading…
Reference in New Issue
Block a user