Fixed imports

2025-04-18 03:37:35 +00:00 · 2023-10-06 17:13:04 +02:00 · 2023-10-06 17:13:04 +02:00 · f6404ccb10
commit f6404ccb10
parent 40c39a81f6
1 changed file with 10 additions and 16 deletions
--- a/machine_learning/multinomial_naive_bayes_classifier.py
+++ b/machine_learning/multinomial_naive_bayes_classifier.py
@@ -9,17 +9,16 @@ https://en.wikipedia.org/wiki/Naive_Bayes_classifier
 """

 import doctest
+
 import numpy as np
-from numpy.typing import ArrayLike
+import numpy.typing as npt
 from scipy import sparse
-from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.datasets import fetch_20newsgroups
+from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics import accuracy_score


-
-
-def group_indices_by_target(targets: ArrayLike) -> dict:
+def group_indices_by_target(targets: npt.ArrayLike) -> dict:
    """
    Associates to each target label the indices of the examples with that label

@@ -49,24 +48,24 @@ def group_indices_by_target(targets: ArrayLike) -> dict:


 class MultinomialNBClassifier:
-    def __init__(self, alpha=1):
+    def __init__(self, alpha: int = 1):
        self.classes = None
        self.features_probs = None
        self.priors = None
        self.alpha = alpha

-    def fit(self, data: sparse.csr_matrix, y: ArrayLike) -> None:
+    def fit(self, data: sparse.csr_matrix, targets: npt.ArrayLike) -> None:
        """
        Parameters
        ----------
        data : scipy.sparse.csr_matrix of shape (n_samples, n_features)
            Multinomial training examples

-        y : array-like of shape (n_samples,)
+        targets : array-like of shape (n_samples,)
            Target labels
        """
        n_examples, n_features = data.shape
-        grouped_indices = group_indices_by_target(y)
+        grouped_indices = group_indices_by_target(targets)
        self.classes = list(grouped_indices.keys())
        self.priors = np.zeros(shape=len(self.classes))
        self.features_probs = np.zeros(shape=(len(self.classes), n_features))
@@ -76,15 +75,13 @@ class MultinomialNBClassifier:
            prior_class_i = data_class_i.shape[0] / n_examples
            self.priors[i] = prior_class_i
            tot_features_count = data_class_i.sum()  # count of all features in class_i
-            features_count = np.array(data_class_i.sum(axis=0))[
-                0
-            ]  # count of each feature x_j in class_i
+            features_count = np.array(data_class_i.sum(axis=0))[0]
            for j, n_j in enumerate(features_count):
                self.features_probs[i][j] = (self.alpha + n_j) / (
                    tot_features_count + self.alpha * n_features
                )

-    def predict(self, data: sparse.csr_matrix) -> np.array:
+    def predict(self, data: sparse.csr_matrix) -> np.ndarray:
        """
        Parameters
        ----------
@@ -123,9 +120,6 @@ class MultinomialNBClassifier:


 def main() -> None:
-    """
-    Performs the text classification on the twenty_newsgroup dataset from sklearn
-    """
    newsgroups_train = fetch_20newsgroups(subset="train")
    newsgroups_test = fetch_20newsgroups(subset="test")
    x_train = newsgroups_train["data"]