mirror of
https://github.com/TheAlgorithms/Python.git
synced 2025-02-12 04:18:08 +00:00
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
This commit is contained in:
parent
e9f3d61643
commit
c1664e876d
|
@ -58,7 +58,12 @@ class MultinomialNBClassifier:
|
||||||
self._check_X(X)
|
self._check_X(X)
|
||||||
if X.shape[0] != len(y):
|
if X.shape[0] != len(y):
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"The expected shape for array y is (" + str(X.shape[0]) + ",), but got (" + str(len(y)) + ",)")
|
"The expected shape for array y is ("
|
||||||
|
+ str(X.shape[0])
|
||||||
|
+ ",), but got ("
|
||||||
|
+ str(len(y))
|
||||||
|
+ ",)"
|
||||||
|
)
|
||||||
|
|
||||||
def fit(self, X, y):
|
def fit(self, X, y):
|
||||||
"""
|
"""
|
||||||
|
@ -81,10 +86,14 @@ class MultinomialNBClassifier:
|
||||||
data_class_i = X[grouped_indices[class_i]]
|
data_class_i = X[grouped_indices[class_i]]
|
||||||
prior_class_i = data_class_i.shape[0] / n_examples
|
prior_class_i = data_class_i.shape[0] / n_examples
|
||||||
self.priors[i] = prior_class_i
|
self.priors[i] = prior_class_i
|
||||||
tot_features_count = data_class_i.sum() # count of all features in class_i
|
tot_features_count = data_class_i.sum() # count of all features in class_i
|
||||||
features_count = np.array(data_class_i.sum(axis=0))[0] # count of each feature x_j in class_i
|
features_count = np.array(data_class_i.sum(axis=0))[
|
||||||
|
0
|
||||||
|
] # count of each feature x_j in class_i
|
||||||
for j, n_j in enumerate(features_count):
|
for j, n_j in enumerate(features_count):
|
||||||
self.features_probs[i][j] = (self.alpha + n_j) / (tot_features_count + self.alpha * n_features)
|
self.features_probs[i][j] = (self.alpha + n_j) / (
|
||||||
|
tot_features_count + self.alpha * n_features
|
||||||
|
)
|
||||||
|
|
||||||
def predict(self, X):
|
def predict(self, X):
|
||||||
"""
|
"""
|
||||||
|
@ -117,19 +126,22 @@ class MultinomialNBClassifier:
|
||||||
log_priors = np.log(self.priors)
|
log_priors = np.log(self.priors)
|
||||||
for instance in X:
|
for instance in X:
|
||||||
theta = instance.multiply(log_features_probs).sum(axis=1)
|
theta = instance.multiply(log_features_probs).sum(axis=1)
|
||||||
likelihood = [log_prior_class_i + theta[i] for i, log_prior_class_i in enumerate(log_priors)]
|
likelihood = [
|
||||||
|
log_prior_class_i + theta[i]
|
||||||
|
for i, log_prior_class_i in enumerate(log_priors)
|
||||||
|
]
|
||||||
y_pred.append(self.classes[np.argmax(likelihood)])
|
y_pred.append(self.classes[np.argmax(likelihood)])
|
||||||
return np.array(y_pred)
|
return np.array(y_pred)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
newsgroups_train = fetch_20newsgroups(subset='train')
|
newsgroups_train = fetch_20newsgroups(subset="train")
|
||||||
newsgroups_test = fetch_20newsgroups(subset='test')
|
newsgroups_test = fetch_20newsgroups(subset="test")
|
||||||
X_train = newsgroups_train['data']
|
X_train = newsgroups_train["data"]
|
||||||
y_train = newsgroups_train['target']
|
y_train = newsgroups_train["target"]
|
||||||
X_test = newsgroups_test['data']
|
X_test = newsgroups_test["data"]
|
||||||
y_test = newsgroups_test['target']
|
y_test = newsgroups_test["target"]
|
||||||
vectorizer = TfidfVectorizer(stop_words='english')
|
vectorizer = TfidfVectorizer(stop_words="english")
|
||||||
X_train = vectorizer.fit_transform(X_train)
|
X_train = vectorizer.fit_transform(X_train)
|
||||||
X_test = vectorizer.transform(X_test)
|
X_test = vectorizer.transform(X_test)
|
||||||
|
|
||||||
|
@ -138,10 +150,12 @@ def main():
|
||||||
model.fit(X_train, y_train)
|
model.fit(X_train, y_train)
|
||||||
|
|
||||||
y_pred = model.predict(X_test)
|
y_pred = model.predict(X_test)
|
||||||
print("Accuracy of naive bayes text classifier: " + str(accuracy_score(y_test, y_pred)))
|
print(
|
||||||
|
"Accuracy of naive bayes text classifier: "
|
||||||
|
+ str(accuracy_score(y_test, y_pred))
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
doctest.testmod()
|
doctest.testmod()
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user