Added doctest to decision_tree.py (#11143)

* Added doctest to decision_tree.py

* Update decision_tree.py

* Update machine_learning/decision_tree.py

* Update machine_learning/decision_tree.py

* raise ValueError()

* Update decision_tree.py

---------

Co-authored-by: Christian Clauss <cclauss@me.com>
This commit is contained in:
Suyash Dongre 2023-11-05 13:46:00 +05:30 committed by GitHub
parent ebfdb127e7
commit 257cfbdf6e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -18,7 +18,7 @@ class DecisionTree:
def mean_squared_error(self, labels, prediction):
"""
mean_squared_error:
@param labels: a one dimensional numpy array
@param labels: a one-dimensional numpy array
@param prediction: a floating point value
return value: mean_squared_error calculates the error if prediction is used to
estimate the labels
@@ -44,26 +44,47 @@ class DecisionTree:
def train(self, x, y):
"""
train:
@param x: a one dimensional numpy array
@param y: a one dimensional numpy array.
@param x: a one-dimensional numpy array
@param y: a one-dimensional numpy array.
The contents of y are the labels for the corresponding X values
train does not have a return value
"""
train() does not have a return value
"""
this section is to check that the inputs conform to our dimensionality
Examples:
1. Try to train when x & y are of same length & 1 dimensions (No errors)
>>> dt = DecisionTree()
>>> dt.train(np.array([10,20,30,40,50]),np.array([0,0,0,1,1]))
2. Try to train when x is 2 dimensions
>>> dt = DecisionTree()
>>> dt.train(np.array([[1,2,3,4,5],[1,2,3,4,5]]),np.array([0,0,0,1,1]))
Traceback (most recent call last):
...
ValueError: Input data set must be one-dimensional
3. Try to train when x and y are not of the same length
>>> dt = DecisionTree()
>>> dt.train(np.array([1,2,3,4,5]),np.array([[0,0,0,1,1],[0,0,0,1,1]]))
Traceback (most recent call last):
...
ValueError: x and y have different lengths
4. Try to train when x & y are of the same length but different dimensions
>>> dt = DecisionTree()
>>> dt.train(np.array([1,2,3,4,5]),np.array([[1],[2],[3],[4],[5]]))
Traceback (most recent call last):
...
ValueError: Data set labels must be one-dimensional
This section is to check that the inputs conform to our dimensionality
constraints
"""
if x.ndim != 1:
print("Error: Input data set must be one dimensional")
return
raise ValueError("Input data set must be one-dimensional")
if len(x) != len(y):
print("Error: X and y have different lengths")
return
raise ValueError("x and y have different lengths")
if y.ndim != 1:
print("Error: Data set labels must be one dimensional")
return
raise ValueError("Data set labels must be one-dimensional")
if len(x) < 2 * self.min_leaf_size:
self.prediction = np.mean(y)