diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index fe3ec15a6..000000000
--- a/.travis.yml
+++ /dev/null
@@ -1,14 +0,0 @@
-language: python
-python:
-  - "3.2"
-  - "3.3"
-  - "3.4"
-  - "3.5"
-  - "3.6"
-  - "3.6-dev"
-
-install:
-  - if [ "$TRAVIS_PYTHON_VERSION" == "3.2" ]; then travis_retry pip install coverage==3.7.1; fi
-  - if [ "$TRAVIS_PYTHON_VERSION" != "3.2" ]; then travis_retry pip install coverage; fi
-  - "pip install pytest pytest-cov"
-script: py.test --doctest-modules --cov ./
diff --git a/data_structures/Binary Tree/binary_seach_tree.py b/data_structures/Binary Tree/binary_seach_tree.py
index 1dac948ae..0b1726534 100644
--- a/data_structures/Binary Tree/binary_seach_tree.py
+++ b/data_structures/Binary Tree/binary_seach_tree.py
@@ -8,7 +8,7 @@ class Node:
     def __init__(self, label):
         self.label = label
         self.left = None
-        self.rigt = None
+        self.right = None
 
     def getLabel(self):
         return self.label
@@ -23,10 +23,10 @@ class Node:
         self.left = left
 
     def getRight(self):
-        return self.rigt
+        return self.right
 
     def setRight(self, right):
-        self.rigt = right
+        self.right = right
 
 
 class BinarySearchTree:
diff --git a/Graphs/Breadth_First_Search.py b/data_structures/Graph/Breadth_First_Search.py
similarity index 59%
rename from Graphs/Breadth_First_Search.py
rename to data_structures/Graph/Breadth_First_Search.py
index 1a3fdfd4d..92a6e819b 100644
--- a/Graphs/Breadth_First_Search.py
+++ b/data_structures/Graph/Breadth_First_Search.py
@@ -1,9 +1,9 @@
 class GRAPH:
     """docstring for GRAPH"""
     def __init__(self, nodes):
-        self.nodes=nodes
-        self.graph=[[0]*nodes for i in range (nodes)]
-        self.visited=[0]*nodes
+        self.nodes = nodes
+        self.graph = [[0]*nodes for i in range(nodes)]
+        self.visited = [0]*nodes
 
 
     def show(self):
 
@@ -23,7 +23,7 @@ class GRAPH:
         v = queue[0]
         for u in range(self.vertex):
             if self.graph[v][u] == 1:
-                if visited[u]== False:
+                if not visited[u]:
                     visited[u] = True
                     queue.append(u)
                     print('%d visited' % (u +1))
@@ -41,30 +41,32 @@ g.add_edge(4,8)
 g.add_edge(5,9)
 g.add_edge(6,10)
 g.bfs(4)
-=======
-    print self.graph
 
     def add_edge(self, i, j):
         self.graph[i][j]=1
         self.graph[j][i]=1
 
-    def bfs(self,s):
-        queue=[s]
-        self.visited[s]=1
-        while len(queue)!=0:
-            x=queue.pop(0)
+    def bfs(self, s):
+        queue = [s]
+        self.visited[s] = 1
+        while len(queue) != 0:
+            x = queue.pop(0)
             print(x)
-            for i in range(0,self.nodes):
-                if self.graph[x][i]==1 and self.visited[i]==0:
+            for i in range(0, self.nodes):
+                if self.graph[x][i] == 1 and self.visited[i] == 0:
                     queue.append(i)
-                    self.visited[i]=1
+                    self.visited[i] = 1
 
-n=int(input("Enter the number of Nodes : "))
-g=GRAPH(n)
-e=int(input("Enter the no of edges : "))
+n = int(input("Enter the number of Nodes : "))
+g = GRAPH(n)
+e = int(input("Enter the no of edges : "))
 print("Enter the edges (u v)")
-for i in range(0,e):
-    u,v=map(int, raw_input().split())
-    g.add_edge(u,v)
-s=int(input("Enter the source node :"))
+
+for i in range(0, e):
+    u, v = map(int, input().split())
+    g.add_edge(u, v)
+
+s = int(input("Enter the source node :"))
 g.bfs(s)
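+
+# Illustrative usage sketch (an editorial example, assuming the GRAPH class
+# above): a 3-node path 0-1-2 searched from node 0 is visited level by
+# level, printing 0, 1, 2 in that order.
+#
+#   g = GRAPH(3)
+#   g.add_edge(0, 1)
+#   g.add_edge(1, 2)
+#   g.bfs(0)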
diff --git a/Graphs/Deep_First_Search.py b/data_structures/Graph/Deep_First_Search.py
similarity index 100%
rename from Graphs/Deep_First_Search.py
rename to data_structures/Graph/Deep_First_Search.py
diff --git a/Graphs/Graph_list.py b/data_structures/Graph/Graph_list.py
similarity index 100%
rename from Graphs/Graph_list.py
rename to data_structures/Graph/Graph_list.py
diff --git a/Graphs/Graph_matrix.py b/data_structures/Graph/Graph_matrix.py
similarity index 100%
rename from Graphs/Graph_matrix.py
rename to data_structures/Graph/Graph_matrix.py
diff --git a/data_structures/Graph/P01_BreadthFirstSearch.py b/data_structures/Graph/P01_BreadthFirstSearch.py
deleted file mode 100644
index 16b1b2007..000000000
--- a/data_structures/Graph/P01_BreadthFirstSearch.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# Author: OMKAR PATHAK
-
-class Graph():
-    def __init__(self):
-        self.vertex = {}
-
-    # for printing the Graph vertexes
-    def printGraph(self):
-        for i in self.vertex.keys():
-            print(i,' -> ', ' -> '.join([str(j) for j in self.vertex[i]]))
-
-    # for adding the edge beween two vertexes
-    def addEdge(self, fromVertex, toVertex):
-        # check if vertex is already present,
-        if fromVertex in self.vertex.keys():
-            self.vertex[fromVertex].append(toVertex)
-        else:
-            # else make a new vertex
-            self.vertex[fromVertex] = [toVertex]
-
-    def BFS(self, startVertex):
-        # Take a list for stoting already visited vertexes
-        visited = [False] * len(self.vertex)
-
-        # create a list to store all the vertexes for BFS
-        queue = []
-
-        # mark the source node as visited and enqueue it
-        visited[startVertex] = True
-        queue.append(startVertex)
-
-        while queue:
-            startVertex = queue.pop(0)
-            print(startVertex, end = ' ')
-
-            # mark all adjacent nodes as visited and print them
-            for i in self.vertex[startVertex]:
-                if visited[i] == False:
-                    queue.append(i)
-                    visited[i] = True
-
-if __name__ == '__main__':
-    g = Graph()
-    g.addEdge(0, 1)
-    g.addEdge(0, 2)
-    g.addEdge(1, 2)
-    g.addEdge(2, 0)
-    g.addEdge(2, 3)
-    g.addEdge(3, 3)
-
-    g.printGraph()
-    print('BFS:')
-    g.BFS(2)
-
-    # OUTPUT:
-    # 0  ->  1 -> 2
-    # 1  ->  2
-    # 2  ->  0 -> 3
-    # 3  ->  3
-    # BFS:
-    # 2 0 3 1
diff --git a/data_structures/Graph/P02_DepthFirstSearch.py b/data_structures/Graph/P02_DepthFirstSearch.py
deleted file mode 100644
index 94ef3cb86..000000000
--- a/data_structures/Graph/P02_DepthFirstSearch.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# Author: OMKAR PATHAK
-
-class Graph():
-    def __init__(self):
-        self.vertex = {}
-
-    # for printing the Graph vertexes
-    def printGraph(self):
-        print(self.vertex)
-        for i in self.vertex.keys():
-            print(i,' -> ', ' -> '.join([str(j) for j in self.vertex[i]]))
-
-    # for adding the edge beween two vertexes
-    def addEdge(self, fromVertex, toVertex):
-        # check if vertex is already present,
-        if fromVertex in self.vertex.keys():
-            self.vertex[fromVertex].append(toVertex)
-        else:
-            # else make a new vertex
-            self.vertex[fromVertex] = [toVertex]
-
-    def DFS(self):
-        # visited array for storing already visited nodes
-        visited = [False] * len(self.vertex)
-
-        # call the recursive helper function
-        for i in range(len(self.vertex)):
-            if visited[i] == False:
-                self.DFSRec(i, visited)
-
-    def DFSRec(self, startVertex, visited):
-        # mark start vertex as visited
-        visited[startVertex] = True
-
-        print(startVertex, end = ' ')
-
-        # Recur for all the vertexes that are adjacent to this node
-        for i in self.vertex.keys():
-            if visited[i] == False:
-                self.DFSRec(i, visited)
-
-if __name__ == '__main__':
-    g = Graph()
-    g.addEdge(0, 1)
-    g.addEdge(0, 2)
-    g.addEdge(1, 2)
-    g.addEdge(2, 0)
-    g.addEdge(2, 3)
-    g.addEdge(3, 3)
-
-    g.printGraph()
-    print('DFS:')
-    g.DFS()
-
-    # OUTPUT:
-    # 0  ->  1 -> 2
-    # 1  ->  2
-    # 2  ->  0 -> 3
-    # 3  ->  3
-    # DFS:
-    # 0 1 2 3
diff --git a/machine_learning/decision_tree.py b/machine_learning/decision_tree.py
new file mode 100644
index 000000000..51f600cac
--- /dev/null
+++ b/machine_learning/decision_tree.py
@@ -0,0 +1,139 @@
+"""
+Implementation of a basic regression decision tree.
+Input data set: The input data set must be 1-dimensional with continuous labels.
+Output: The decision tree maps a real number input to a real number output.
+"""
+
+import numpy as np
+
+class Decision_Tree:
+    def __init__(self, depth = 5, min_leaf_size = 5):
+        self.depth = depth
+        self.decision_boundary = 0
+        self.left = None
+        self.right = None
+        self.min_leaf_size = min_leaf_size
+        self.prediction = None
+
+    def mean_squared_error(self, labels, prediction):
+        """
+        mean_squared_error:
+        @param labels: a one-dimensional numpy array
+        @param prediction: a floating point value
+        return value: the mean squared error of using prediction to estimate the labels
+        """
+        if labels.ndim != 1:
+            print("Error: Input labels must be one dimensional")
+
+        return np.mean((labels - prediction) ** 2)
+
+    def train(self, X, y):
+        """
+        train:
+        @param X: a one-dimensional numpy array
+        @param y: a one-dimensional numpy array.
+        The contents of y are the labels for the corresponding X values
+
+        train does not have a return value
+        """
+
+        # check that the inputs conform to our dimensionality constraints
+        if X.ndim != 1:
+            print("Error: Input data set must be one dimensional")
+            return
+        if len(X) != len(y):
+            print("Error: X and y have different lengths")
+            return
+        if y.ndim != 1:
+            print("Error: Data set labels must be one dimensional")
+            return
+
+        if len(X) < 2 * self.min_leaf_size:
+            self.prediction = np.mean(y)
+            return
+
+        if self.depth == 1:
+            self.prediction = np.mean(y)
+            return
+
+        best_split = 0
+        min_error = self.mean_squared_error(y, np.mean(y)) * 2
+
+        # loop over all possible splits for the decision tree and find the best one.
+        # if no split gives an error below 2 * the error for the entire array,
+        # the data set is not split and the mean of the entire array is used as the predictor
+        for i in range(len(X)):
+            if len(X[:i]) < self.min_leaf_size:
+                continue
+            elif len(X[i:]) < self.min_leaf_size:
+                continue
+            else:
+                error_left = self.mean_squared_error(y[:i], np.mean(y[:i]))
+                error_right = self.mean_squared_error(y[i:], np.mean(y[i:]))
+                error = error_left + error_right
+                if error < min_error:
+                    best_split = i
+                    min_error = error
+
+        if best_split != 0:
+            left_X = X[:best_split]
+            left_y = y[:best_split]
+            right_X = X[best_split:]
+            right_y = y[best_split:]
+
+            self.decision_boundary = X[best_split]
+            self.left = Decision_Tree(depth = self.depth - 1, min_leaf_size = self.min_leaf_size)
+            self.right = Decision_Tree(depth = self.depth - 1, min_leaf_size = self.min_leaf_size)
+            self.left.train(left_X, left_y)
+            self.right.train(right_X, right_y)
+        else:
+            self.prediction = np.mean(y)
+
+        return
+
+    def predict(self, x):
+        """
+        predict:
+        @param x: a floating point value to predict the label of
+        the prediction function works by recursively calling the predict function
+        of the appropriate subtree based on the tree's decision boundary
+        """
+        if self.prediction is not None:
+            return self.prediction
+        elif self.left is not None and self.right is not None:
+            if x >= self.decision_boundary:
+                return self.right.predict(x)
+            else:
+                return self.left.predict(x)
+        else:
+            print("Error: Decision tree not yet trained")
+            return None
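+
+# Worked example (an editorial sketch, not part of the original module): with
+# the default min_leaf_size of 5, a two-sample data set falls below the
+# 2 * min_leaf_size threshold, so the tree simply predicts the mean label.
+#
+#   tree = Decision_Tree()
+#   tree.train(np.array([0.0, 1.0]), np.array([2.0, 4.0]))
+#   tree.predict(0.5)  # returns 3.0, the mean of the two labels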
+ """ + X = np.arange(-1., 1., 0.005) + y = np.sin(X) + + tree = Decision_Tree(depth = 10, min_leaf_size = 10) + tree.train(X,y) + + test_cases = (np.random.rand(10) * 2) - 1 + predictions = np.array([tree.predict(x) for x in test_cases]) + avg_error = np.mean((predictions - test_cases) ** 2) + + print("Test values: " + str(test_cases)) + print("Predictions: " + str(predictions)) + print("Average error: " + str(avg_error)) + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/searches/binary_search.py b/searches/binary_search.py index 13b54f498..8dc3008da 100644 --- a/searches/binary_search.py +++ b/searches/binary_search.py @@ -110,9 +110,9 @@ def binary_search_by_recursion(sorted_collection, item, left, right): if sorted_collection[midpoint] == item: return midpoint elif sorted_collection[midpoint] > item: - return binary_search_by_recursion(sorted_collection, item, left, right-1) + return binary_search_by_recursion(sorted_collection, item, left, midpoint-1) else: - return binary_search_by_recursion(sorted_collection, item, left+1, right) + return binary_search_by_recursion(sorted_collection, item, midpoint+1, right) def __assert_sorted(collection): """Check if collection is sorted, if not - raises :py:class:`ValueError` diff --git a/searches/quick_select.py b/searches/quick_select.py new file mode 100644 index 000000000..e5e2ce99c --- /dev/null +++ b/searches/quick_select.py @@ -0,0 +1,47 @@ +import collections +import sys +import random +import time +import math +""" +A python implementation of the quick select algorithm, which is efficient for calculating the value that would appear in the index of a list if it would be sorted, even if it is not already sorted +https://en.wikipedia.org/wiki/Quickselect +""" +def _partition(data, pivot): + """ + Three way partition the data into smaller, equal and greater lists, + in relationship to the pivot + :param data: The data to be sorted (a list) + :param pivot: The value to partition the data on + :return: Three list: smaller, equal and greater + """ + less, equal, greater = [], [], [] + for element in data: + if element.address < pivot.address: + less.append(element) + elif element.address > pivot.address: + greater.append(element) + else: + equal.append(element) + return less, equal, greater + + def quickSelect(list, k): + #k = len(list) // 2 when trying to find the median (index that value would be when list is sorted) + smaller = [] + larger = [] + pivot = random.randint(0, len(list) - 1) + pivot = list[pivot] + count = 0 + smaller, equal, larger =_partition(list, pivot) + count = len(equal) + m = len(smaller) + + #k is the pivot + if m <= k < m + count: + return pivot + # must be in smaller + elif m > k: + return quickSelect(smaller, k) + #must be in larger + else: + return quickSelect(larger, k - (m + count))