From 42c5863f4e35c382c67660e5626fb3e19a1afa41 Mon Sep 17 00:00:00 2001
From: Daniel K
Date: Thu, 28 Sep 2017 10:40:22 -0400
Subject: [PATCH 01/11] Merged Graphs

---
 {Graphs => data_structures/Graph}/Breadth_First_Search.py | 0
 {Graphs => data_structures/Graph}/Deep_First_Search.py | 0
 {Graphs => data_structures/Graph}/Graph_list.py | 0
 {Graphs => data_structures/Graph}/Graph_matrix.py | 0
 4 files changed, 0 insertions(+), 0 deletions(-)
 rename {Graphs => data_structures/Graph}/Breadth_First_Search.py (100%)
 rename {Graphs => data_structures/Graph}/Deep_First_Search.py (100%)
 rename {Graphs => data_structures/Graph}/Graph_list.py (100%)
 rename {Graphs => data_structures/Graph}/Graph_matrix.py (100%)

diff --git a/Graphs/Breadth_First_Search.py b/data_structures/Graph/Breadth_First_Search.py
similarity index 100%
rename from Graphs/Breadth_First_Search.py
rename to data_structures/Graph/Breadth_First_Search.py
diff --git a/Graphs/Deep_First_Search.py b/data_structures/Graph/Deep_First_Search.py
similarity index 100%
rename from Graphs/Deep_First_Search.py
rename to data_structures/Graph/Deep_First_Search.py
diff --git a/Graphs/Graph_list.py b/data_structures/Graph/Graph_list.py
similarity index 100%
rename from Graphs/Graph_list.py
rename to data_structures/Graph/Graph_list.py
diff --git a/Graphs/Graph_matrix.py b/data_structures/Graph/Graph_matrix.py
similarity index 100%
rename from Graphs/Graph_matrix.py
rename to data_structures/Graph/Graph_matrix.py

From 8bae14ba1cd3d6bcd5764dc4561088abe5c4b747 Mon Sep 17 00:00:00 2001
From: Anup Kumar Panwar <1anuppanwar@gmail.com>
Date: Fri, 29 Sep 2017 11:22:32 +0530
Subject: [PATCH 02/11] Update .travis.yml

---
 .travis.yml | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index fe3ec15a6..1e0ad55bb 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,14 +1,19 @@
 language: python
 python:
+  - "2.6"
+  - "2.7"
   - "3.2"
   - "3.3"
   - "3.4"
   - "3.5"
+  - "3.5-dev" # 3.5 development branch
   - "3.6"
-  - "3.6-dev"
-
-install:
-  - if [ "$TRAVIS_PYTHON_VERSION" == "3.2" ]; then travis_retry pip install coverage==3.7.1; fi
-  - if [ "$TRAVIS_PYTHON_VERSION" != "3.2" ]; then travis_retry pip install coverage; fi
-  - "pip install pytest pytest-cov"
-script: py.test --doctest-modules --cov ./
+  - "3.6-dev" # 3.6 development branch
+  - "3.7-dev" # 3.7 development branch
+  - "nightly"
+
+install:
+  - pip install -r requirements.txt
+
+script:
+  - py.test

From aa8485b4dff800b7cc5d65f8a84fb7bc71f9e0a1 Mon Sep 17 00:00:00 2001
From: Anup Kumar Panwar <1anuppanwar@gmail.com>
Date: Fri, 29 Sep 2017 11:28:07 +0530
Subject: [PATCH 03/11] Delete .travis.yml

---
 .travis.yml | 19 -------------------
 1 file changed, 19 deletions(-)
 delete mode 100644 .travis.yml

diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 1e0ad55bb..000000000
--- a/.travis.yml
+++ /dev/null
@@ -1,19 +0,0 @@
-language: python
-python:
-  - "2.6"
-  - "2.7"
-  - "3.2"
-  - "3.3"
-  - "3.4"
-  - "3.5"
-  - "3.5-dev" # 3.5 development branch
-  - "3.6"
-  - "3.6-dev" # 3.6 development branch
-  - "3.7-dev" # 3.7 development branch
-  - "nightly"
-
-install:
-  - pip install -r requirements.txt
-
-script:
-  - py.test

From 2a916b010b9ccb2aee2d8f6a7cfffd45b6a3cbc9 Mon Sep 17 00:00:00 2001
From: b1o0d4x3
Date: Fri, 6 Oct 2017 15:24:56 +0530
Subject: [PATCH 04/11] Delete P01_BreadthFirstSearch.py

---
 .../Graph/P01_BreadthFirstSearch.py | 61 ------------------
 1 file changed, 61 deletions(-)
 delete mode 100644 data_structures/Graph/P01_BreadthFirstSearch.py
diff --git a/data_structures/Graph/P01_BreadthFirstSearch.py b/data_structures/Graph/P01_BreadthFirstSearch.py
deleted file mode 100644
index 16b1b2007..000000000
--- a/data_structures/Graph/P01_BreadthFirstSearch.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# Author: OMKAR PATHAK
-
-class Graph():
-    def __init__(self):
-        self.vertex = {}
-
-    # for printing the Graph vertexes
-    def printGraph(self):
-        for i in self.vertex.keys():
-            print(i,' -> ', ' -> '.join([str(j) for j in self.vertex[i]]))
-
-    # for adding the edge beween two vertexes
-    def addEdge(self, fromVertex, toVertex):
-        # check if vertex is already present,
-        if fromVertex in self.vertex.keys():
-            self.vertex[fromVertex].append(toVertex)
-        else:
-            # else make a new vertex
-            self.vertex[fromVertex] = [toVertex]
-
-    def BFS(self, startVertex):
-        # Take a list for stoting already visited vertexes
-        visited = [False] * len(self.vertex)
-
-        # create a list to store all the vertexes for BFS
-        queue = []
-
-        # mark the source node as visited and enqueue it
-        visited[startVertex] = True
-        queue.append(startVertex)
-
-        while queue:
-            startVertex = queue.pop(0)
-            print(startVertex, end = ' ')
-
-            # mark all adjacent nodes as visited and print them
-            for i in self.vertex[startVertex]:
-                if visited[i] == False:
-                    queue.append(i)
-                    visited[i] = True
-
-if __name__ == '__main__':
-    g = Graph()
-    g.addEdge(0, 1)
-    g.addEdge(0, 2)
-    g.addEdge(1, 2)
-    g.addEdge(2, 0)
-    g.addEdge(2, 3)
-    g.addEdge(3, 3)
-
-    g.printGraph()
-    print('BFS:')
-    g.BFS(2)
-
-    # OUTPUT:
-    # 0  ->  1 -> 2
-    # 1  ->  2
-    # 2  ->  0 -> 3
-    # 3  ->  3
-    # BFS:
-    # 2 0 3 1

From d33044eb058b0fd5e04aef5620996cbe110589bf Mon Sep 17 00:00:00 2001
From: b1o0d4x3
Date: Fri, 6 Oct 2017 15:25:25 +0530
Subject: [PATCH 05/11] Delete P02_DepthFirstSearch.py

---
 data_structures/Graph/P02_DepthFirstSearch.py | 61 ------------------
 1 file changed, 61 deletions(-)
 delete mode 100644 data_structures/Graph/P02_DepthFirstSearch.py

diff --git a/data_structures/Graph/P02_DepthFirstSearch.py b/data_structures/Graph/P02_DepthFirstSearch.py
deleted file mode 100644
index 94ef3cb86..000000000
--- a/data_structures/Graph/P02_DepthFirstSearch.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# Author: OMKAR PATHAK
-
-class Graph():
-    def __init__(self):
-        self.vertex = {}
-
-    # for printing the Graph vertexes
-    def printGraph(self):
-        print(self.vertex)
-        for i in self.vertex.keys():
-            print(i,' -> ', ' -> '.join([str(j) for j in self.vertex[i]]))
-
-    # for adding the edge beween two vertexes
-    def addEdge(self, fromVertex, toVertex):
-        # check if vertex is already present,
-        if fromVertex in self.vertex.keys():
-            self.vertex[fromVertex].append(toVertex)
-        else:
-            # else make a new vertex
-            self.vertex[fromVertex] = [toVertex]
-
-    def DFS(self):
-        # visited array for storing already visited nodes
-        visited = [False] * len(self.vertex)
-
-        # call the recursive helper function
-        for i in range(len(self.vertex)):
-            if visited[i] == False:
-                self.DFSRec(i, visited)
-
-    def DFSRec(self, startVertex, visited):
-        # mark start vertex as visited
-        visited[startVertex] = True
-
-        print(startVertex, end = ' ')
-
-        # Recur for all the vertexes that are adjacent to this node
-        for i in self.vertex.keys():
-            if visited[i] == False:
-                self.DFSRec(i, visited)
-
-if __name__ == '__main__':
-    g = Graph()
-    g.addEdge(0, 1)
-    g.addEdge(0, 2)
-    g.addEdge(1, 2)
-    g.addEdge(2, 0)
-    g.addEdge(2, 3)
-    g.addEdge(3, 3)
-
-    g.printGraph()
-    print('DFS:')
-    g.DFS()
-
-    # OUTPUT:
-    # 0  ->  1 -> 2
-    # 1  ->  2
-    # 2  ->  0 -> 3
-    # 3  ->  3
-    # DFS:
-    # 0 1 2 3
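The two modules removed by patches 04 and 05 implement breadth-first and depth-first traversal over a dict-of-lists adjacency structure, but both index their visited list by vertex label and use list.pop(0) as a queue, which only behaves when the labels happen to be 0..n-1. For reference, a minimal standalone sketch of the same traversals (an illustration, not taken from either deleted file) could use collections.deque and a set of visited vertices so that arbitrary hashable labels work:

from collections import deque

def bfs(graph, start):
    # graph: dict mapping a vertex to the list of its neighbours
    visited = {start}
    order = []
    queue = deque([start])
    while queue:
        vertex = queue.popleft()
        order.append(vertex)
        for neighbour in graph.get(vertex, []):
            if neighbour not in visited:
                visited.add(neighbour)
                queue.append(neighbour)
    return order

def dfs(graph, start, visited=None):
    # recursive depth-first traversal; returns vertices in visit order
    if visited is None:
        visited = set()
    visited.add(start)
    order = [start]
    for neighbour in graph.get(start, []):
        if neighbour not in visited:
            order.extend(dfs(graph, neighbour, visited))
    return order

if __name__ == '__main__':
    g = {0: [1, 2], 1: [2], 2: [0, 3], 3: [3]}
    print(bfs(g, 2))  # [2, 0, 3, 1]
    print(dfs(g, 2))  # [2, 0, 1, 3]

On the sample graph from the deleted files, bfs(g, 2) reproduces the "2 0 3 1" ordering shown in their OUTPUT comments.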
From 8fb1eb7bdf390b9f711bba02f20185f180447eca Mon Sep 17 00:00:00 2001
From: Nathan Berger
Date: Mon, 9 Oct 2017 12:36:33 -0500
Subject: [PATCH 06/11] Implementation of a regression tree in python

I've implemented a basic decision tree in python as an example of how they
work. Although the class I've created only works on one dimensional data
sets, the reader should be able to generalize it to higher dimensions should
they need to.
---
 machine_learning/decision_tree.py | 136 ++++++++++++++++++++++++++++++
 1 file changed, 136 insertions(+)
 create mode 100644 machine_learning/decision_tree.py

diff --git a/machine_learning/decision_tree.py b/machine_learning/decision_tree.py
new file mode 100644
index 000000000..dfc2e1676
--- /dev/null
+++ b/machine_learning/decision_tree.py
@@ -0,0 +1,136 @@
+"""
+Implementation of a basic regression decision tree.
+Input data set: The input data set must be 1-dimensional with continuous labels.
+Output: The decision tree maps a real number input to a real number output.
+"""
+
+import numpy as np
+
+class Decision_Tree:
+    def __init__(self, depth = 5, min_leaf_size = 5):
+        self.depth = depth
+        self.decision_boundary = 0
+        self.left = None
+        self.right = None
+        self.min_leaf_size = min_leaf_size
+        self.prediction = None
+
+    def mean_squared_error(self, labels, prediction):
+        """
+        mean_squared_error:
+        @param labels: a one dimensional numpy array
+        @param prediction: a floating point value
+        return value: mean_squared_error calculates the error if prediction is used to estimate the labels
+        """
+        if labels.ndim != 1:
+            print("Error: Input labels must be one dimensional")
+
+        return np.mean((labels - prediction) ** 2)
+
+    def train(self, X, y):
+        """
+        train:
+        @param X: a one dimensional numpy array
+        @param y: a one dimensional numpy array.
+        The contents of y are the labels for the corresponding X values
+
+        train does not have a return value
+        """
+
+        """
+        this section is to check that the inputs conform to our dimensionality constraints
+        """
+        if X.ndim != 1:
+            print("Error: Input data set must be one dimensional")
+            return
+        if len(X) != len(y):
+            print("Error: X and y have different lengths")
+            return
+        if y.ndim != 1:
+            print("Error: Data set labels must be one dimensional")
+
+        if len(X) < 2 * self.min_leaf_size:
+            self.prediction = np.mean(y)
+
+        if self.depth == 1:
+            self.prediction = np.mean(y)
+
+        best_split = 0
+        min_error = self.mean_squared_error(X,np.mean(y)) * 2
+
+
+        """
+        loop over all possible splits for the decision tree. find the best split.
+        if no split exists that is less than 2 * error for the entire array
+        then the data set is not split and the average for the entire array is used as the predictor
+        """
+        for i in range(len(X)):
+            if len(X[:i]) < self.min_leaf_size:
+                continue
+            elif len(X[i:]) < self.min_leaf_size:
+                continue
+            else:
+                error_left = self.mean_squared_error(X[:i], np.mean(y[:i]))
+                error_right = self.mean_squared_error(X[i:], np.mean(y[i:]))
+                error = error_left + error_right
+                if error < min_error:
+                    best_split = i
+                    min_error = error
+
+        if best_split != 0:
+            left_X = X[:best_split]
+            left_y = y[:best_split]
+            right_X = X[best_split:]
+            right_y = y[best_split:]
+
+            self.decision_boundary = X[best_split]
+            self.left = Decision_Tree(depth = self.depth - 1, min_leaf_size = self.min_leaf_size)
+            self.right = Decision_Tree(depth = self.depth - 1, min_leaf_size = self.min_leaf_size)
+            self.left.train(left_X, left_y)
+            self.right.train(right_X, right_y)
+        else:
+            self.prediction = np.mean(y)
+
+        return
+
+    def predict(self, x):
+        """
+        predict:
+        @param x: a floating point value to predict the label of
+        the prediction function works by recursively calling the predict function
+        of the appropriate subtrees based on the tree's decision boundary
+        """
+        if self.prediction is not None:
+            return self.prediction
+        elif self.left or self.right is not None:
+            if x >= self.decision_boundary:
+                return self.right.predict(x)
+            else:
+                return self.left.predict(x)
+        else:
+            print("Error: Decision tree not yet trained")
+            return None
+
+def main():
+    """
+    In this demonstration we're generating a sample data set from the sin function in numpy.
+    We then train a decision tree on the data set and use the decision tree to predict the
+    label of 10 different test values. Then the mean squared error over this test is displayed.
+    """
+    X = np.arange(-1., 1., 0.005)
+    y = np.sin(X)
+
+    tree = Decision_Tree(depth = 10, min_leaf_size = 10)
+    tree.train(X,y)
+
+    test_cases = (np.random.rand(10) * 2) - 1
+    predictions = np.array([tree.predict(x) for x in test_cases])
+    avg_error = np.mean((predictions - test_cases) ** 2)
+
+    print("Test values: " + str(test_cases))
+    print("Predictions: " + str(predictions))
+    print("Average error: " + str(avg_error))
+
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
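The train method above scans every candidate split index and keeps the one with the smallest summed error of the two halves, falling back to a single mean prediction when no split improves on twice the whole-array error. The sketch below illustrates that style of exhaustive split search on a toy one-dimensional data set; it is a standalone illustration that scores each half's labels against their own mean (an assumption made here for clarity; the committed code scores slices of X against the mean of y), not a drop-in replacement for decision_tree.py:

import numpy as np

def best_split(X, y, min_leaf_size=5):
    # Return the index that minimises the summed mean squared error of the
    # two label halves, or None when every candidate split would leave a
    # half smaller than min_leaf_size.
    best_index, best_error = None, np.inf
    for i in range(min_leaf_size, len(X) - min_leaf_size + 1):
        error = np.mean((y[:i] - y[:i].mean()) ** 2) + np.mean((y[i:] - y[i:].mean()) ** 2)
        if error < best_error:
            best_index, best_error = i, error
    return best_index

if __name__ == '__main__':
    X = np.arange(-1.0, 1.0, 0.005)
    y = np.sin(X)
    i = best_split(X, y, min_leaf_size=10)
    print(i, X[i])  # the boundary should land near the middle of the range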
From 37967bd0cf9bb8b70126d2b9d763869f327021de Mon Sep 17 00:00:00 2001
From: Nathan Berger
Date: Mon, 9 Oct 2017 12:42:51 -0500
Subject: [PATCH 07/11] Fixed case where function didn't return where it should

I added these return statements so that invalid inputs or valid end cases
would no longer continue running through the rest of the function.
---
 machine_learning/decision_tree.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/machine_learning/decision_tree.py b/machine_learning/decision_tree.py
index dfc2e1676..51f600cac 100644
--- a/machine_learning/decision_tree.py
+++ b/machine_learning/decision_tree.py
@@ -48,12 +48,15 @@ class Decision_Tree:
             return
         if y.ndim != 1:
             print("Error: Data set labels must be one dimensional")
+            return
 
         if len(X) < 2 * self.min_leaf_size:
             self.prediction = np.mean(y)
+            return
 
         if self.depth == 1:
             self.prediction = np.mean(y)
+            return
 
         best_split = 0
         min_error = self.mean_squared_error(X,np.mean(y)) * 2

From cb3ff4a8f8fbcab28fc3bb08f32fcd5444886f8a Mon Sep 17 00:00:00 2001
From: fickleEfrit
Date: Mon, 9 Oct 2017 17:26:27 -0400
Subject: [PATCH 08/11] Create quick_select.py

---
 searches/quick_select.py | 47 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)
 create mode 100644 searches/quick_select.py

diff --git a/searches/quick_select.py b/searches/quick_select.py
new file mode 100644
index 000000000..e5e2ce99c
--- /dev/null
+++ b/searches/quick_select.py
@@ -0,0 +1,47 @@
+import collections
+import sys
+import random
+import time
+import math
+"""
+A python implementation of the quick select algorithm, which is efficient for calculating the value that would appear in the index of a list if it would be sorted, even if it is not already sorted
+https://en.wikipedia.org/wiki/Quickselect
+"""
+def _partition(data, pivot):
+    """
+    Three way partition the data into smaller, equal and greater lists,
+    in relationship to the pivot
+    :param data: The data to be sorted (a list)
+    :param pivot: The value to partition the data on
+    :return: Three list: smaller, equal and greater
+    """
+    less, equal, greater = [], [], []
+    for element in data:
+        if element.address < pivot.address:
+            less.append(element)
+        elif element.address > pivot.address:
+            greater.append(element)
+        else:
+            equal.append(element)
+    return less, equal, greater
+
+  def quickSelect(list, k):
+    #k = len(list) // 2 when trying to find the median (index that value would be when list is sorted)
+    smaller = []
+    larger = []
+    pivot = random.randint(0, len(list) - 1)
+    pivot = list[pivot]
+    count = 0
+    smaller, equal, larger =_partition(list, pivot)
+    count = len(equal)
+    m = len(smaller)
+
+    #k is the pivot
+    if m <= k < m + count:
+        return pivot
+    # must be in smaller
+    elif m > k:
+        return quickSelect(smaller, k)
+    #must be in larger
+    else:
+        return quickSelect(larger, k - (m + count))
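The partition in quick_select.py as committed compares element.address rather than the elements themselves, so it only works for objects that happen to expose an address attribute. A self-contained sketch of the same three-way-partition quickselect over plain comparable values (an illustrative rewrite, not the repository file) looks like this:

import random

def quick_select(data, k):
    # Return the element that would sit at index k if data were sorted,
    # using three-way partitioning around a random pivot.
    pivot = data[random.randint(0, len(data) - 1)]
    smaller = [x for x in data if x < pivot]
    equal = [x for x in data if x == pivot]
    larger = [x for x in data if x > pivot]
    m, count = len(smaller), len(equal)
    if m <= k < m + count:      # k falls inside the block of pivots
        return pivot
    elif k < m:                 # answer lies in the smaller partition
        return quick_select(smaller, k)
    else:                       # answer lies in the larger partition
        return quick_select(larger, k - (m + count))

if __name__ == '__main__':
    values = [7, 1, 5, 3, 9, 3]
    assert quick_select(values, 2) == sorted(values)[2]
    print(quick_select(values, len(values) // 2))

Three-way partitioning keeps duplicates of the pivot together, so the recursion terminates even when the list contains many equal values and the expected running time stays linear.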
From dc5e86b7013cb02ceb0a11f32afb4a2b8f7d3ae1 Mon Sep 17 00:00:00 2001
From: Alvin Nguyen
Date: Mon, 9 Oct 2017 17:00:37 -0700
Subject: [PATCH 09/11] Fixed compilation errors, fixes for readability/convention, changed double equals to boolean equality operator 'is'

---
 data_structures/Graph/Breadth_First_Search.py | 44 ++++++++++---------
 1 file changed, 23 insertions(+), 21 deletions(-)

diff --git a/data_structures/Graph/Breadth_First_Search.py b/data_structures/Graph/Breadth_First_Search.py
index 1a3fdfd4d..9cb234856 100644
--- a/data_structures/Graph/Breadth_First_Search.py
+++ b/data_structures/Graph/Breadth_First_Search.py
@@ -1,9 +1,9 @@
 class GRAPH:
     """docstring for GRAPH"""
     def __init__(self, nodes):
-        self.nodes=nodes
-        self.graph=[[0]*nodes for i in range (nodes)]
-        self.visited=[0]*nodes
+        self.nodes = nodes
+        self.graph = [[0]*nodes for i in range (nodes)]
+        self.visited = [0]*nodes
 
 
     def show(self):
@@ -23,7 +23,7 @@ class GRAPH:
             v = queue[0]
             for u in range(self.vertex):
                 if self.graph[v][u] == 1:
-                    if visited[u]== False:
+                    if visited[u] is False:
                         visited[u] = True
                         queue.append(u)
                         print('%d visited' % (u +1))
@@ -41,30 +41,32 @@ g.add_edge(4,8)
 g.add_edge(5,9)
 g.add_edge(6,10)
 g.bfs(4)
-=======
-    print self.graph
+
+print(self.graph)
 
     def add_edge(self, i, j):
         self.graph[i][j]=1
         self.graph[j][i]=1
 
-    def bfs(self,s):
-        queue=[s]
-        self.visited[s]=1
-        while len(queue)!=0:
-            x=queue.pop(0)
+    def bfs(self, s):
+        queue = [s]
+        self.visited[s] = 1
+        while len(queue)!= 0:
+            x = queue.pop(0)
             print(x)
-            for i in range(0,self.nodes):
-                if self.graph[x][i]==1 and self.visited[i]==0:
+            for i in range(0, self.nodes):
+                if self.graph[x][i] == 1 and self.visited[i] == 0:
                     queue.append(i)
-                    self.visited[i]=1
+                    self.visited[i] = 1
 
-n=int(input("Enter the number of Nodes : "))
-g=GRAPH(n)
-e=int(input("Enter the no of edges : "))
+n = int(input("Enter the number of Nodes : "))
+g = GRAPH(n)
+e = int(input("Enter the no of edges : "))
 print("Enter the edges (u v)")
-for i in range(0,e):
-    u,v=map(int, raw_input().split())
-    g.add_edge(u,v)
-s=int(input("Enter the source node :"))
+
+for i in range(0, e):
+    u ,v = map(int, raw_input().split())
+    g.add_edge(u, v)
+
+s = int(input("Enter the source node :"))
 g.bfs(s)

From ab058ab0b51486f892a1b59a22631eff5083c241 Mon Sep 17 00:00:00 2001
From: Alvin Nguyen
Date: Mon, 9 Oct 2017 17:05:14 -0700
Subject: [PATCH 10/11] changed rigt->right, a typo fix.

---
 data_structures/Binary Tree/binary_seach_tree.py | 6 +++---
 data_structures/Graph/Breadth_First_Search.py | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/data_structures/Binary Tree/binary_seach_tree.py b/data_structures/Binary Tree/binary_seach_tree.py
index 1dac948ae..0b1726534 100644
--- a/data_structures/Binary Tree/binary_seach_tree.py
+++ b/data_structures/Binary Tree/binary_seach_tree.py
@@ -8,7 +8,7 @@ class Node:
     def __init__(self, label):
         self.label = label
         self.left = None
-        self.rigt = None
+        self.right = None
 
     def getLabel(self):
         return self.label
@@ -23,10 +23,10 @@ class Node:
         self.left = left
 
     def getRight(self):
-        return self.rigt
+        return self.right
 
     def setRight(self, right):
-        self.rigt = right
+        self.right = right
 
 
 class BinarySearchTree:
diff --git a/data_structures/Graph/Breadth_First_Search.py b/data_structures/Graph/Breadth_First_Search.py
index 9cb234856..92a6e819b 100644
--- a/data_structures/Graph/Breadth_First_Search.py
+++ b/data_structures/Graph/Breadth_First_Search.py
@@ -67,6 +67,6 @@ print("Enter the edges (u v)")
 for i in range(0, e):
     u ,v = map(int, raw_input().split())
     g.add_edge(u, v)
-    
+
 s = int(input("Enter the source node :"))
 g.bfs(s)
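Neither patch 09 nor patch 10 touches the call to raw_input, which exists only in Python 2, even though the surrounding script already uses the Python 3 forms of print and input. If the driver were run under Python 3, edge reading would need something along these lines (a hypothetical helper named read_edges, shown only for illustration):

def read_edges(num_edges):
    # Read num_edges lines of the form "u v" and return them as int pairs.
    # input() replaces the Python-2-only raw_input() used in the script above.
    edges = []
    for _ in range(num_edges):
        u, v = map(int, input().split())
        edges.append((u, v))
    return edges

Each returned pair would then be passed to g.add_edge(u, v) exactly as in the existing loop.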
From 8f71b309953fec9da78ef64ea8cc2e7314ec1b6f Mon Sep 17 00:00:00 2001
From: TaylorL19
Date: Tue, 10 Oct 2017 11:57:16 -0500
Subject: [PATCH 11/11] Fixed binary search to correctly recurse to left half and right half

---
 searches/binary_search.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/searches/binary_search.py b/searches/binary_search.py
index 13b54f498..8dc3008da 100644
--- a/searches/binary_search.py
+++ b/searches/binary_search.py
@@ -110,9 +110,9 @@ def binary_search_by_recursion(sorted_collection, item, left, right):
     if sorted_collection[midpoint] == item:
         return midpoint
     elif sorted_collection[midpoint] > item:
-        return binary_search_by_recursion(sorted_collection, item, left, right-1)
+        return binary_search_by_recursion(sorted_collection, item, left, midpoint-1)
     else:
-        return binary_search_by_recursion(sorted_collection, item, left+1, right)
+        return binary_search_by_recursion(sorted_collection, item, midpoint+1, right)
 
 def __assert_sorted(collection):
     """Check if collection is sorted, if not - raises :py:class:`ValueError`
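For reference, the corrected recursion from patch 11 can be exercised on its own. The sketch below restates binary_search_by_recursion with the midpoint-based bounds; the right < left base case is an assumption added here so the fragment is runnable, since the surrounding module is not shown in full:

def binary_search_by_recursion(sorted_collection, item, left, right):
    # Search sorted_collection[left..right] for item; return its index or None.
    if right < left:
        return None
    midpoint = left + (right - left) // 2
    if sorted_collection[midpoint] == item:
        return midpoint
    elif sorted_collection[midpoint] > item:
        # item, if present, lies strictly left of the midpoint
        return binary_search_by_recursion(sorted_collection, item, left, midpoint - 1)
    else:
        # item, if present, lies strictly right of the midpoint
        return binary_search_by_recursion(sorted_collection, item, midpoint + 1, right)

if __name__ == '__main__':
    data = [0, 5, 7, 10, 15]
    assert binary_search_by_recursion(data, 15, 0, len(data) - 1) == 4
    assert binary_search_by_recursion(data, 6, 0, len(data) - 1) is None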