Mirror of https://github.com/TheAlgorithms/Python.git
Synced 2024-12-01 00:41:09 +00:00

Commit 7c9a07c0a0: Merge remote-tracking branch 'upstream/master'
.travis.yml
14
.travis.yml
|
@ -1,14 +0,0 @@
|
||||||
language: python
|
|
||||||
python:
|
|
||||||
- "3.2"
|
|
||||||
- "3.3"
|
|
||||||
- "3.4"
|
|
||||||
- "3.5"
|
|
||||||
- "3.6"
|
|
||||||
- "3.6-dev"
|
|
||||||
|
|
||||||
install:
|
|
||||||
- if [ "$TRAVIS_PYTHON_VERSION" == "3.2" ]; then travis_retry pip install coverage==3.7.1; fi
|
|
||||||
- if [ "$TRAVIS_PYTHON_VERSION" != "3.2" ]; then travis_retry pip install coverage; fi
|
|
||||||
- "pip install pytest pytest-cov"
|
|
||||||
script: py.test --doctest-modules --cov ./
|
|
@@ -8,7 +8,7 @@ class Node:
     def __init__(self, label):
         self.label = label
         self.left = None
-        self.rigt = None
+        self.right = None

     def getLabel(self):
         return self.label
@@ -23,10 +23,10 @@ class Node:
         self.left = left

     def getRight(self):
-        return self.rigt
+        return self.right

     def setRight(self, right):
-        self.rigt = right
+        self.right = right


 class BinarySearchTree:
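The rename is consistent: the old code spelled the attribute rigt in the constructor and in both accessors, so it worked but was misspelled throughout. A minimal sketch of the fixed class in use (hypothetical usage, not part of the commit):

    root = Node(8)
    child = Node(12)
    root.setRight(child)             # now stores into self.right, not self.rigt
    assert root.getRight() is child  # reads back the same attribute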
@@ -23,7 +23,7 @@ class GRAPH:
         v = queue[0]
         for u in range(self.vertex):
             if self.graph[v][u] == 1:
-                if visited[u]== False:
+                if visited[u] is False:
                     visited[u] = True
                     queue.append(u)
                     print('%d visited' % (u +1))
@@ -41,8 +41,8 @@ g.add_edge(4,8)
 g.add_edge(5,9)
 g.add_edge(6,10)
 g.bfs(4)
 =======
-        print self.graph
+        print(self.graph)

     def add_edge(self, i, j):
         self.graph[i][j]=1
@@ -63,8 +63,10 @@ n=int(input("Enter the number of Nodes : "))
 g = GRAPH(n)
 e = int(input("Enter the no of edges : "))
 print("Enter the edges (u v)")
+
 for i in range(0, e):
     u ,v = map(int, raw_input().split())
     g.add_edge(u, v)
+
 s = int(input("Enter the source node :"))
 g.bfs(s)
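The hunks above show only fragments of the GRAPH class (note the stray ======= merge-conflict marker left in the file, and the Python 2 raw_input alongside Python 3 input). For context, a self-contained sketch of the adjacency-matrix BFS being patched; the class body is assumed from the fragments, not taken verbatim from the file:

    from collections import deque

    class GRAPH:
        # Adjacency-matrix graph, reconstructed from the diff fragments above.
        def __init__(self, nodes):
            self.vertex = nodes
            self.graph = [[0] * nodes for _ in range(nodes)]

        def add_edge(self, i, j):
            self.graph[i][j] = 1
            self.graph[j][i] = 1

        def bfs(self, start):
            visited = [False] * self.vertex
            visited[start] = True
            queue = deque([start])
            while queue:
                v = queue.popleft()  # O(1); the original keeps a plain list
                for u in range(self.vertex):
                    if self.graph[v][u] == 1 and not visited[u]:
                        visited[u] = True
                        queue.append(u)
                        print('%d visited' % (u + 1))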
@@ -1,61 +0,0 @@
-# Author: OMKAR PATHAK
-
-class Graph():
-    def __init__(self):
-        self.vertex = {}
-
-    # for printing the Graph vertexes
-    def printGraph(self):
-        for i in self.vertex.keys():
-            print(i,' -> ', ' -> '.join([str(j) for j in self.vertex[i]]))
-
-    # for adding the edge between two vertexes
-    def addEdge(self, fromVertex, toVertex):
-        # check if vertex is already present,
-        if fromVertex in self.vertex.keys():
-            self.vertex[fromVertex].append(toVertex)
-        else:
-            # else make a new vertex
-            self.vertex[fromVertex] = [toVertex]
-
-    def BFS(self, startVertex):
-        # Take a list for storing already visited vertexes
-        visited = [False] * len(self.vertex)
-
-        # create a list to store all the vertexes for BFS
-        queue = []
-
-        # mark the source node as visited and enqueue it
-        visited[startVertex] = True
-        queue.append(startVertex)
-
-        while queue:
-            startVertex = queue.pop(0)
-            print(startVertex, end = ' ')
-
-            # mark all adjacent nodes as visited and print them
-            for i in self.vertex[startVertex]:
-                if visited[i] == False:
-                    queue.append(i)
-                    visited[i] = True
-
-if __name__ == '__main__':
-    g = Graph()
-    g.addEdge(0, 1)
-    g.addEdge(0, 2)
-    g.addEdge(1, 2)
-    g.addEdge(2, 0)
-    g.addEdge(2, 3)
-    g.addEdge(3, 3)
-
-    g.printGraph()
-    print('BFS:')
-    g.BFS(2)
-
-# OUTPUT:
-# 0 -> 1 -> 2
-# 1 -> 2
-# 2 -> 0 -> 3
-# 3 -> 3
-# BFS:
-# 2 0 3 1
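The deleted traversal dequeues with queue.pop(0), which is O(n) per pop. A minimal sketch of the same traversal using collections.deque (an assumed replacement, using the same dict-of-neighbour-lists shape as Graph.vertex above):

    from collections import deque

    def bfs(graph, start):
        # graph: dict mapping vertex -> list of neighbours
        visited = {start}
        queue = deque([start])
        order = []
        while queue:
            vertex = queue.popleft()  # O(1) instead of list.pop(0)'s O(n)
            order.append(vertex)
            for neighbour in graph.get(vertex, []):
                if neighbour not in visited:
                    visited.add(neighbour)
                    queue.append(neighbour)
        return order

    # bfs({0: [1, 2], 1: [2], 2: [0, 3], 3: [3]}, 2) -> [2, 0, 3, 1]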
@@ -1,61 +0,0 @@
-# Author: OMKAR PATHAK
-
-class Graph():
-    def __init__(self):
-        self.vertex = {}
-
-    # for printing the Graph vertexes
-    def printGraph(self):
-        print(self.vertex)
-        for i in self.vertex.keys():
-            print(i,' -> ', ' -> '.join([str(j) for j in self.vertex[i]]))
-
-    # for adding the edge between two vertexes
-    def addEdge(self, fromVertex, toVertex):
-        # check if vertex is already present,
-        if fromVertex in self.vertex.keys():
-            self.vertex[fromVertex].append(toVertex)
-        else:
-            # else make a new vertex
-            self.vertex[fromVertex] = [toVertex]
-
-    def DFS(self):
-        # visited array for storing already visited nodes
-        visited = [False] * len(self.vertex)
-
-        # call the recursive helper function
-        for i in range(len(self.vertex)):
-            if visited[i] == False:
-                self.DFSRec(i, visited)
-
-    def DFSRec(self, startVertex, visited):
-        # mark start vertex as visited
-        visited[startVertex] = True
-
-        print(startVertex, end = ' ')
-
-        # Recur for all the vertexes that are adjacent to this node
-        for i in self.vertex.keys():
-            if visited[i] == False:
-                self.DFSRec(i, visited)
-
-if __name__ == '__main__':
-    g = Graph()
-    g.addEdge(0, 1)
-    g.addEdge(0, 2)
-    g.addEdge(1, 2)
-    g.addEdge(2, 0)
-    g.addEdge(2, 3)
-    g.addEdge(3, 3)
-
-    g.printGraph()
-    print('DFS:')
-    g.DFS()
-
-# OUTPUT:
-# 0 -> 1 -> 2
-# 1 -> 2
-# 2 -> 0 -> 3
-# 3 -> 3
-# DFS:
-# 0 1 2 3
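Note that DFSRec above recurses over self.vertex.keys(), i.e. every vertex, rather than the neighbours of startVertex; on this example it happens to print 0 1 2 3, but it does not follow the edges depth-first. A corrected sketch (assumed, same dict-of-neighbour-lists shape):

    def dfs(graph, start, visited=None):
        # Recurse only into the neighbours of the current vertex,
        # which is what depth-first search requires.
        if visited is None:
            visited = set()
        visited.add(start)
        print(start, end=' ')
        for neighbour in graph.get(start, []):
            if neighbour not in visited:
                dfs(graph, neighbour, visited)

    # dfs({0: [1, 2], 1: [2], 2: [0, 3], 3: [3]}, 0) prints: 0 1 2 3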
machine_learning/decision_tree.py | 139 lines (new file)

@@ -0,0 +1,139 @@
+"""
+Implementation of a basic regression decision tree.
+Input data set: The input data set must be 1-dimensional with continuous labels.
+Output: The decision tree maps a real number input to a real number output.
+"""
+
+import numpy as np
+
+class Decision_Tree:
+    def __init__(self, depth = 5, min_leaf_size = 5):
+        self.depth = depth
+        self.decision_boundary = 0
+        self.left = None
+        self.right = None
+        self.min_leaf_size = min_leaf_size
+        self.prediction = None
+
+    def mean_squared_error(self, labels, prediction):
+        """
+        mean_squared_error:
+        @param labels: a one dimensional numpy array
+        @param prediction: a floating point value
+        return value: mean_squared_error calculates the error if prediction is used to estimate the labels
+        """
+        if labels.ndim != 1:
+            print("Error: Input labels must be one dimensional")
+
+        return np.mean((labels - prediction) ** 2)
+
+    def train(self, X, y):
+        """
+        train:
+        @param X: a one dimensional numpy array
+        @param y: a one dimensional numpy array.
+        The contents of y are the labels for the corresponding X values
+
+        train does not have a return value
+        """
+
+        """
+        this section is to check that the inputs conform to our dimensionality constraints
+        """
+        if X.ndim != 1:
+            print("Error: Input data set must be one dimensional")
+            return
+        if len(X) != len(y):
+            print("Error: X and y have different lengths")
+            return
+        if y.ndim != 1:
+            print("Error: Data set labels must be one dimensional")
+            return
+
+        if len(X) < 2 * self.min_leaf_size:
+            self.prediction = np.mean(y)
+            return
+
+        if self.depth == 1:
+            self.prediction = np.mean(y)
+            return
+
+        best_split = 0
+        # baseline: twice the error of predicting the mean label for the whole array
+        min_error = self.mean_squared_error(y, np.mean(y)) * 2
+
+        """
+        loop over all possible splits for the decision tree. find the best split.
+        if no split exists that is less than 2 * error for the entire array
+        then the data set is not split and the average for the entire array is used as the predictor
+        """
+        for i in range(len(X)):
+            if len(X[:i]) < self.min_leaf_size:
+                continue
+            elif len(X[i:]) < self.min_leaf_size:
+                continue
+            else:
+                # error of predicting the mean label on each side of the candidate split
+                error_left = self.mean_squared_error(y[:i], np.mean(y[:i]))
+                error_right = self.mean_squared_error(y[i:], np.mean(y[i:]))
+                error = error_left + error_right
+                if error < min_error:
+                    best_split = i
+                    min_error = error
+
+        if best_split != 0:
+            left_X = X[:best_split]
+            left_y = y[:best_split]
+            right_X = X[best_split:]
+            right_y = y[best_split:]
+
+            self.decision_boundary = X[best_split]
+            self.left = Decision_Tree(depth = self.depth - 1, min_leaf_size = self.min_leaf_size)
+            self.right = Decision_Tree(depth = self.depth - 1, min_leaf_size = self.min_leaf_size)
+            self.left.train(left_X, left_y)
+            self.right.train(right_X, right_y)
+        else:
+            self.prediction = np.mean(y)
+
+        return
+
+    def predict(self, x):
+        """
+        predict:
+        @param x: a floating point value to predict the label of
+        the prediction function works by recursively calling the predict function
+        of the appropriate subtrees based on the tree's decision boundary
+        """
+        if self.prediction is not None:
+            return self.prediction
+        elif self.left is not None and self.right is not None:
+            if x >= self.decision_boundary:
+                return self.right.predict(x)
+            else:
+                return self.left.predict(x)
+        else:
+            print("Error: Decision tree not yet trained")
+            return None
+
+def main():
+    """
+    In this demonstration we're generating a sample data set from the sin function in numpy.
+    We then train a decision tree on the data set and use the decision tree to predict the
+    label of 10 different test values. Then the mean squared error over this test is displayed.
+    """
+    X = np.arange(-1., 1., 0.005)
+    y = np.sin(X)
+
+    tree = Decision_Tree(depth = 10, min_leaf_size = 10)
+    tree.train(X, y)
+
+    test_cases = (np.random.rand(10) * 2) - 1
+    predictions = np.array([tree.predict(x) for x in test_cases])
+    # compare predictions against the true labels sin(x), not the inputs themselves
+    avg_error = np.mean((predictions - np.sin(test_cases)) ** 2)
+
+    print("Test values: " + str(test_cases))
+    print("Predictions: " + str(predictions))
+    print("Average error: " + str(avg_error))
+
+if __name__ == '__main__':
+    main()
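A small deterministic sanity check of the tree above (illustrative values, assuming Decision_Tree is in scope or importable from machine_learning.decision_tree):

    import numpy as np

    # A step function should be recovered exactly: one split at x = 5.
    X = np.arange(10.0)               # inputs 0..9
    y = np.where(X < 5, 0.0, 1.0)     # labels: 0 below 5, 1 from 5 up
    tree = Decision_Tree(depth=3, min_leaf_size=2)
    tree.train(X, y)
    print(tree.predict(2.0))  # ~0.0
    print(tree.predict(7.0))  # ~1.0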
@@ -110,9 +110,9 @@ def binary_search_by_recursion(sorted_collection, item, left, right):
     if sorted_collection[midpoint] == item:
         return midpoint
     elif sorted_collection[midpoint] > item:
-        return binary_search_by_recursion(sorted_collection, item, left, right-1)
+        return binary_search_by_recursion(sorted_collection, item, left, midpoint-1)
     else:
-        return binary_search_by_recursion(sorted_collection, item, left+1, right)
+        return binary_search_by_recursion(sorted_collection, item, midpoint+1, right)

 def __assert_sorted(collection):
     """Check if collection is sorted, if not - raises :py:class:`ValueError`
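The change matters for complexity: recursing with right-1 / left+1 discards one element per call (O(n) worst case), while midpoint-1 / midpoint+1 halves the range (O(log n)). An illustrative trace, assuming midpoint = (left + right) // 2:

    # Searching for 15 in [0, 5, 7, 10, 15], left=0, right=4:
    #   midpoint = 2, value 7  < 15 -> recurse on (midpoint+1, right) = (3, 4)
    #   midpoint = 3, value 10 < 15 -> recurse on (4, 4)
    #   midpoint = 4, value 15 == 15 -> return 4
    # With the old left+1 step the same search takes 5 calls instead of 3.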
searches/quick_select.py | 47 lines (new file)

@@ -0,0 +1,47 @@
+import random
+
+"""
+A python implementation of the quick select algorithm, which is efficient for
+calculating the value that would appear in the index of a list if it would be
+sorted, even if it is not already sorted
+https://en.wikipedia.org/wiki/Quickselect
+"""
+
+def _partition(data, pivot):
+    """
+    Three way partition the data into smaller, equal and greater lists,
+    in relationship to the pivot
+    :param data: The data to be sorted (a list)
+    :param pivot: The value to partition the data on
+    :return: Three lists: smaller, equal and greater
+    """
+    less, equal, greater = [], [], []
+    for element in data:
+        # compare the values directly; the elements themselves are the keys
+        if element < pivot:
+            less.append(element)
+        elif element > pivot:
+            greater.append(element)
+        else:
+            equal.append(element)
+    return less, equal, greater
+
+def quickSelect(items, k):
+    # k = len(items) // 2 when trying to find the median
+    # (index that value would be when items is sorted)
+    pivot = items[random.randint(0, len(items) - 1)]
+    smaller, equal, larger = _partition(items, pivot)
+    count = len(equal)
+    m = len(smaller)
+
+    # k lands on the pivot
+    if m <= k < m + count:
+        return pivot
+    # must be in smaller
+    elif m > k:
+        return quickSelect(smaller, k)
+    # must be in larger
+    else:
+        return quickSelect(larger, k - (m + count))
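Usage, with k counted from zero (an illustrative example, not part of the file):

    data = [2, 9, 4, 7, 1]
    print(quickSelect(data, 0))               # 1, the minimum
    print(quickSelect(data, len(data) // 2))  # 4, the median
    print(quickSelect(data, len(data) - 1))   # 9, the maximum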