Mirror of https://github.com/TheAlgorithms/Python.git
Synced 2024-12-01 00:41:09 +00:00

Commit 7c9a07c0a0: Merge remote-tracking branch 'upstream/master'
.travis.yml
14
.travis.yml
|
@ -1,14 +0,0 @@
|
||||||
language: python
|
|
||||||
python:
|
|
||||||
- "3.2"
|
|
||||||
- "3.3"
|
|
||||||
- "3.4"
|
|
||||||
- "3.5"
|
|
||||||
- "3.6"
|
|
||||||
- "3.6-dev"
|
|
||||||
|
|
||||||
install:
|
|
||||||
- if [ "$TRAVIS_PYTHON_VERSION" == "3.2" ]; then travis_retry pip install coverage==3.7.1; fi
|
|
||||||
- if [ "$TRAVIS_PYTHON_VERSION" != "3.2" ]; then travis_retry pip install coverage; fi
|
|
||||||
- "pip install pytest pytest-cov"
|
|
||||||
script: py.test --doctest-modules --cov ./
|
|
@@ -8,7 +8,7 @@ class Node:
     def __init__(self, label):
         self.label = label
         self.left = None
-        self.rigt = None
+        self.right = None

     def getLabel(self):
         return self.label
@@ -23,10 +23,10 @@ class Node:
         self.left = left

     def getRight(self):
-        return self.rigt
+        return self.right

     def setRight(self, right):
-        self.rigt = right
+        self.right = right


 class BinarySearchTree:
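The rename is consistent: the old code spelled the attribute rigt in the constructor and in both accessors, so it worked but was misspelled throughout. A minimal sketch of the fixed class in use (hypothetical usage, not part of the commit):

    root = Node(8)
    child = Node(12)
    root.setRight(child)             # now stores into self.right, not self.rigt
    assert root.getRight() is child  # reads back the same attribute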
@@ -23,7 +23,7 @@ class GRAPH:
         v = queue[0]
         for u in range(self.vertex):
             if self.graph[v][u] == 1:
-                if visited[u]== False:
+                if visited[u] is False:
                     visited[u] = True
                     queue.append(u)
                     print('%d visited' % (u +1))
@@ -41,8 +41,8 @@ g.add_edge(4,8)
 g.add_edge(5,9)
 g.add_edge(6,10)
 g.bfs(4)
 =======
-        print self.graph
+        print(self.graph)

     def add_edge(self, i, j):
         self.graph[i][j]=1
@@ -63,8 +63,10 @@ n=int(input("Enter the number of Nodes : "))
 g = GRAPH(n)
 e = int(input("Enter the no of edges : "))
 print("Enter the edges (u v)")
+
 for i in range(0, e):
     u ,v = map(int, raw_input().split())
     g.add_edge(u, v)
+
 s = int(input("Enter the source node :"))
 g.bfs(s)
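The hunks above show only fragments of the GRAPH class (note the stray ======= merge-conflict marker left in the file, and the Python 2 raw_input alongside Python 3 input). For context, a self-contained sketch of the adjacency-matrix BFS being patched; the class body is assumed from the fragments, not taken verbatim from the file:

    from collections import deque

    class GRAPH:
        # Adjacency-matrix graph, reconstructed from the diff fragments above.
        def __init__(self, nodes):
            self.vertex = nodes
            self.graph = [[0] * nodes for _ in range(nodes)]

        def add_edge(self, i, j):
            self.graph[i][j] = 1
            self.graph[j][i] = 1

        def bfs(self, start):
            visited = [False] * self.vertex
            visited[start] = True
            queue = deque([start])
            while queue:
                v = queue.popleft()  # O(1); the original keeps a plain list
                for u in range(self.vertex):
                    if self.graph[v][u] == 1 and not visited[u]:
                        visited[u] = True
                        queue.append(u)
                        print('%d visited' % (u + 1))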
@@ -1,61 +0,0 @@
-# Author: OMKAR PATHAK
-
-class Graph():
-    def __init__(self):
-        self.vertex = {}
-
-    # for printing the Graph vertexes
-    def printGraph(self):
-        for i in self.vertex.keys():
-            print(i,' -> ', ' -> '.join([str(j) for j in self.vertex[i]]))
-
-    # for adding the edge between two vertexes
-    def addEdge(self, fromVertex, toVertex):
-        # check if vertex is already present,
-        if fromVertex in self.vertex.keys():
-            self.vertex[fromVertex].append(toVertex)
-        else:
-            # else make a new vertex
-            self.vertex[fromVertex] = [toVertex]
-
-    def BFS(self, startVertex):
-        # Take a list for storing already visited vertexes
-        visited = [False] * len(self.vertex)
-
-        # create a list to store all the vertexes for BFS
-        queue = []
-
-        # mark the source node as visited and enqueue it
-        visited[startVertex] = True
-        queue.append(startVertex)
-
-        while queue:
-            startVertex = queue.pop(0)
-            print(startVertex, end = ' ')
-
-            # mark all adjacent nodes as visited and print them
-            for i in self.vertex[startVertex]:
-                if visited[i] == False:
-                    queue.append(i)
-                    visited[i] = True
-
-if __name__ == '__main__':
-    g = Graph()
-    g.addEdge(0, 1)
-    g.addEdge(0, 2)
-    g.addEdge(1, 2)
-    g.addEdge(2, 0)
-    g.addEdge(2, 3)
-    g.addEdge(3, 3)
-
-    g.printGraph()
-    print('BFS:')
-    g.BFS(2)
-
-# OUTPUT:
-# 0 -> 1 -> 2
-# 1 -> 2
-# 2 -> 0 -> 3
-# 3 -> 3
-# BFS:
-# 2 0 3 1
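The deleted traversal dequeues with queue.pop(0), which is O(n) per pop. A minimal sketch of the same traversal using collections.deque (an assumed replacement, using the same dict-of-neighbour-lists shape as Graph.vertex above):

    from collections import deque

    def bfs(graph, start):
        # graph: dict mapping vertex -> list of neighbours
        visited = {start}
        queue = deque([start])
        order = []
        while queue:
            vertex = queue.popleft()  # O(1) instead of list.pop(0)'s O(n)
            order.append(vertex)
            for neighbour in graph.get(vertex, []):
                if neighbour not in visited:
                    visited.add(neighbour)
                    queue.append(neighbour)
        return order

    # bfs({0: [1, 2], 1: [2], 2: [0, 3], 3: [3]}, 2) -> [2, 0, 3, 1]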
@@ -1,61 +0,0 @@
-# Author: OMKAR PATHAK
-
-class Graph():
-    def __init__(self):
-        self.vertex = {}
-
-    # for printing the Graph vertexes
-    def printGraph(self):
-        print(self.vertex)
-        for i in self.vertex.keys():
-            print(i,' -> ', ' -> '.join([str(j) for j in self.vertex[i]]))
-
-    # for adding the edge between two vertexes
-    def addEdge(self, fromVertex, toVertex):
-        # check if vertex is already present,
-        if fromVertex in self.vertex.keys():
-            self.vertex[fromVertex].append(toVertex)
-        else:
-            # else make a new vertex
-            self.vertex[fromVertex] = [toVertex]
-
-    def DFS(self):
-        # visited array for storing already visited nodes
-        visited = [False] * len(self.vertex)
-
-        # call the recursive helper function
-        for i in range(len(self.vertex)):
-            if visited[i] == False:
-                self.DFSRec(i, visited)
-
-    def DFSRec(self, startVertex, visited):
-        # mark start vertex as visited
-        visited[startVertex] = True
-
-        print(startVertex, end = ' ')
-
-        # Recur for all the vertexes that are adjacent to this node
-        for i in self.vertex.keys():
-            if visited[i] == False:
-                self.DFSRec(i, visited)
-
-if __name__ == '__main__':
-    g = Graph()
-    g.addEdge(0, 1)
-    g.addEdge(0, 2)
-    g.addEdge(1, 2)
-    g.addEdge(2, 0)
-    g.addEdge(2, 3)
-    g.addEdge(3, 3)
-
-    g.printGraph()
-    print('DFS:')
-    g.DFS()
-
-# OUTPUT:
-# 0 -> 1 -> 2
-# 1 -> 2
-# 2 -> 0 -> 3
-# 3 -> 3
-# DFS:
-# 0 1 2 3
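Note that DFSRec above recurses over self.vertex.keys(), i.e. every vertex, rather than the neighbours of startVertex; on this example it happens to print 0 1 2 3, but it does not follow the edges depth-first. A corrected sketch (assumed, same dict-of-neighbour-lists shape):

    def dfs(graph, start, visited=None):
        # Recurse only into the neighbours of the current vertex,
        # which is what depth-first search requires.
        if visited is None:
            visited = set()
        visited.add(start)
        print(start, end=' ')
        for neighbour in graph.get(start, []):
            if neighbour not in visited:
                dfs(graph, neighbour, visited)

    # dfs({0: [1, 2], 1: [2], 2: [0, 3], 3: [3]}, 0) prints: 0 1 2 3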
machine_learning/decision_tree.py | 139 lines (new file)

@@ -0,0 +1,139 @@
+"""
+Implementation of a basic regression decision tree.
+Input data set: The input data set must be 1-dimensional with continuous labels.
+Output: The decision tree maps a real number input to a real number output.
+"""
+
+import numpy as np
+
+class Decision_Tree:
+    def __init__(self, depth = 5, min_leaf_size = 5):
+        self.depth = depth
+        self.decision_boundary = 0
+        self.left = None
+        self.right = None
+        self.min_leaf_size = min_leaf_size
+        self.prediction = None
+
+    def mean_squared_error(self, labels, prediction):
+        """
+        mean_squared_error:
+        @param labels: a one dimensional numpy array
+        @param prediction: a floating point value
+        return value: mean_squared_error calculates the error if prediction is used to estimate the labels
+        """
+        if labels.ndim != 1:
+            print("Error: Input labels must be one dimensional")
+
+        return np.mean((labels - prediction) ** 2)
+
+    def train(self, X, y):
+        """
+        train:
+        @param X: a one dimensional numpy array
+        @param y: a one dimensional numpy array.
+        The contents of y are the labels for the corresponding X values
+
+        train does not have a return value
+        """
+
+        """
+        this section is to check that the inputs conform to our dimensionality constraints
+        """
+        if X.ndim != 1:
+            print("Error: Input data set must be one dimensional")
+            return
+        if len(X) != len(y):
+            print("Error: X and y have different lengths")
+            return
+        if y.ndim != 1:
+            print("Error: Data set labels must be one dimensional")
+            return
+
+        if len(X) < 2 * self.min_leaf_size:
+            self.prediction = np.mean(y)
+            return
+
+        if self.depth == 1:
+            self.prediction = np.mean(y)
+            return
+
+        best_split = 0
+        # baseline: twice the error of predicting the mean label for the whole array
+        min_error = self.mean_squared_error(y, np.mean(y)) * 2
+
+        """
+        loop over all possible splits for the decision tree. find the best split.
+        if no split exists that is less than 2 * error for the entire array
+        then the data set is not split and the average for the entire array is used as the predictor
+        """
+        for i in range(len(X)):
+            if len(X[:i]) < self.min_leaf_size:
+                continue
+            elif len(X[i:]) < self.min_leaf_size:
+                continue
+            else:
+                # error of predicting the mean label on each side of the candidate split
+                error_left = self.mean_squared_error(y[:i], np.mean(y[:i]))
+                error_right = self.mean_squared_error(y[i:], np.mean(y[i:]))
+                error = error_left + error_right
+                if error < min_error:
+                    best_split = i
+                    min_error = error
+
+        if best_split != 0:
+            left_X = X[:best_split]
+            left_y = y[:best_split]
+            right_X = X[best_split:]
+            right_y = y[best_split:]
+
+            self.decision_boundary = X[best_split]
+            self.left = Decision_Tree(depth = self.depth - 1, min_leaf_size = self.min_leaf_size)
+            self.right = Decision_Tree(depth = self.depth - 1, min_leaf_size = self.min_leaf_size)
+            self.left.train(left_X, left_y)
+            self.right.train(right_X, right_y)
+        else:
+            self.prediction = np.mean(y)
+
+        return
+
+    def predict(self, x):
+        """
+        predict:
+        @param x: a floating point value to predict the label of
+        the prediction function works by recursively calling the predict function
+        of the appropriate subtrees based on the tree's decision boundary
+        """
+        if self.prediction is not None:
+            return self.prediction
+        elif self.left is not None and self.right is not None:
+            if x >= self.decision_boundary:
+                return self.right.predict(x)
+            else:
+                return self.left.predict(x)
+        else:
+            print("Error: Decision tree not yet trained")
+            return None
+
+def main():
+    """
+    In this demonstration we're generating a sample data set from the sin function in numpy.
+    We then train a decision tree on the data set and use the decision tree to predict the
+    label of 10 different test values. Then the mean squared error over this test is displayed.
+    """
+    X = np.arange(-1., 1., 0.005)
+    y = np.sin(X)
+
+    tree = Decision_Tree(depth = 10, min_leaf_size = 10)
+    tree.train(X, y)
+
+    test_cases = (np.random.rand(10) * 2) - 1
+    predictions = np.array([tree.predict(x) for x in test_cases])
+    # compare predictions against the true labels sin(x), not the inputs themselves
+    avg_error = np.mean((predictions - np.sin(test_cases)) ** 2)
+
+    print("Test values: " + str(test_cases))
+    print("Predictions: " + str(predictions))
+    print("Average error: " + str(avg_error))
+
+if __name__ == '__main__':
+    main()
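A small deterministic sanity check of the tree above (illustrative values, assuming Decision_Tree is in scope or importable from machine_learning.decision_tree):

    import numpy as np

    # A step function should be recovered exactly: one split at x = 5.
    X = np.arange(10.0)               # inputs 0..9
    y = np.where(X < 5, 0.0, 1.0)     # labels: 0 below 5, 1 from 5 up
    tree = Decision_Tree(depth=3, min_leaf_size=2)
    tree.train(X, y)
    print(tree.predict(2.0))  # ~0.0
    print(tree.predict(7.0))  # ~1.0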
@@ -110,9 +110,9 @@ def binary_search_by_recursion(sorted_collection, item, left, right):
     if sorted_collection[midpoint] == item:
         return midpoint
     elif sorted_collection[midpoint] > item:
-        return binary_search_by_recursion(sorted_collection, item, left, right-1)
+        return binary_search_by_recursion(sorted_collection, item, left, midpoint-1)
     else:
-        return binary_search_by_recursion(sorted_collection, item, left+1, right)
+        return binary_search_by_recursion(sorted_collection, item, midpoint+1, right)

 def __assert_sorted(collection):
     """Check if collection is sorted, if not - raises :py:class:`ValueError`
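The change matters for complexity: recursing with right-1 / left+1 discards one element per call (O(n) worst case), while midpoint-1 / midpoint+1 halves the range (O(log n)). An illustrative trace, assuming midpoint = (left + right) // 2:

    # Searching for 15 in [0, 5, 7, 10, 15], left=0, right=4:
    #   midpoint = 2, value 7  < 15 -> recurse on (midpoint+1, right) = (3, 4)
    #   midpoint = 3, value 10 < 15 -> recurse on (4, 4)
    #   midpoint = 4, value 15 == 15 -> return 4
    # With the old left+1 step the same search takes 5 calls instead of 3.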
searches/quick_select.py | 47 lines (new file)

@@ -0,0 +1,47 @@
+import random
+
+"""
+A python implementation of the quick select algorithm, which is efficient for
+calculating the value that would appear in the index of a list if it would be
+sorted, even if it is not already sorted
+https://en.wikipedia.org/wiki/Quickselect
+"""
+
+def _partition(data, pivot):
+    """
+    Three way partition the data into smaller, equal and greater lists,
+    in relationship to the pivot
+    :param data: The data to be sorted (a list)
+    :param pivot: The value to partition the data on
+    :return: Three lists: smaller, equal and greater
+    """
+    less, equal, greater = [], [], []
+    for element in data:
+        # compare the values directly; the elements themselves are the keys
+        if element < pivot:
+            less.append(element)
+        elif element > pivot:
+            greater.append(element)
+        else:
+            equal.append(element)
+    return less, equal, greater
+
+def quickSelect(items, k):
+    # k = len(items) // 2 when trying to find the median
+    # (index that value would be when items is sorted)
+    pivot = items[random.randint(0, len(items) - 1)]
+    smaller, equal, larger = _partition(items, pivot)
+    count = len(equal)
+    m = len(smaller)
+
+    # k lands on the pivot
+    if m <= k < m + count:
+        return pivot
+    # must be in smaller
+    elif m > k:
+        return quickSelect(smaller, k)
+    # must be in larger
+    else:
+        return quickSelect(larger, k - (m + count))
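Usage, with k counted from zero (an illustrative example, not part of the file):

    data = [2, 9, 4, 7, 1]
    print(quickSelect(data, 0))               # 1, the minimum
    print(quickSelect(data, len(data) // 2))  # 4, the median
    print(quickSelect(data, len(data) - 1))   # 9, the maximum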