Merge branch 'master' into master

Francisco Matias 2017-08-28 14:32:54 -03:00 committed by GitHub
commit 6ee6f1236f
19 changed files with 977 additions and 43 deletions

.travis.yml

@@ -6,8 +6,7 @@ python:
- "3.5"
- "3.6"
- "3.6-dev"
- "3.7-dev"
- "nightly"
install:
- if [ "$TRAVIS_PYTHON_VERSION" == "3.2" ]; then travis_retry pip install coverage==3.7.1; fi
- if [ "$TRAVIS_PYTHON_VERSION" != "3.2" ]; then travis_retry pip install coverage; fi


@@ -1,12 +1,10 @@
class Graph:
class GRAPH:
    """docstring for GRAPH"""
    def __init__(self, nodes):
        self.nodes = nodes
        self.graph = [[0] * nodes for i in range(nodes)]
        self.visited = [0] * nodes
    def __init__(self, vertex):
        self.vertex = vertex
        self.graph = [[0] * vertex for i in range(vertex)]
    def add_edge(self, u, v):
        self.graph[u - 1][v - 1] = 1
        self.graph[v - 1][u - 1] = 1
    def show(self):
@@ -43,3 +41,30 @@ g.add_edge(4,8)
g.add_edge(5,9)
g.add_edge(6,10)
g.bfs(4)
        print(self.graph)
    def add_edge(self, i, j):
        self.graph[i][j] = 1
        self.graph[j][i] = 1
    def bfs(self, s):
        queue = [s]
        self.visited[s] = 1
        while len(queue) != 0:
            x = queue.pop(0)
            print(x)
            for i in range(0, self.nodes):
                if self.graph[x][i] == 1 and self.visited[i] == 0:
                    queue.append(i)
                    self.visited[i] = 1

n = int(input("Enter the number of Nodes : "))
g = GRAPH(n)
e = int(input("Enter the no of edges : "))
print("Enter the edges (u v)")
for i in range(0, e):
    u, v = map(int, input().split())
    g.add_edge(u, v)
s = int(input("Enter the source node :"))
g.bfs(s)


@@ -1,33 +1,32 @@
class Graph:
class GRAPH:
    """docstring for GRAPH"""
    def __init__(self, nodes):
        self.nodes = nodes
        self.graph = [[0] * nodes for i in range(nodes)]
        self.visited = [0] * nodes
    def __init__(self, vertex):
        self.vertex = vertex
        self.graph = [[0] * vertex for i in range(vertex)]
        self.visited = [False] * vertex
    def add_edge(self, u, v):
        self.graph[u - 1][v - 1] = 1
        self.graph[v - 1][u - 1] = 1
    def show(self):
        print(self.graph)
        for i in self.graph:
            for j in i:
                print(j, end=' ')
            print(' ')
    def add_edge(self, i, j):
        self.graph[i][j] = 1
        self.graph[j][i] = 1
    def dfs(self, u):
        self.visited[u - 1] = True
        print('%d visited' % u)
        for i in range(1, self.vertex + 1):
            if self.graph[u - 1][i - 1] == 1 and self.visited[i - 1] == False:
    def dfs(self, s):
        self.visited[s] = 1
        print(s)
        for i in range(0, self.nodes):
            if self.visited[i] == 0 and self.graph[s][i] == 1:
                self.dfs(i)

g = Graph(5)
g.add_edge(1,4)
g.add_edge(4,2)
g.add_edge(4,5)
g.add_edge(2,5)
g.add_edge(5,3)
g.dfs(1)
n = int(input("Enter the number of Nodes : "))
g = GRAPH(n)
e = int(input("Enter the no of edges : "))
print("Enter the edges (u v)")
for i in range(0, e):
    u, v = map(int, input().split())
    g.add_edge(u, v)
s = int(input("Enter the source node :"))
g.dfs(s)

README.md

@@ -74,7 +74,7 @@ __Properties__
### Shell
![alt text][shell-image]
From [Wikipedia][shell-wiki]: Shellsort is a generalization of insertion sort that allows the exchange of items that are far apart. The idea is to arrange the list of elements so that, starting anywherem considereing every nth element gives a sorted list. Such a list is said to be h-sorted. Equivanelty, it can be thought of as h intterleaved lists, each individually sorted.
From [Wikipedia][shell-wiki]: Shellsort is a generalization of insertion sort that allows the exchange of items that are far apart. The idea is to arrange the list of elements so that, starting anywhere, considering every nth element gives a sorted list. Such a list is said to be h-sorted. Equivalently, it can be thought of as h interleaved lists, each individually sorted.
__Properties__
* Worst case performance O(n(log n)^2)
@@ -83,7 +83,7 @@ __Properties__
###### View the algorithm in [action][shell-toptal]
###Time-Compexity Graphs
### Time-Complexity Graphs
Comparing the complexity of sorting algorithms (Bubble Sort, Insertion Sort, Selection Sort)
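For reference, a minimal sketch of the Shellsort idea described above (illustrative only, not the repository's implementation), using the simple n/2, n/4, ..., 1 gap sequence:

def shell_sort(collection):
    """Shellsort with the plain n/2 gap sequence; sorts in place."""
    gap = len(collection) // 2
    while gap > 0:
        # One insertion-sort pass over elements `gap` apart leaves the
        # list gap-sorted (h-sorted, in the terminology above).
        for i in range(gap, len(collection)):
            temp = collection[i]
            j = i
            while j >= gap and collection[j - gap] > temp:
                collection[j] = collection[j - gap]
                j -= gap
            collection[j] = temp
        gap //= 2
    return collection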

data_structures/AVL/AVL.py Normal file

@@ -0,0 +1,131 @@
'''
An AVL tree.
'''

class Node:
    def __init__(self, label):
        self.label = label
        self.left = None
        self.right = None
        self.parent = None
        self.height = 0

    def getLabel(self):
        return self.label

    def setLabel(self, label):
        self.label = label

    def getLeft(self):
        return self.left

    def setLeft(self, left):
        self.left = left

    def getRight(self):
        return self.right

    def setRight(self, right):
        self.right = right

    def getParent(self):
        return self.parent

    def setParent(self, parent):
        self.parent = parent

    def setHeight(self, height):
        self.height = height

    def getHeight(self):
        return self.height

class AVL:
    def __init__(self):
        self.root = None
        self.size = 0

    def insert(self, value):
        node = Node(value)
        if self.root is None:
            self.root = node
            self.size = 1
        else:
            # Same as a plain binary search tree insert
            dad_node = None
            curr_node = self.root
            while True:
                if curr_node is not None:
                    dad_node = curr_node
                    if node.getLabel() < curr_node.getLabel():
                        curr_node = curr_node.getLeft()
                    else:
                        curr_node = curr_node.getRight()
                else:
                    if node.getLabel() < dad_node.getLabel():
                        dad_node.setLeft(node)
                        dad_node.setHeight(dad_node.getHeight() + 1)
                        # NOTE: assumes both children exist; a None child
                        # would need to be treated as height -1.
                        if (dad_node.getRight().getHeight() -
                                dad_node.getLeft().getHeight() > 1):
                            self.rebalance(dad_node)
                    else:
                        dad_node.setRight(node)
                        dad_node.setHeight(dad_node.getHeight() + 1)
                        if (dad_node.getRight().getHeight() -
                                dad_node.getLeft().getHeight() > 1):
                            self.rebalance(dad_node)
                    break

    def rebalance(self, node):
        # The four balance-restoring cases are still unimplemented stubs.
        if (node.getRight().getHeight() -
                node.getLeft().getHeight() > 1):
            if (node.getRight().getHeight() >
                    node.getLeft().getHeight()):
                pass
            else:
                pass
        elif (node.getRight().getHeight() -
                node.getLeft().getHeight() > 2):
            if (node.getRight().getHeight() >
                    node.getLeft().getHeight()):
                pass
            else:
                pass

    def rotate_left(self, node):
        # TODO: incomplete sketch -- drops the old left subtree and never
        # re-attaches the result (a conventional version is sketched below).
        aux = node.getLabel()
        node = node.getRight()
        node.setHeight(node.getHeight() - 1)
        node.setLeft(Node(aux))
        node.getLeft().setHeight(node.getHeight() + 1)
        node.getRight().setHeight(node.getRight().getHeight() - 1)

    def rotate_right(self, node):
        # TODO: incomplete sketch, intended mirror of rotate_left.
        aux = node.getLabel()
        node = node.getLeft()
        node.setHeight(node.getHeight() - 1)
        node.setRight(Node(aux))
        node.getLeft().setHeight(node.getHeight() + 1)
        node.getLeft().setHeight(node.getLeft().getHeight() - 1)

    def double_rotate_left(self, node):
        self.rotate_right(node.getRight().getRight())
        self.rotate_left(node)

    def double_rotate_right(self, node):
        self.rotate_left(node.getLeft().getLeft())
        self.rotate_right(node)
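Since the rotation methods above are still stubs, here is a minimal sketch of a conventional left rotation over the same Node class. Assumptions of this sketch: an empty subtree has height -1, and heights are recomputed from the children rather than patched incrementally.

def subtree_height(node):
    # height of an empty subtree is taken as -1 (sketch assumption)
    return node.getHeight() if node is not None else -1

def update_height(node):
    node.setHeight(1 + max(subtree_height(node.getLeft()),
                           subtree_height(node.getRight())))

def rotate_left(node):
    pivot = node.getRight()          # the right child becomes the subtree root
    node.setRight(pivot.getLeft())   # pivot's left subtree moves under node
    pivot.setLeft(node)
    update_height(node)              # node now sits below pivot
    update_height(pivot)
    return pivot                     # caller re-attaches the returned root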


@@ -1,6 +1,8 @@
'''
A binary search tree.
'''

class Node:
    def __init__(self, label):
@@ -12,7 +14,7 @@ class Node:
        return self.label

    def setLabel(self, label):
        self.label = label
        self.label = label

    def getLeft(self):
        return self.left
@@ -34,7 +36,7 @@ class BinarySearchTree:
    def insert(self, label):
        #Create a new Node
        # Create a new Node
        node = Node(label)
@@ -45,7 +47,7 @@ class BinarySearchTree:
        curr_node = self.root
        while True:
            if curr_node != None:
            if curr_node is not None:
                dad_node = curr_node
@@ -61,12 +63,12 @@ class BinarySearchTree:
                break

    def empty(self):
        if self.root == None:
        if self.root is None:
            return True
        return False

    def preShow(self, curr_node):
        if curr_node != None:
        if curr_node is not None:
            print(curr_node.getLabel(), end=" ")
            self.preShow(curr_node.getLeft())


@@ -0,0 +1,40 @@
# Author: OMKAR PATHAK
# We can use Python's dictionary for constructing the graph.

class AdjacencyList(object):
    def __init__(self):
        self.List = {}

    def addEdge(self, fromVertex, toVertex):
        # check if vertex is already present
        if fromVertex in self.List.keys():
            self.List[fromVertex].append(toVertex)
        else:
            self.List[fromVertex] = [toVertex]

    def printList(self):
        for i in self.List:
            print(i, '->', ' -> '.join([str(j) for j in self.List[i]]))

if __name__ == '__main__':
    al = AdjacencyList()
    al.addEdge(0, 1)
    al.addEdge(0, 4)
    al.addEdge(4, 1)
    al.addEdge(4, 3)
    al.addEdge(1, 0)
    al.addEdge(1, 4)
    al.addEdge(1, 3)
    al.addEdge(1, 2)
    al.addEdge(2, 3)
    al.addEdge(3, 4)
    al.printList()

# OUTPUT:
# 0 -> 1 -> 4
# 1 -> 0 -> 4 -> 3 -> 2
# 2 -> 3
# 3 -> 4
# 4 -> 1 -> 3


@@ -0,0 +1,61 @@
# Author: OMKAR PATHAK

class Graph():
    def __init__(self):
        self.vertex = {}

    # for printing the Graph vertices
    def printGraph(self):
        for i in self.vertex.keys():
            print(i, ' -> ', ' -> '.join([str(j) for j in self.vertex[i]]))

    # for adding the edge between two vertices
    def addEdge(self, fromVertex, toVertex):
        # check if vertex is already present,
        if fromVertex in self.vertex.keys():
            self.vertex[fromVertex].append(toVertex)
        else:
            # else make a new vertex
            self.vertex[fromVertex] = [toVertex]

    def BFS(self, startVertex):
        # Take a list for storing already visited vertices
        visited = [False] * len(self.vertex)

        # create a list to store all the vertices for BFS
        queue = []

        # mark the source node as visited and enqueue it
        visited[startVertex] = True
        queue.append(startVertex)

        while queue:
            startVertex = queue.pop(0)
            print(startVertex, end=' ')

            # mark all adjacent nodes as visited and enqueue them
            for i in self.vertex[startVertex]:
                if visited[i] == False:
                    queue.append(i)
                    visited[i] = True

if __name__ == '__main__':
    g = Graph()
    g.addEdge(0, 1)
    g.addEdge(0, 2)
    g.addEdge(1, 2)
    g.addEdge(2, 0)
    g.addEdge(2, 3)
    g.addEdge(3, 3)
    g.printGraph()
    print('BFS:')
    g.BFS(2)

# OUTPUT:
# 0  ->  1 -> 2
# 1  ->  2
# 2  ->  0 -> 3
# 3  ->  3
# BFS:
# 2 0 3 1


@@ -0,0 +1,61 @@
# Author: OMKAR PATHAK

class Graph():
    def __init__(self):
        self.vertex = {}

    # for printing the Graph vertices
    def printGraph(self):
        print(self.vertex)
        for i in self.vertex.keys():
            print(i, ' -> ', ' -> '.join([str(j) for j in self.vertex[i]]))

    # for adding the edge between two vertices
    def addEdge(self, fromVertex, toVertex):
        # check if vertex is already present,
        if fromVertex in self.vertex.keys():
            self.vertex[fromVertex].append(toVertex)
        else:
            # else make a new vertex
            self.vertex[fromVertex] = [toVertex]

    def DFS(self):
        # visited array for storing already visited nodes
        visited = [False] * len(self.vertex)

        # call the recursive helper function
        for i in range(len(self.vertex)):
            if visited[i] == False:
                self.DFSRec(i, visited)

    def DFSRec(self, startVertex, visited):
        # mark start vertex as visited
        visited[startVertex] = True
        print(startVertex, end=' ')

        # recur for all the vertices that are adjacent to this node
        for i in self.vertex[startVertex]:
            if visited[i] == False:
                self.DFSRec(i, visited)

if __name__ == '__main__':
    g = Graph()
    g.addEdge(0, 1)
    g.addEdge(0, 2)
    g.addEdge(1, 2)
    g.addEdge(2, 0)
    g.addEdge(2, 3)
    g.addEdge(3, 3)
    g.printGraph()
    print('DFS:')
    g.DFS()

# OUTPUT:
# 0  ->  1 -> 2
# 1  ->  2
# 2  ->  0 -> 3
# 3  ->  3
# DFS:
# 0 1 2 3


@@ -0,0 +1,27 @@
# Author: OMKAR PATHAK
import Stack

def parseParenthesis(string):
    balanced = 1
    index = 0
    myStack = Stack.Stack(len(string))
    while (index < len(string)) and (balanced == 1):
        check = string[index]
        if check == '(':
            myStack.push(check)
        else:
            if myStack.isEmpty():
                balanced = 0
            else:
                myStack.pop()
        index += 1

    if balanced == 1 and myStack.isEmpty():
        return True
    else:
        return False

if __name__ == '__main__':
    print(parseParenthesis('((()))'))  # True
    print(parseParenthesis('((())'))   # False


@@ -0,0 +1,48 @@
# Author: OMKAR PATHAK
import Stack

def isOperand(char):
    return (ord(char) >= ord('a') and ord(char) <= ord('z')) or (ord(char) >= ord('A') and ord(char) <= ord('Z'))

def precedence(char):
    if char == '+' or char == '-':
        return 1
    elif char == '*' or char == '/':
        return 2
    elif char == '^':
        return 3
    else:
        return -1

def infixToPostfix(myExp, myStack):
    postFix = []
    for i in range(len(myExp)):
        if (isOperand(myExp[i])):
            postFix.append(myExp[i])
        elif(myExp[i] == '('):
            myStack.push(myExp[i])
        elif(myExp[i] == ')'):
            topOperator = myStack.pop()
            while(not myStack.isEmpty() and topOperator != '('):
                postFix.append(topOperator)
                topOperator = myStack.pop()
        else:
            while (not myStack.isEmpty()) and (precedence(myExp[i]) <= precedence(myStack.peek())):
                postFix.append(myStack.pop())
            myStack.push(myExp[i])

    while(not myStack.isEmpty()):
        postFix.append(myStack.pop())
    return ' '.join(postFix)

if __name__ == '__main__':
    myExp = 'a+b*(c^d-e)^(f+g*h)-i'
    myExp = [i for i in myExp]
    print('Infix:', ' '.join(myExp))
    myStack = Stack.Stack(len(myExp))
    print('Postfix:', infixToPostfix(myExp, myStack))

# OUTPUT:
# Infix: a + b * ( c ^ d - e ) ^ ( f + g * h ) - i
# Postfix: a b c d ^ e - f g h * + ^ * + i -


@@ -0,0 +1,50 @@
# Author: OMKAR PATHAK
class Stack(object):
    def __init__(self, limit=10):
        self.stack = []
        self.limit = limit

    # for printing the stack contents
    def __str__(self):
        return ' '.join([str(i) for i in self.stack])

    # for pushing an element on to the stack
    def push(self, data):
        if len(self.stack) >= self.limit:
            print('Stack Overflow')
        else:
            self.stack.append(data)

    # for popping the uppermost element
    def pop(self):
        if len(self.stack) <= 0:
            return -1
        else:
            return self.stack.pop()

    # for peeking the top-most element of the stack
    def peek(self):
        if len(self.stack) <= 0:
            return -1
        else:
            return self.stack[len(self.stack) - 1]

    # to check if stack is empty
    def isEmpty(self):
        return self.stack == []

    # for checking the size of stack
    def size(self):
        return len(self.stack)

if __name__ == '__main__':
    myStack = Stack()
    for i in range(10):
        myStack.push(i)
    print(myStack)
    myStack.pop()      # popping the top element
    print(myStack)
    myStack.peek()     # peeking the top element
    myStack.isEmpty()
    myStack.size()
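# OUTPUT:
# 0 1 2 3 4 5 6 7 8 9
# 0 1 2 3 4 5 6 7 8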


@@ -0,0 +1,141 @@
import tensorflow as tf
from random import choice, shuffle
from numpy import array

# NOTE: this file targets the legacy (pre-1.0) TensorFlow API
# (tf.sub, tf.initialize_all_variables, tf.Session).

def TFKMeansCluster(vectors, noofclusters):
    """
    K-Means Clustering using TensorFlow.
    'vectors' should be a n*k 2-D NumPy array, where n is the number
    of vectors of dimensionality k.
    'noofclusters' should be an integer.
    """
    noofclusters = int(noofclusters)
    assert noofclusters < len(vectors)

    # Find out the dimensionality
    dim = len(vectors[0])

    # Will help select random centroids from among the available vectors
    vector_indices = list(range(len(vectors)))
    shuffle(vector_indices)

    # GRAPH OF COMPUTATION
    # We initialize a new graph and set it as the default during each run
    # of this algorithm. This ensures that as this function is called
    # multiple times, the default graph doesn't keep getting crowded with
    # unused ops and Variables from previous function calls.
    graph = tf.Graph()

    with graph.as_default():
        # SESSION OF COMPUTATION
        sess = tf.Session()

        ## CONSTRUCTING THE ELEMENTS OF COMPUTATION
        ## First let's ensure we have a Variable vector for each centroid,
        ## initialized to one of the vectors from the available data points
        centroids = [tf.Variable((vectors[vector_indices[i]]))
                     for i in range(noofclusters)]
        ## These nodes will assign the centroid Variables the appropriate
        ## values
        centroid_value = tf.placeholder("float64", [dim])
        cent_assigns = []
        for centroid in centroids:
            cent_assigns.append(tf.assign(centroid, centroid_value))

        ## Variables for cluster assignments of individual vectors
        ## (initialized to 0 at first)
        assignments = [tf.Variable(0) for i in range(len(vectors))]
        ## These nodes will assign an assignment Variable the appropriate
        ## value
        assignment_value = tf.placeholder("int32")
        cluster_assigns = []
        for assignment in assignments:
            cluster_assigns.append(tf.assign(assignment,
                                             assignment_value))

        ## Now let's construct the node that will compute the mean
        # The placeholder for the input
        mean_input = tf.placeholder("float", [None, dim])
        # The Node/op takes the input and computes a mean along the 0th
        # dimension, i.e. the list of input vectors
        mean_op = tf.reduce_mean(mean_input, 0)

        ## Node for computing Euclidean distances
        # Placeholders for input
        v1 = tf.placeholder("float", [dim])
        v2 = tf.placeholder("float", [dim])
        euclid_dist = tf.sqrt(tf.reduce_sum(tf.pow(tf.sub(
            v1, v2), 2)))

        ## This node will figure out which cluster to assign a vector to,
        ## based on Euclidean distances of the vector from the centroids.
        # Placeholder for input
        centroid_distances = tf.placeholder("float", [noofclusters])
        cluster_assignment = tf.argmin(centroid_distances, 0)

        ## INITIALIZING STATE VARIABLES
        ## This will help initialization of all Variables defined with respect
        ## to the graph. The Variable-initializer should be defined after
        ## all the Variables have been constructed, so that each of them
        ## will be included in the initialization.
        init_op = tf.initialize_all_variables()

        # Initialize all variables
        sess.run(init_op)

        ## CLUSTERING ITERATIONS
        # Now perform the Expectation-Maximization steps of K-Means clustering
        # iterations. To keep things simple, we will only do a set number of
        # iterations, instead of using a Stopping Criterion.
        noofiterations = 100
        for iteration_n in range(noofiterations):

            ## EXPECTATION STEP
            ## Based on the centroid locations till last iteration, compute
            ## the _expected_ centroid assignments.
            # Iterate over each vector
            for vector_n in range(len(vectors)):
                vect = vectors[vector_n]
                # Compute Euclidean distance between this vector and each
                # centroid. Remember that this list cannot be named
                # 'centroid_distances', since that is the input to the
                # cluster assignment node.
                distances = [sess.run(euclid_dist, feed_dict={
                    v1: vect, v2: sess.run(centroid)})
                    for centroid in centroids]
                # Now use the cluster assignment node, with the distances
                # as the input
                assignment = sess.run(cluster_assignment, feed_dict={
                    centroid_distances: distances})
                # Now assign the value to the appropriate state variable
                sess.run(cluster_assigns[vector_n], feed_dict={
                    assignment_value: assignment})

            ## MAXIMIZATION STEP
            # Based on the expected state computed from the Expectation Step,
            # compute the locations of the centroids so as to maximize the
            # overall objective of minimizing within-cluster Sum-of-Squares
            for cluster_n in range(noofclusters):
                # Collect all the vectors assigned to this cluster
                assigned_vects = [vectors[i] for i in range(len(vectors))
                                  if sess.run(assignments[i]) == cluster_n]
                # Compute new centroid location
                new_location = sess.run(mean_op, feed_dict={
                    mean_input: array(assigned_vects)})
                # Assign value to appropriate variable
                sess.run(cent_assigns[cluster_n], feed_dict={
                    centroid_value: new_location})

        # Return centroids and assignments
        centroids = sess.run(centroids)
        assignments = sess.run(assignments)
        return centroids, assignments
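A minimal usage sketch (synthetic data, illustrative only; assumes the same legacy TensorFlow API noted above):

import numpy as np

# Three loose blobs of 2-D points around different centers.
points = np.concatenate([np.random.randn(20, 2) + offset
                         for offset in ([0, 0], [6, 6], [0, 6])])
centroids, assignments = TFKMeansCluster(points, 3)
print(centroids)        # final centroid locations
print(assignments[:5])  # cluster index of the first five vectors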


@@ -0,0 +1,121 @@
"""
Implementation of gradient descent algorithm for minimizing cost of a linear
hypothesis function.
"""
import numpy

# List of input, output pairs
train_data = (((5, 2, 3), 15), ((6, 5, 9), 25),
              ((11, 12, 13), 41), ((1, 1, 1), 8), ((11, 12, 13), 41))
test_data = (((515, 22, 13), 555), ((61, 35, 49), 150))
parameter_vector = [2, 4, 1, 5]
m = len(train_data)
LEARNING_RATE = 0.009

def _error(example_no, data_set='train'):
    """
    :param data_set: train data or test data
    :param example_no: example number whose error has to be checked
    :return: error in example pointed by example number.
    """
    return calculate_hypothesis_value(example_no, data_set) - output(example_no, data_set)

def _hypothesis_value(data_input_tuple):
    """
    Calculates hypothesis function value for a given input
    :param data_input_tuple: Input tuple of a particular example
    :return: Value of hypothesis function at that point.
    Note that there is a 'bias input' whose value is fixed as 1.
    It is not explicitly mentioned in the input data, but ML hypothesis
    functions use it, so it is handled separately below by adding
    parameter_vector[0], the bias parameter.
    """
    hyp_val = 0
    for i in range(len(parameter_vector) - 1):
        hyp_val += data_input_tuple[i] * parameter_vector[i + 1]
    hyp_val += parameter_vector[0]
    return hyp_val

def output(example_no, data_set):
    """
    :param data_set: test data or train data
    :param example_no: example whose output is to be fetched
    :return: output for that example
    """
    if data_set == 'train':
        return train_data[example_no][1]
    elif data_set == 'test':
        return test_data[example_no][1]

def calculate_hypothesis_value(example_no, data_set):
    """
    Calculates hypothesis value for a given example
    :param data_set: test data or train data
    :param example_no: example whose hypothesis value is to be calculated
    :return: hypothesis value for that example
    """
    if data_set == "train":
        return _hypothesis_value(train_data[example_no][0])
    elif data_set == "test":
        return _hypothesis_value(test_data[example_no][0])

def summation_of_cost_derivative(index, end=m):
    """
    Calculates the sum of cost function derivative
    :param index: index wrt which derivative is being calculated
    :param end: value where summation ends, default is m, number of examples
    :return: Returns the summation of cost derivative
    Note: If index is -1, this means we are calculating summation wrt the
    bias parameter.
    """
    summation_value = 0
    for i in range(end):
        if index == -1:
            summation_value += _error(i)
        else:
            summation_value += _error(i) * train_data[i][0][index]
    return summation_value

def get_cost_derivative(index):
    """
    :param index: index of the parameter vector wrt which the derivative is
    to be calculated
    :return: derivative wrt that index
    Note: If index is -1, this means we are calculating summation wrt the
    bias parameter.
    """
    cost_derivative_value = summation_of_cost_derivative(index, m) / m
    return cost_derivative_value
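# For reference, get_cost_derivative(j) computes the batch-gradient term
#     dJ/dtheta_j = (1/m) * sum_i (h(x_i) - y_i) * x_i[j]
# (with the input fixed at 1 for the bias term, index -1), and
# run_gradient_descent below applies the simultaneous update
#     theta_j <- theta_j - LEARNING_RATE * dJ/dtheta_j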
def run_gradient_descent():
    global parameter_vector
    # Tune these values to set a tolerance value for predicted output
    absolute_error_limit = 0.000002
    relative_error_limit = 0
    j = 0
    while True:
        j += 1
        temp_parameter_vector = [0, 0, 0, 0]
        for i in range(0, len(parameter_vector)):
            cost_derivative = get_cost_derivative(i - 1)
            temp_parameter_vector[i] = parameter_vector[i] - \
                LEARNING_RATE * cost_derivative
        if numpy.allclose(parameter_vector, temp_parameter_vector,
                          atol=absolute_error_limit, rtol=relative_error_limit):
            break
        parameter_vector = temp_parameter_vector
    print("Number of iterations:", j)

def test_gradient_descent():
    for i in range(len(test_data)):
        print("Actual output value:", output(i, 'test'))
        print("Hypothesis output:", calculate_hypothesis_value(i, 'test'))

if __name__ == '__main__':
    run_gradient_descent()
    print("\nTesting gradient descent for a linear hypothesis function.\n")
    test_gradient_descent()


@@ -0,0 +1,108 @@
"""
Linear regression is the most basic type of regression commonly used for
predictive analysis. The idea is pretty simple: we have a dataset and features
associated with it. The features should be chosen very cautiously, as they
determine how well our model will be able to make future predictions. We try
to set these feature weights, over many iterations, so that they best fit our
dataset. In this particular code, I used a CSGO dataset (ADR vs Rating). We
try to fit a line through the dataset and estimate its parameters.
"""
import requests
import numpy as np

def collect_dataset():
    """ Collect dataset of CSGO
    The dataset contains ADR vs Rating of a Player
    :return : dataset obtained from the link, as matrix
    """
    response = requests.get('https://raw.githubusercontent.com/yashLadha/' +
                            'The_Math_of_Intelligence/master/Week1/ADRvs' +
                            'Rating.csv')
    lines = response.text.splitlines()
    data = []
    for item in lines:
        item = item.split(',')
        data.append(item)
    data.pop(0)  # This is for removing the labels from the list
    dataset = np.matrix(data)
    return dataset

def run_steep_gradient_descent(data_x, data_y,
                               len_data, alpha, theta):
    """ Run steep gradient descent and update the feature vector accordingly
    :param data_x : contains the dataset
    :param data_y : contains the output associated with each data-entry
    :param len_data : length of the data
    :param alpha : learning rate of the model
    :param theta : feature vector (weights for our model)
    :return : updated features, using
        curr_features - alpha * gradient (w.r.t. feature)
    """
    n = len_data
    prod = np.dot(theta, data_x.transpose())
    prod -= data_y.transpose()
    sum_grad = np.dot(prod, data_x)
    theta = theta - (alpha / n) * sum_grad
    return theta

def sum_of_square_error(data_x, data_y, len_data, theta):
    """ Return sum of square error for error calculation
    :param data_x : contains our dataset
    :param data_y : contains the output (result vector)
    :param len_data : len of the dataset
    :param theta : contains the feature vector
    :return : sum of square error computed from the given features
    """
    prod = np.dot(theta, data_x.transpose())
    prod -= data_y.transpose()
    sum_elem = np.sum(np.square(prod))
    error = sum_elem / (2 * len_data)
    return error

def run_linear_regression(data_x, data_y):
    """ Implement linear regression over the dataset
    :param data_x : contains our dataset
    :param data_y : contains the output (result vector)
    :return : feature vector for the line of best fit
    """
    iterations = 100000
    alpha = 0.0001550

    no_features = data_x.shape[1]
    len_data = data_x.shape[0] - 1

    theta = np.zeros((1, no_features))

    for i in range(0, iterations):
        theta = run_steep_gradient_descent(data_x, data_y,
                                           len_data, alpha, theta)
        error = sum_of_square_error(data_x, data_y, len_data, theta)
        print('At Iteration %d - Error is %.5f ' % (i + 1, error))

    return theta

def main():
    """ Driver function """
    data = collect_dataset()

    len_data = data.shape[0]
    data_x = np.c_[np.ones(len_data), data[:, :-1]].astype(float)
    data_y = data[:, -1].astype(float)

    theta = run_linear_regression(data_x, data_y)
    len_result = theta.shape[1]
    print('Resultant Feature vector : ')
    for i in range(0, len_result):
        print('%.5f' % (theta[0, i]))

if __name__ == '__main__':
    main()


@@ -80,6 +80,39 @@ def binary_search_std_lib(sorted_collection, item):
        return index
    return None

def binary_search_by_recursion(sorted_collection, item, left, right):
    """Pure implementation of binary search algorithm in Python by recursion

    Be careful: the collection must be sorted, otherwise the result will be
    unpredictable.
    First recursion should be started with left=0 and right=(len(sorted_collection)-1)

    :param sorted_collection: some sorted collection with comparable items
    :param item: item value to search
    :return: index of found item or None if item is not found

    Examples:
    >>> binary_search_by_recursion([0, 5, 7, 10, 15], 0, 0, 4)
    0
    >>> binary_search_by_recursion([0, 5, 7, 10, 15], 15, 0, 4)
    4
    >>> binary_search_by_recursion([0, 5, 7, 10, 15], 5, 0, 4)
    1
    >>> binary_search_by_recursion([0, 5, 7, 10, 15], 6, 0, 4)

    """
    if left > right:
        return None  # search space exhausted; item is not present
    midpoint = left + (right - left) // 2
    if sorted_collection[midpoint] == item:
        return midpoint
    elif sorted_collection[midpoint] > item:
        return binary_search_by_recursion(sorted_collection, item, left, midpoint - 1)
    else:
        return binary_search_by_recursion(sorted_collection, item, midpoint + 1, right)

def __assert_sorted(collection):
    """Check if collection is sorted, if not - raises :py:class:`ValueError`

sorts/bucket_sort.py Normal file

@@ -0,0 +1,56 @@
#!/usr/bin/env python

# Author: OMKAR PATHAK
# This program will illustrate how to implement the bucket sort algorithm.

# Wikipedia says: Bucket sort, or bin sort, is a sorting algorithm that works
# by distributing the elements of an array into a number of buckets. Each
# bucket is then sorted individually, either using a different sorting
# algorithm, or by recursively applying the bucket sorting algorithm. It is a
# distribution sort, and is a cousin of radix sort in the most-to-least
# significant digit flavour. Bucket sort is a generalization of pigeonhole
# sort. Bucket sort can be implemented with comparisons and therefore can
# also be considered a comparison sort algorithm. The computational
# complexity estimates involve the number of buckets.

# Time Complexity of Solution:
# Best Case O(n + k); Average Case O(n + k); Worst Case O(n^2),
# where k is the number of buckets.

from P26_InsertionSort import insertionSort
import math

DEFAULT_BUCKET_SIZE = 5

def bucketSort(myList, bucketSize=DEFAULT_BUCKET_SIZE):
    if len(myList) == 0:
        print('You don\'t have any elements in array!')
        return myList

    minValue = myList[0]
    maxValue = myList[0]

    # For finding minimum and maximum values
    for i in range(0, len(myList)):
        if myList[i] < minValue:
            minValue = myList[i]
        elif myList[i] > maxValue:
            maxValue = myList[i]

    # Initialize buckets
    bucketCount = math.floor((maxValue - minValue) / bucketSize) + 1
    buckets = []
    for i in range(0, bucketCount):
        buckets.append([])

    # For putting values in buckets
    for i in range(0, len(myList)):
        buckets[math.floor((myList[i] - minValue) / bucketSize)].append(myList[i])

    # Sort buckets and place back into input array
    sortedArray = []
    for i in range(0, len(buckets)):
        insertionSort(buckets[i])
        for j in range(0, len(buckets[i])):
            sortedArray.append(buckets[i][j])

    return sortedArray

if __name__ == '__main__':
    sortedArray = bucketSort([12, 23, 4, 5, 3, 2, 12, 81, 56, 95])
    print(sortedArray)
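# OUTPUT (assuming the imported insertionSort sorts each bucket in place):
# [2, 3, 4, 5, 12, 12, 23, 56, 81, 95]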

sorts/topological_sort.py Normal file

@@ -0,0 +1,32 @@
#      a
#     / \
#    b   c
#   / \
#  d   e
edges = {'a': ['c', 'b'], 'b': ['d', 'e'], 'c': [], 'd': [], 'e': []}
vertices = ['a', 'b', 'c', 'd', 'e']

def topological_sort(start, visited, sort):
    """Perform topological sort on a directed acyclic graph."""
    current = start
    # add current to visited
    visited.append(current)
    neighbors = edges[current]
    for neighbor in neighbors:
        # if neighbor not in visited, visit
        if neighbor not in visited:
            sort = topological_sort(neighbor, visited, sort)
    # if all neighbors visited, add current to sort
    sort.append(current)
    # if all vertices haven't been visited, select a new one to visit
    if len(visited) != len(vertices):
        for vertex in vertices:
            if vertex not in visited:
                sort = topological_sort(vertex, visited, sort)
    # return sort
    return sort

sort = topological_sort('a', [], [])
print(sort)
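# OUTPUT:
# ['c', 'd', 'e', 'b', 'a']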