Merge branch 'master' into patch-1

This commit is contained in:
Sayan Bandyopadhyay 2017-10-23 19:17:45 +05:30 committed by GitHub
commit b96412c019
43 changed files with 1843 additions and 348 deletions

101
Graphs/A*.py Normal file
View File

@ -0,0 +1,101 @@
grid = [[0, 1, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0],#0 are free path whereas 1's are obstacles
[0, 1, 0, 0, 0, 0],
[0, 1, 0, 0, 1, 0],
[0, 0, 0, 0, 1, 0]]
'''
heuristic = [[9, 8, 7, 6, 5, 4],
[8, 7, 6, 5, 4, 3],
[7, 6, 5, 4, 3, 2],
[6, 5, 4, 3, 2, 1],
[5, 4, 3, 2, 1, 0]]'''
init = [0, 0]
goal = [len(grid)-1, len(grid[0])-1] #all coordinates are given in format [y,x]
cost = 1
#the cost map which pushes the path closer to the goal
heuristic = [[0 for row in range(len(grid[0]))] for col in range(len(grid))]
for i in range(len(grid)):
for j in range(len(grid[0])):
heuristic[i][j] = abs(i - goal[0]) + abs(j - goal[1])
if grid[i][j] == 1:
heuristic[i][j] = 99 #added extra penalty in the heuristic map
#the actions we can take
delta = [[-1, 0 ], # go up
[ 0, -1], # go left
[ 1, 0 ], # go down
[ 0, 1 ]] # go right
#function to search the path
def search(grid,init,goal,cost,heuristic):
closed = [[0 for col in range(len(grid[0]))] for row in range(len(grid))]# the referrence grid
closed[init[0]][init[1]] = 1
action = [[0 for col in range(len(grid[0]))] for row in range(len(grid))]#the action grid
x = init[0]
y = init[1]
g = 0
f = g + heuristic[init[0]][init[0]]
cell = [[f, g, x, y]]
found = False # flag that is set when search is complete
resign = False # flag set if we can't find expand
while not found and not resign:
if len(cell) == 0:
resign = True
return "FAIL"
else:
cell.sort()#to choose the least costliest action so as to move closer to the goal
cell.reverse()
next = cell.pop()
x = next[2]
y = next[3]
g = next[1]
f = next[0]
if x == goal[0] and y == goal[1]:
found = True
else:
for i in range(len(delta)):#to try out different valid actions
x2 = x + delta[i][0]
y2 = y + delta[i][1]
if x2 >= 0 and x2 < len(grid) and y2 >=0 and y2 < len(grid[0]):
if closed[x2][y2] == 0 and grid[x2][y2] == 0:
g2 = g + cost
f2 = g2 + heuristic[x2][y2]
cell.append([f2, g2, x2, y2])
closed[x2][y2] = 1
action[x2][y2] = i
invpath = []
x = goal[0]
y = goal[1]
invpath.append([x, y])#we get the reverse path from here
while x != init[0] or y != init[1]:
x2 = x - delta[action[x][y]][0]
y2 = y - delta[action[x][y]][1]
x = x2
y = y2
invpath.append([x, y])
path = []
for i in range(len(invpath)):
path.append(invpath[len(invpath) - 1 - i])
print "ACTION MAP"
for i in range(len(action)):
print action[i]
return path
a = search(grid,init,goal,cost,heuristic)
for i in range(len(a)):
print a[i]

View File

@ -0,0 +1,305 @@
#-*- coding: utf-8 -*-
'''
- - - - - -- - - - - - - - - - - - - - - - - - - - - - -
Name - - CNN - Convolution Neural Network For Photo Recognizing
Goal - - Recognize Handing Writting Word Photo
DetailTotal 5 layers neural network
* Convolution layer
* Pooling layer
* Input layer layer of BP
* Hiden layer of BP
* Output layer of BP
Author: Stephen Lee
Github: 245885195@qq.com
Date: 2017.9.20
- - - - - -- - - - - - - - - - - - - - - - - - - - - - -
'''
import numpy as np
import matplotlib.pyplot as plt
class CNN():
def __init__(self,conv1_get,size_p1,bp_num1,bp_num2,bp_num3,rate_w=0.2,rate_t=0.2):
'''
:param conv1_get: [a,c,d]size, number, step of convolution kernel
:param size_p1: pooling size
:param bp_num1: units number of flatten layer
:param bp_num2: units number of hidden layer
:param bp_num3: units number of output layer
:param rate_w: rate of weight learning
:param rate_t: rate of threshold learning
'''
self.num_bp1 = bp_num1
self.num_bp2 = bp_num2
self.num_bp3 = bp_num3
self.conv1 = conv1_get[:2]
self.step_conv1 = conv1_get[2]
self.size_pooling1 = size_p1
self.rate_weight = rate_w
self.rate_thre = rate_t
self.w_conv1 = [np.mat(-1*np.random.rand(self.conv1[0],self.conv1[0])+0.5) for i in range(self.conv1[1])]
self.wkj = np.mat(-1 * np.random.rand(self.num_bp3, self.num_bp2) + 0.5)
self.vji = np.mat(-1*np.random.rand(self.num_bp2, self.num_bp1)+0.5)
self.thre_conv1 = -2*np.random.rand(self.conv1[1])+1
self.thre_bp2 = -2*np.random.rand(self.num_bp2)+1
self.thre_bp3 = -2*np.random.rand(self.num_bp3)+1
def save_model(self,save_path):
#save model dict with pickle
import pickle
model_dic = {'num_bp1':self.num_bp1,
'num_bp2':self.num_bp2,
'num_bp3':self.num_bp3,
'conv1':self.conv1,
'step_conv1':self.step_conv1,
'size_pooling1':self.size_pooling1,
'rate_weight':self.rate_weight,
'rate_thre':self.rate_thre,
'w_conv1':self.w_conv1,
'wkj':self.wkj,
'vji':self.vji,
'thre_conv1':self.thre_conv1,
'thre_bp2':self.thre_bp2,
'thre_bp3':self.thre_bp3}
with open(save_path, 'wb') as f:
pickle.dump(model_dic, f)
print('Model saved %s'% save_path)
@classmethod
def ReadModel(cls,model_path):
#read saved model
import pickle
with open(model_path, 'rb') as f:
model_dic = pickle.load(f)
conv_get= model_dic.get('conv1')
conv_get.append(model_dic.get('step_conv1'))
size_p1 = model_dic.get('size_pooling1')
bp1 = model_dic.get('num_bp1')
bp2 = model_dic.get('num_bp2')
bp3 = model_dic.get('num_bp3')
r_w = model_dic.get('rate_weight')
r_t = model_dic.get('rate_thre')
#create model instance
conv_ins = CNN(conv_get,size_p1,bp1,bp2,bp3,r_w,r_t)
#modify model parameter
conv_ins.w_conv1 = model_dic.get('w_conv1')
conv_ins.wkj = model_dic.get('wkj')
conv_ins.vji = model_dic.get('vji')
conv_ins.thre_conv1 = model_dic.get('thre_conv1')
conv_ins.thre_bp2 = model_dic.get('thre_bp2')
conv_ins.thre_bp3 = model_dic.get('thre_bp3')
return conv_ins
def sig(self,x):
return 1 / (1 + np.exp(-1*x))
def do_round(self,x):
return round(x, 3)
def convolute(self,data,convs,w_convs,thre_convs,conv_step):
#convolution process
size_conv = convs[0]
num_conv =convs[1]
size_data = np.shape(data)[0]
#get the data slice of original image data, data_focus
data_focus = []
for i_focus in range(0, size_data - size_conv + 1, conv_step):
for j_focus in range(0, size_data - size_conv + 1, conv_step):
focus = data[i_focus:i_focus + size_conv, j_focus:j_focus + size_conv]
data_focus.append(focus)
#caculate the feature map of every single kernel, and saved as list of matrix
data_featuremap = []
Size_FeatureMap = int((size_data - size_conv) / conv_step + 1)
for i_map in range(num_conv):
featuremap = []
for i_focus in range(len(data_focus)):
net_focus = np.sum(np.multiply(data_focus[i_focus], w_convs[i_map])) - thre_convs[i_map]
featuremap.append(self.sig(net_focus))
featuremap = np.asmatrix(featuremap).reshape(Size_FeatureMap, Size_FeatureMap)
data_featuremap.append(featuremap)
#expanding the data slice to One dimenssion
focus1_list = []
for each_focus in data_focus:
focus1_list.extend(self.Expand_Mat(each_focus))
focus_list = np.asarray(focus1_list)
return focus_list,data_featuremap
def pooling(self,featuremaps,size_pooling,type='average_pool'):
#pooling process
size_map = len(featuremaps[0])
size_pooled = int(size_map/size_pooling)
featuremap_pooled = []
for i_map in range(len(featuremaps)):
map = featuremaps[i_map]
map_pooled = []
for i_focus in range(0,size_map,size_pooling):
for j_focus in range(0, size_map, size_pooling):
focus = map[i_focus:i_focus + size_pooling, j_focus:j_focus + size_pooling]
if type == 'average_pool':
#average pooling
map_pooled.append(np.average(focus))
elif type == 'max_pooling':
#max pooling
map_pooled.append(np.max(focus))
map_pooled = np.asmatrix(map_pooled).reshape(size_pooled,size_pooled)
featuremap_pooled.append(map_pooled)
return featuremap_pooled
def _expand(self,datas):
#expanding three dimension data to one dimension list
data_expanded = []
for i in range(len(datas)):
shapes = np.shape(datas[i])
data_listed = datas[i].reshape(1,shapes[0]*shapes[1])
data_listed = data_listed.getA().tolist()[0]
data_expanded.extend(data_listed)
data_expanded = np.asarray(data_expanded)
return data_expanded
def _expand_mat(self,data_mat):
#expanding matrix to one dimension list
data_mat = np.asarray(data_mat)
shapes = np.shape(data_mat)
data_expanded = data_mat.reshape(1,shapes[0]*shapes[1])
return data_expanded
def _calculate_gradient_from_pool(self,out_map,pd_pool,num_map,size_map,size_pooling):
'''
calcluate the gradient from the data slice of pool layer
pd_pool: list of matrix
out_map: the shape of data slice(size_map*size_map)
return: pd_all: list of matrix, [num, size_map, size_map]
'''
pd_all = []
i_pool = 0
for i_map in range(num_map):
pd_conv1 = np.ones((size_map, size_map))
for i in range(0, size_map, size_pooling):
for j in range(0, size_map, size_pooling):
pd_conv1[i:i + size_pooling, j:j + size_pooling] = pd_pool[i_pool]
i_pool = i_pool + 1
pd_conv2 = np.multiply(pd_conv1,np.multiply(out_map[i_map],(1-out_map[i_map])))
pd_all.append(pd_conv2)
return pd_all
def trian(self,patterns,datas_train, datas_teach, n_repeat, error_accuracy,draw_e = bool):
#model traning
print('----------------------Start Training-------------------------')
print(' - - Shape: Train_Data ',np.shape(datas_train))
print(' - - Shape: Teach_Data ',np.shape(datas_teach))
rp = 0
all_mse = []
mse = 10000
while rp < n_repeat and mse >= error_accuracy:
alle = 0
print('-------------Learning Time %d--------------'%rp)
for p in range(len(datas_train)):
#print('------------Learning Image: %d--------------'%p)
data_train = np.asmatrix(datas_train[p])
data_teach = np.asarray(datas_teach[p])
data_focus1,data_conved1 = self.convolute(data_train,self.conv1,self.w_conv1,
self.thre_conv1,conv_step=self.step_conv1)
data_pooled1 = self.pooling(data_conved1,self.size_pooling1)
shape_featuremap1 = np.shape(data_conved1)
'''
print(' -----original shape ', np.shape(data_train))
print(' ---- after convolution ',np.shape(data_conv1))
print(' -----after pooling ',np.shape(data_pooled1))
'''
data_bp_input = self._expand(data_pooled1)
bp_out1 = data_bp_input
bp_net_j = np.dot(bp_out1,self.vji.T) - self.thre_bp2
bp_out2 = self.sig(bp_net_j)
bp_net_k = np.dot(bp_out2 ,self.wkj.T) - self.thre_bp3
bp_out3 = self.sig(bp_net_k)
#--------------Model Leaning ------------------------
# calcluate error and gradient---------------
pd_k_all = np.multiply((data_teach - bp_out3), np.multiply(bp_out3, (1 - bp_out3)))
pd_j_all = np.multiply(np.dot(pd_k_all,self.wkj), np.multiply(bp_out2, (1 - bp_out2)))
pd_i_all = np.dot(pd_j_all,self.vji)
pd_conv1_pooled = pd_i_all / (self.size_pooling1*self.size_pooling1)
pd_conv1_pooled = pd_conv1_pooled.T.getA().tolist()
pd_conv1_all = self._calculate_gradient_from_pool(data_conved1,pd_conv1_pooled,shape_featuremap1[0],
shape_featuremap1[1],self.size_pooling1)
#weight and threshold learning process---------
#convolution layer
for k_conv in range(self.conv1[1]):
pd_conv_list = self._expand_mat(pd_conv1_all[k_conv])
delta_w = self.rate_weight * np.dot(pd_conv_list,data_focus1)
self.w_conv1[k_conv] = self.w_conv1[k_conv] + delta_w.reshape((self.conv1[0],self.conv1[0]))
self.thre_conv1[k_conv] = self.thre_conv1[k_conv] - np.sum(pd_conv1_all[k_conv]) * self.rate_thre
#all connected layer
self.wkj = self.wkj + pd_k_all.T * bp_out2 * self.rate_weight
self.vji = self.vji + pd_j_all.T * bp_out1 * self.rate_weight
self.thre_bp3 = self.thre_bp3 - pd_k_all * self.rate_thre
self.thre_bp2 = self.thre_bp2 - pd_j_all * self.rate_thre
# calculate the sum error of all single image
errors = np.sum(abs((data_teach - bp_out3)))
alle = alle + errors
#print(' ----Teach ',data_teach)
#print(' ----BP_output ',bp_out3)
rp = rp + 1
mse = alle/patterns
all_mse.append(mse)
def draw_error():
yplot = [error_accuracy for i in range(int(n_repeat * 1.2))]
plt.plot(all_mse, '+-')
plt.plot(yplot, 'r--')
plt.xlabel('Learning Times')
plt.ylabel('All_mse')
plt.grid(True, alpha=0.5)
plt.show()
print('------------------Training Complished---------------------')
print(' - - Training epoch: ', rp, ' - - Mse: %.6f' % mse)
if draw_e:
draw_error()
return mse
def predict(self,datas_test):
#model predict
produce_out = []
print('-------------------Start Testing-------------------------')
print(' - - Shape: Test_Data ',np.shape(datas_test))
for p in range(len(datas_test)):
data_test = np.asmatrix(datas_test[p])
data_focus1, data_conved1 = self.convolute(data_test, self.conv1, self.w_conv1,
self.thre_conv1, conv_step=self.step_conv1)
data_pooled1 = self.pooling(data_conved1, self.size_pooling1)
data_bp_input = self._expand(data_pooled1)
bp_out1 = data_bp_input
bp_net_j = bp_out1 * self.vji.T - self.thre_bp2
bp_out2 = self.sig(bp_net_j)
bp_net_k = bp_out2 * self.wkj.T - self.thre_bp3
bp_out3 = self.sig(bp_net_k)
produce_out.extend(bp_out3.getA().tolist())
res = [list(map(self.do_round,each)) for each in produce_out]
return np.asarray(res)
def convolution(self,data):
#return the data of image after convoluting process so we can check it out
data_test = np.asmatrix(data)
data_focus1, data_conved1 = self.convolute(data_test, self.conv1, self.w_conv1,
self.thre_conv1, conv_step=self.step_conv1)
data_pooled1 = self.pooling(data_conved1, self.size_pooling1)
return data_conved1,data_pooled1
if __name__ == '__main__':
pass
'''
I will put the example on other file
'''

View File

@ -1,4 +1,4 @@
# The Algorithms - Python [![Build Status](https://travis-ci.org/TheAlgorithms/Python.svg)](https://travis-ci.org/TheAlgorithms/Python)
# The Algorithms - Python <!-- [![Build Status](https://travis-ci.org/TheAlgorithms/Python.svg)](https://travis-ci.org/TheAlgorithms/Python) -->
### All algorithms implemented in Python (for education)
@ -128,6 +128,13 @@ The method is named after **Julius Caesar**, who used it in his private correspo
The encryption step performed by a Caesar cipher is often incorporated as part of more complex schemes, such as the Vigenère cipher, and still has modern application in the ROT13 system. As with all single-alphabet substitution ciphers, the Caesar cipher is easily broken and in modern practice offers essentially no communication security.
###### Source: [Wikipedia](https://en.wikipedia.org/wiki/Caesar_cipher)
### Vigenère
The **Vigenère cipher** is a method of encrypting alphabetic text by using a series of **interwoven Caesar ciphers** based on the letters of a keyword. It is **a form of polyalphabetic substitution**.<br>
The Vigenère cipher has been reinvented many times. The method was originally described by Giovan Battista Bellaso in his 1553 book La cifra del. Sig. Giovan Battista Bellaso; however, the scheme was later misattributed to Blaise de Vigenère in the 19th century, and is now widely known as the "Vigenère cipher".<br>
Though the cipher is easy to understand and implement, for three centuries it resisted all attempts to break it; this earned it the description **le chiffre indéchiffrable**(French for 'the indecipherable cipher').
Many people have tried to implement encryption schemes that are essentially Vigenère ciphers. Friedrich Kasiski was the first to publish a general method of deciphering a Vigenère cipher in 1863.
###### Source: [Wikipedia](https://en.wikipedia.org/wiki/Vigen%C3%A8re_cipher)
### Transposition
In cryptography, a **transposition cipher** is a method of encryption by which the positions held by units of plaintext (which are commonly characters or groups of characters) are shifted according to a regular system, so that the ciphertext constitutes a permutation of the plaintext. That is, the order of the units is changed (the plaintext is reordered).<br>
Mathematically a bijective function is used on the characters' positions to encrypt and an inverse function to decrypt.

View File

@ -7,40 +7,42 @@ class Node:
def __init__(self, label):
self.label = label
self.left = None
self.rigt = None
self.parent = None
self._parent = None
self._left = None
self._right = None
self.height = 0
def getLabel(self):
return self.label
@property
def right(self):
return self._right
def setLabel(self, label):
self.label = label
@right.setter
def right(self, node):
if node is not None:
node._parent = self
self._right = node
def getLeft(self):
return self.left
@property
def left(self):
return self._left
def setLeft(self, left):
self.left = left
@left.setter
def left(self, node):
if node is not None:
node._parent = self
self._left = node
def getRight(self):
return self.rigt
@property
def parent(self):
return self._parent
def setRight(self, right):
self.rigt = right
def getParent(self):
return self.parent
def setParent(self, parent):
self.parent = parent
def setHeight(self, height):
self.height = height
def getHeight(self, height):
return self.height
@parent.setter
def parent(self, node):
if node is not None:
self._parent = node
self.height = self.parent.height + 1
else:
self.height = 0
class AVL:
@ -51,8 +53,10 @@ class AVL:
def insert(self, value):
node = Node(value)
if self.root is None:
self.root = node
self.root.height = 0
self.size = 1
else:
# Same as Binary Tree
@ -64,63 +68,77 @@ class AVL:
dad_node = curr_node
if node.getLabel() < curr_node.getLabel():
curr_node = curr_node.getLeft()
if node.label < curr_node.label:
curr_node = curr_node.left
else:
curr_node = curr_node.getRight()
curr_node = curr_node.right
else:
if node.getLabel() < dad_node.getLabel():
dad_node.setLeft(node)
dad_node.setHeight(dad_node.getHeight() + 1)
if (dad_node.getRight().getHeight() -
dad_node.getLeft.getHeight() > 1):
self.rebalance(dad_node)
node.height = dad_node.height
dad_node.height += 1
if node.label < dad_node.label:
dad_node.left = node
else:
dad_node.setRight(node)
dad_node.setHeight(dad_node.getHeight() + 1)
if (dad_node.getRight().getHeight() -
dad_node.getLeft.getHeight() > 1):
self.rebalance(dad_node)
dad_node.right = node
self.rebalance(node)
self.size += 1
break
def rebalance(self, node):
if (node.getRight().getHeight() -
node.getLeft.getHeight() > 1):
if (node.getRight().getHeight() >
node.getLeft.getHeight()):
pass
else:
pass
pass
elif (node.getRight().getHeight() -
node.getLeft.getHeight() > 2):
if (node.getRight().getHeight() >
node.getLeft.getHeight()):
pass
else:
pass
pass
pass
n = node
while n is not None:
height_right = n.height
height_left = n.height
if n.right is not None:
height_right = n.right.height
if n.left is not None:
height_left = n.left.height
if abs(height_left - height_right) > 1:
if height_left > height_right:
left_child = n.left
if left_child is not None:
h_right = (right_child.right.height
if (right_child.right is not None) else 0)
h_left = (right_child.left.height
if (right_child.left is not None) else 0)
if (h_left > h_right):
self.rotate_left(n)
break
else:
self.double_rotate_right(n)
break
else:
right_child = n.right
if right_child is not None:
h_right = (right_child.right.height
if (right_child.right is not None) else 0)
h_left = (right_child.left.height
if (right_child.left is not None) else 0)
if (h_left > h_right):
self.double_rotate_left(n)
break
else:
self.rotate_right(n)
break
n = n.parent
def rotate_left(self, node):
# TODO: is this pythonic enought?
aux = node.getLabel()
node = aux.getRight()
node.setHeight(node.getHeight() - 1)
node.setLeft(Node(aux))
node.getLeft().setHeight(node.getHeight() + 1)
node.getRight().setHeight(node.getRight().getHeight() - 1)
aux = node.parent.label
node.parent.label = node.label
node.parent.right = Node(aux)
node.parent.right.height = node.parent.height + 1
node.parent.left = node.right
def rotate_right(self, node):
aux = node.getLabel()
node = aux.getLeft()
node.setHeight(node.getHeight() - 1)
node.setRight(Node(aux))
node.getLeft().setHeight(node.getHeight() + 1)
node.getLeft().setHeight(node.getLeft().getHeight() - 1)
aux = node.parent.label
node.parent.label = node.label
node.parent.left = Node(aux)
node.parent.left.height = node.parent.height + 1
node.parent.right = node.right
def double_rotate_left(self, node):
self.rotate_right(node.getRight().getRight())
@ -129,3 +147,34 @@ class AVL:
def double_rotate_right(self, node):
self.rotate_left(node.getLeft().getLeft())
self.rotate_right(node)
def empty(self):
if self.root is None:
return True
return False
def preShow(self, curr_node):
if curr_node is not None:
self.preShow(curr_node.left)
print(curr_node.label, end=" ")
self.preShow(curr_node.right)
def preorder(self, curr_node):
if curr_node is not None:
self.preShow(curr_node.left)
self.preShow(curr_node.right)
print(curr_node.label, end=" ")
def getRoot(self):
return self.root
t = AVL()
t.insert(1)
t.insert(2)
t.insert(3)
# t.preShow(t.root)
# print("\n")
# t.insert(4)
# t.insert(5)
# t.preShow(t.root)
# t.preorden(t.root)

View File

@ -68,7 +68,7 @@ class BinarySearchTree:
return False
def preShow(self, curr_node):
if curr_node is None:
if curr_node is not None:
print(curr_node.getLabel(), end=" ")
self.preShow(curr_node.getLeft())

View File

@ -0,0 +1,61 @@
# Author: OMKAR PATHAK
class Graph():
def __init__(self):
self.vertex = {}
# for printing the Graph vertexes
def printGraph(self):
for i in self.vertex.keys():
print(i,' -> ', ' -> '.join([str(j) for j in self.vertex[i]]))
# for adding the edge beween two vertexes
def addEdge(self, fromVertex, toVertex):
# check if vertex is already present,
if fromVertex in self.vertex.keys():
self.vertex[fromVertex].append(toVertex)
else:
# else make a new vertex
self.vertex[fromVertex] = [toVertex]
def BFS(self, startVertex):
# Take a list for stoting already visited vertexes
visited = [False] * len(self.vertex)
# create a list to store all the vertexes for BFS
queue = []
# mark the source node as visited and enqueue it
visited[startVertex] = True
queue.append(startVertex)
while queue:
startVertex = queue.pop(0)
print(startVertex, end = ' ')
# mark all adjacent nodes as visited and print them
for i in self.vertex[startVertex]:
if visited[i] == False:
queue.append(i)
visited[i] = True
if __name__ == '__main__':
g = Graph()
g.addEdge(0, 1)
g.addEdge(0, 2)
g.addEdge(1, 2)
g.addEdge(2, 0)
g.addEdge(2, 3)
g.addEdge(3, 3)
g.printGraph()
print('BFS:')
g.BFS(2)
# OUTPUT:
# 0  ->  1 -> 2
# 1  ->  2
# 2  ->  0 -> 3
# 3  ->  3
# BFS:
# 2 0 3 1

View File

@ -1,72 +0,0 @@
class GRAPH:
"""docstring for GRAPH"""
def __init__(self, nodes):
self.nodes = nodes
self.graph = [[0]*nodes for i in range (nodes)]
self.visited = [0]*nodes
def show(self):
for i in self.graph:
for j in i:
print(j, end=' ')
print(' ')
def bfs(self,v):
visited = [False]*self.vertex
visited[v - 1] = True
print('%d visited' % (v))
queue = [v - 1]
while len(queue) > 0:
v = queue[0]
for u in range(self.vertex):
if self.graph[v][u] == 1:
if visited[u] is False:
visited[u] = True
queue.append(u)
print('%d visited' % (u +1))
queue.pop(0)
g = Graph(10)
g.add_edge(1,2)
g.add_edge(1,3)
g.add_edge(1,4)
g.add_edge(2,5)
g.add_edge(3,6)
g.add_edge(3,7)
g.add_edge(4,8)
g.add_edge(5,9)
g.add_edge(6,10)
g.bfs(4)
print(self.graph)
def add_edge(self, i, j):
self.graph[i][j]=1
self.graph[j][i]=1
def bfs(self, s):
queue = [s]
self.visited[s] = 1
while len(queue)!= 0:
x = queue.pop(0)
print(x)
for i in range(0, self.nodes):
if self.graph[x][i] == 1 and self.visited[i] == 0:
queue.append(i)
self.visited[i] = 1
n = int(input("Enter the number of Nodes : "))
g = GRAPH(n)
e = int(input("Enter the no of edges : "))
print("Enter the edges (u v)")
for i in range(0, e):
u ,v = map(int, raw_input().split())
g.add_edge(u, v)
s = int(input("Enter the source node :"))
g.bfs(s)

View File

@ -1,32 +0,0 @@
class GRAPH:
"""docstring for GRAPH"""
def __init__(self, nodes):
self.nodes=nodes
self.graph=[[0]*nodes for i in range (nodes)]
self.visited=[0]*nodes
def show(self):
print self.graph
def add_edge(self, i, j):
self.graph[i][j]=1
self.graph[j][i]=1
def dfs(self,s):
self.visited[s]=1
print(s)
for i in range(0,self.nodes):
if self.visited[i]==0 and self.graph[s][i]==1:
self.dfs(i)
n=int(input("Enter the number of Nodes : "))
g=GRAPH(n)
e=int(input("Enter the no of edges : "))
print("Enter the edges (u v)")
for i in range(0,e):
u,v=map(int, raw_input().split())
g.add_edge(u,v)
s=int(input("Enter the source node :"))
g.dfs(s)

View File

@ -0,0 +1,61 @@
# Author: OMKAR PATHAK
class Graph():
def __init__(self):
self.vertex = {}
# for printing the Graph vertexes
def printGraph(self):
print(self.vertex)
for i in self.vertex.keys():
print(i,' -> ', ' -> '.join([str(j) for j in self.vertex[i]]))
# for adding the edge beween two vertexes
def addEdge(self, fromVertex, toVertex):
# check if vertex is already present,
if fromVertex in self.vertex.keys():
self.vertex[fromVertex].append(toVertex)
else:
# else make a new vertex
self.vertex[fromVertex] = [toVertex]
def DFS(self):
# visited array for storing already visited nodes
visited = [False] * len(self.vertex)
# call the recursive helper function
for i in range(len(self.vertex)):
if visited[i] == False:
self.DFSRec(i, visited)
def DFSRec(self, startVertex, visited):
# mark start vertex as visited
visited[startVertex] = True
print(startVertex, end = ' ')
# Recur for all the vertexes that are adjacent to this node
for i in self.vertex.keys():
if visited[i] == False:
self.DFSRec(i, visited)
if __name__ == '__main__':
g = Graph()
g.addEdge(0, 1)
g.addEdge(0, 2)
g.addEdge(1, 2)
g.addEdge(2, 0)
g.addEdge(2, 3)
g.addEdge(3, 3)
g.printGraph()
print('DFS:')
g.DFS()
# OUTPUT:
# 0  ->  1 -> 2
# 1  ->  2
# 2  ->  0 -> 3
# 3  ->  3
# DFS:
# 0 1 2 3

View File

@ -0,0 +1,211 @@
# Title: Dijkstra's Algorithm for finding single source shortest path from scratch
# Author: Shubham Malik
# References: https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm
import math
import sys
# For storing the vertex set to retreive node with the lowest distance
class PriorityQueue:
# Based on Min Heap
def __init__(self):
self.cur_size = 0
self.array = []
self.pos = {} # To store the pos of node in array
def isEmpty(self):
return self.cur_size == 0
def min_heapify(self, idx):
lc = self.left(idx)
rc = self.right(idx)
if lc < self.cur_size and self.array(lc)[0] < self.array(idx)[0]:
smallest = lc
else:
smallest = idx
if rc < self.cur_size and self.array(rc)[0] < self.array(smallest)[0]:
smallest = rc
if smallest != idx:
self.swap(idx, smallest)
self.min_heapify(smallest)
def insert(self, tup):
# Inserts a node into the Priority Queue
self.pos[tup[1]] = self.cur_size
self.cur_size += 1
self.array.append((sys.maxsize, tup[1]))
self.decrease_key((sys.maxsize, tup[1]), tup[0])
def extract_min(self):
# Removes and returns the min element at top of priority queue
min_node = self.array[0][1]
self.array[0] = self.array[self.cur_size - 1]
self.cur_size -= 1
self.min_heapify(1)
del self.pos[min_node]
return min_node
def left(self, i):
# returns the index of left child
return 2 * i + 1
def right(self, i):
# returns the index of right child
return 2 * i + 2
def par(self, i):
# returns the index of parent
return math.floor(i / 2)
def swap(self, i, j):
# swaps array elements at indices i and j
# update the pos{}
self.pos[self.array[i][1]] = j
self.pos[self.array[j][1]] = i
temp = self.array[i]
self.array[i] = self.array[j]
self.array[j] = temp
def decrease_key(self, tup, new_d):
idx = self.pos[tup[1]]
# assuming the new_d is atmost old_d
self.array[idx] = (new_d, tup[1])
while idx > 0 and self.array[self.par(idx)][0] > self.array[idx][0]:
self.swap(idx, self.par(idx))
idx = self.par(idx)
class Graph:
def __init__(self, num):
self.adjList = {} # To store graph: u -> (v,w)
self.num_nodes = num # Number of nodes in graph
# To store the distance from source vertex
self.dist = [0] * self.num_nodes
self.par = [-1] * self.num_nodes # To store the path
def add_edge(self, u, v, w):
# Edge going from node u to v and v to u with weight w
# u (w)-> v, v (w) -> u
# Check if u already in graph
if u in self.adjList.keys():
self.adjList[u].append((v, w))
else:
self.adjList[u] = [(v, w)]
# Assuming undirected graph
if v in self.adjList.keys():
self.adjList[v].append((u, w))
else:
self.adjList[v] = [(u, w)]
def show_graph(self):
# u -> v(w)
for u in self.adjList:
print(u, '->', ' -> '.join(str("{}({})".format(v, w))
for v, w in self.adjList[u]))
def dijkstra(self, src):
# Flush old junk values in par[]
self.par = [-1] * self.num_nodes
# src is the source node
self.dist[src] = 0
Q = PriorityQueue()
Q.insert((0, src)) # (dist from src, node)
for u in self.adjList.keys():
if u != src:
self.dist[u] = sys.maxsize # Infinity
self.par[u] = -1
while not Q.isEmpty():
u = Q.extract_min() # Returns node with the min dist from source
# Update the distance of all the neighbours of u and
# if their prev dist was INFINITY then push them in Q
for v, w in self.adjList[u]:
new_dist = self.dist[u] + w
if self.dist[v] > new_dist:
if self.dist[v] == sys.maxsize:
Q.insert((new_dist, v))
else:
Q.decrease_key((self.dist[v], v), new_dist)
self.dist[v] = new_dist
self.par[v] = u
# Show the shortest distances from src
self.show_distances(src)
def show_distances(self, src):
print("Distance from node: {}".format(src))
for u in range(self.num_nodes):
print('Node {} has distance: {}'.format(u, self.dist[u]))
def show_path(self, src, dest):
# To show the shortest path from src to dest
# WARNING: Use it *after* calling dijkstra
path = []
cost = 0
temp = dest
# Backtracking from dest to src
while self.par[temp] != -1:
path.append(temp)
if temp != src:
for v, w in self.adjList[temp]:
if v == self.par[temp]:
cost += w
break
temp = self.par[temp]
path.append(src)
path.reverse()
print('----Path to reach {} from {}----'.format(dest, src))
for u in path:
print('{}'.format(u), end=' ')
if u != dest:
print('-> ', end='')
print('\nTotal cost of path: ', cost)
if __name__ == '__main__':
graph = Graph(9)
graph.add_edge(0, 1, 4)
graph.add_edge(0, 7, 8)
graph.add_edge(1, 2, 8)
graph.add_edge(1, 7, 11)
graph.add_edge(2, 3, 7)
graph.add_edge(2, 8, 2)
graph.add_edge(2, 5, 4)
graph.add_edge(3, 4, 9)
graph.add_edge(3, 5, 14)
graph.add_edge(4, 5, 10)
graph.add_edge(5, 6, 2)
graph.add_edge(6, 7, 1)
graph.add_edge(6, 8, 6)
graph.add_edge(7, 8, 7)
graph.show_graph()
graph.dijkstra(0)
graph.show_path(0, 4)
# OUTPUT
# 0 -> 1(4) -> 7(8)
# 1 -> 0(4) -> 2(8) -> 7(11)
# 7 -> 0(8) -> 1(11) -> 6(1) -> 8(7)
# 2 -> 1(8) -> 3(7) -> 8(2) -> 5(4)
# 3 -> 2(7) -> 4(9) -> 5(14)
# 8 -> 2(2) -> 6(6) -> 7(7)
# 5 -> 2(4) -> 3(14) -> 4(10) -> 6(2)
# 4 -> 3(9) -> 5(10)
# 6 -> 5(2) -> 7(1) -> 8(6)
# Distance from node: 0
# Node 0 has distance: 0
# Node 1 has distance: 4
# Node 2 has distance: 12
# Node 3 has distance: 19
# Node 4 has distance: 21
# Node 5 has distance: 11
# Node 6 has distance: 9
# Node 7 has distance: 8
# Node 8 has distance: 14
# ----Path to reach 4 from 0----
# 0 -> 7 -> 6 -> 5 -> 4
# Total cost of path: 21

View File

@ -3,22 +3,15 @@ class Node:#create a Node
self.data=data#given data
self.next=None#given next to None
class Linked_List:
pass
def insert_tail(Head,data):#insert the data at tail
tamp=Head#create a tamp as a head
if(tamp==None):#if linkedlist is empty
newNod=Node()#create newNode Node type and given data and next
newNod.data=data
newNod.next=None
Head=newNod
def insert_tail(Head,data):
if(Head.next is None):
Head.next = Node(data)
else:
while tamp.next!=None:#find the last Node
tamp=tamp.next
newNod = Node()#create a new node
newNod.data = data
newNod.next = None
tamp.next=newNod#put the newnode into last node
return Head#return first node of linked list
insert_tail(Head.next, data)
def insert_head(Head,data):
tamp = Head
if (tamp == None):
@ -32,16 +25,18 @@ class Linked_List:
newNod.next = Head#put the Head at NewNode Next
Head=newNod#make a NewNode to Head
return Head
def Print(Head):#print every node data
tamp=Node()
def printList(Head):#print every node data
tamp=Head
while tamp!=None:
print(tamp.data)
tamp=tamp.next
def delete_head(Head):#delete from head
if Head!=None:
Head=Head.next
return Head#return new Head
def delete_tail(Head):#delete from tail
if Head!=None:
tamp = Node()
@ -50,12 +45,6 @@ class Linked_List:
tamp = tamp.next
tamp.next=None#delete the last element by give next None to 2nd last Element
return Head
def isEmpty(Head):
if(Head==None):#check Head is None or Not
return True#return Ture if list is empty
else:
return False#check False if it's not empty
return Head is None #Return if Head is none

View File

@ -1,27 +0,0 @@
# Author: OMKAR PATHAK
import Stack
def parseParenthesis(string):
balanced = 1
index = 0
myStack = Stack.Stack(len(string))
while (index < len(string)) and (balanced == 1):
check = string[index]
if check == '(':
myStack.push(check)
else:
if myStack.isEmpty():
balanced = 0
else:
myStack.pop()
index += 1
if balanced == 1 and myStack.isEmpty():
return True
else:
return False
if __name__ == '__main__':
print(parseParenthesis('((()))')) # True
print(parseParenthesis('((())')) # False

View File

@ -1,48 +0,0 @@
# Author: OMKAR PATHAK
import Stack
def isOperand(char):
return (ord(char) >= ord('a') and ord(char) <= ord('z')) or (ord(char) >= ord('A') and ord(char) <= ord('Z'))
def precedence(char):
if char == '+' or char == '-':
return 1
elif char == '*' or char == '/':
return 2
elif char == '^':
return 3
else:
return -1
def infixToPostfix(myExp, myStack):
postFix = []
for i in range(len(myExp)):
if (isOperand(myExp[i])):
postFix.append(myExp[i])
elif(myExp[i] == '('):
myStack.push(myExp[i])
elif(myExp[i] == ')'):
topOperator = myStack.pop()
while(not myStack.isEmpty() and topOperator != '('):
postFix.append(topOperator)
topOperator = myStack.pop()
else:
while (not myStack.isEmpty()) and (precedence(myExp[i]) <= precedence(myStack.peek())):
postFix.append(myStack.pop())
myStack.push(myExp[i])
while(not myStack.isEmpty()):
postFix.append(myStack.pop())
return ' '.join(postFix)
if __name__ == '__main__':
myExp = 'a+b*(c^d-e)^(f+g*h)-i'
myExp = [i for i in myExp]
print('Infix:',' '.join(myExp))
myStack = Stack.Stack(len(myExp))
print('Postfix:',infixToPostfix(myExp, myStack))
# OUTPUT:
# Infix: a + b * ( c ^ d - e ) ^ ( f + g * h ) - i
# Postfix: a b c d ^ e - f g h * + ^ * + i -

View File

@ -1,50 +0,0 @@
# Author: OMKAR PATHAK
class Stack(object):
def __init__(self, limit = 10):
self.stack = []
self.limit = limit
# for printing the stack contents
def __str__(self):
return ' '.join([str(i) for i in self.stack])
# for pushing an element on to the stack
def push(self, data):
if len(self.stack) >= self.limit:
print('Stack Overflow')
else:
self.stack.append(data)
# for popping the uppermost element
def pop(self):
if len(self.stack) <= 0:
return -1
else:
return self.stack.pop()
# for peeking the top-most element of the stack
def peek(self):
if len(self.stack) <= 0:
return -1
else:
return self.stack[len(self.stack) - 1]
# to check if stack is empty
def isEmpty(self):
return self.stack == []
# for checking the size of stack
def size(self):
return len(self.stack)
if __name__ == '__main__':
myStack = Stack()
for i in range(10):
myStack.push(i)
print(myStack)
myStack.pop() # popping the top element
print(myStack)
myStack.peek() # printing the top element
myStack.isEmpty()
myStack.size()

View File

@ -0,0 +1,21 @@
from Stack import Stack
__author__ = 'Omkar Pathak'
def balanced_parentheses(parentheses):
""" Use a stack to check if a string of parentheses are balanced."""
stack = Stack(len(parentheses))
for parenthesis in parentheses:
if parenthesis == '(':
stack.push(parenthesis)
elif parenthesis == ')':
stack.pop()
return not stack.is_empty()
if __name__ == '__main__':
examples = ['((()))', '((())']
print('Balanced parentheses demonstration:\n')
for example in examples:
print(example + ': ' + str(balanced_parentheses(example)))

View File

@ -0,0 +1,62 @@
import string
from Stack import Stack
__author__ = 'Omkar Pathak'
def is_operand(char):
return char in string.ascii_letters or char in string.digits
def precedence(char):
""" Return integer value representing an operator's precedence, or
order of operation.
https://en.wikipedia.org/wiki/Order_of_operations
"""
dictionary = {'+': 1, '-': 1,
'*': 2, '/': 2,
'^': 3}
return dictionary.get(char, -1)
def infix_to_postfix(expression):
""" Convert infix notation to postfix notation using the Shunting-yard
algorithm.
https://en.wikipedia.org/wiki/Shunting-yard_algorithm
https://en.wikipedia.org/wiki/Infix_notation
https://en.wikipedia.org/wiki/Reverse_Polish_notation
"""
stack = Stack(len(expression))
postfix = []
for char in expression:
if is_operand(char):
postfix.append(char)
elif char not in {'(', ')'}:
while (not stack.is_empty()
and precedence(char) <= precedence(stack.peek())):
postfix.append(stack.pop())
stack.push(char)
elif char == '(':
stack.push(char)
elif char == ')':
while not stack.is_empty() and stack.peek() != '(':
postfix.append(stack.pop())
# Pop '(' from stack. If there is no '(', there is a mismatched
# parentheses.
if stack.peek() != '(':
raise ValueError('Mismatched parentheses')
stack.pop()
while not stack.is_empty():
postfix.append(stack.pop())
return ' '.join(postfix)
if __name__ == '__main__':
expression = 'a+b*(c^d-e)^(f+g*h)-i'
print('Infix to Postfix Notation demonstration:\n')
print('Infix notation: ' + expression)
print('Postfix notation: ' + infix_to_postfix(expression))

View File

@ -0,0 +1,16 @@
# Function to print element and NGE pair for all elements of list
def printNGE(arr):
for i in range(0, len(arr), 1):
next = -1
for j in range(i+1, len(arr), 1):
if arr[i] < arr[j]:
next = arr[j]
break
print(str(arr[i]) + " -- " + str(next))
# Driver program to test above function
arr = [11,13,21,3]
printNGE(arr)

View File

@ -0,0 +1,68 @@
__author__ = 'Omkar Pathak'
class Stack(object):
""" A stack is an abstract data type that serves as a collection of
elements with two principal operations: push() and pop(). push() adds an
element to the top of the stack, and pop() removes an element from the top
of a stack. The order in which elements come off of a stack are
Last In, First Out (LIFO).
https://en.wikipedia.org/wiki/Stack_(abstract_data_type)
"""
def __init__(self, limit=10):
self.stack = []
self.limit = limit
def __bool__(self):
return not bool(self.stack)
def __str__(self):
return str(self.stack)
def push(self, data):
""" Push an element to the top of the stack."""
if len(self.stack) >= self.limit:
raise StackOverflowError
self.stack.append(data)
def pop(self):
""" Pop an element off of the top of the stack."""
if self.stack:
return self.stack.pop()
else:
raise IndexError('pop from an empty stack')
def peek(self):
""" Peek at the top-most element of the stack."""
if self.stack:
return self.stack[-1]
def is_empty(self):
""" Check if a stack is empty."""
return not bool(self.stack)
def size(self):
""" Return the size of the stack."""
return len(self.stack)
class StackOverflowError(BaseException):
pass
if __name__ == '__main__':
stack = Stack()
for i in range(10):
stack.push(i)
print('Stack demonstration:\n')
print('Initial stack: ' + str(stack))
print('pop(): ' + str(stack.pop()))
print('After pop(), the stack is now: ' + str(stack))
print('peek(): ' + str(stack.peek()))
stack.push(100)
print('After push(100), the stack is now: ' + str(stack))
print('is_empty(): ' + str(stack.is_empty()))
print('size(): ' + str(stack.size()))

View File

@ -0,0 +1,42 @@
"""
This program calculates the nth Fibonacci number in O(log(n)).
It's possible to calculate F(1000000) in less than a second.
"""
import sys
# returns F(n)
def fibonacci(n: int):
if n < 0:
raise ValueError("Negative arguments are not supported")
return _fib(n)[0]
# returns (F(n), F(n-1))
def _fib(n: int):
if n == 0:
# (F(0), F(1))
return (0, 1)
else:
# F(2n) = F(n)[2F(n+1) F(n)]
# F(2n+1) = F(n+1)^2+F(n)^2
a, b = _fib(n // 2)
c = a * (b * 2 - a)
d = a * a + b * b
if n % 2 == 0:
return (c, d)
else:
return (d, c + d)
if __name__ == "__main__":
args = sys.argv[1:]
if len(args) != 1:
print("Too few or too much parameters given.")
exit(1)
try:
n = int(args[0])
except ValueError:
print("Could not convert data to an integer.")
exit(1)
print("F(%d) = %d" % (n, fibonacci(n)))

View File

@ -30,7 +30,7 @@ if __name__ == '__main__':
import sys
print("\n********* Fibonacci Series Using Dynamic Programming ************\n")
# For python 2.x and 3.x compatibility: 3.x has not raw_input builtin
# For python 2.x and 3.x compatibility: 3.x has no raw_input builtin
# otherwise 2.x's input builtin function is too "smart"
if sys.version_info.major < 3:
input_function = raw_input

View File

@ -0,0 +1,40 @@
#############################
# Author: Aravind Kashyap
# File: lis.py
# comments: This programme outputs the Longest Strictly Increasing Subsequence in O(NLogN)
# Where N is the Number of elements in the list
#############################
def CeilIndex(v,l,r,key):
while r-l > 1:
m = (l + r)/2
if v[m] >= key:
r = m
else:
l = m
return r
def LongestIncreasingSubsequenceLength(v):
if(len(v) == 0):
return 0
tail = [0]*len(v)
length = 1
tail[0] = v[0]
for i in range(1,len(v)):
if v[i] < tail[0]:
tail[0] = v[i]
elif v[i] > tail[length-1]:
tail[length] = v[i]
length += 1
else:
tail[CeilIndex(tail,-1,length-1,v[i])] = v[i]
return length
v = [2, 5, 3, 7, 11, 8, 10, 13, 6]
print LongestIncreasingSubsequenceLength(v)

View File

@ -0,0 +1,172 @@
'''README, Author - Anurag Kumar(mailto:anuragkumarak95@gmail.com)
Requirements:
- sklearn
- numpy
- matplotlib
Python:
- 3.5
Inputs:
- X , a 2D numpy array of features.
- k , number of clusters to create.
- initial_centroids , initial centroid values generated by utility function(mentioned in usage).
- maxiter , maximum number of iterations to process.
- heterogeneity , empty list that will be filled with hetrogeneity values if passed to kmeans func.
Usage:
1. define 'k' value, 'X' features array and 'hetrogeneity' empty list
2. create initial_centroids,
initial_centroids = get_initial_centroids(
X,
k,
seed=0 # seed value for initial centroid generation, None for randomness(default=None)
)
3. find centroids and clusters using kmeans function.
centroids, cluster_assignment = kmeans(
X,
k,
initial_centroids,
maxiter=400,
record_heterogeneity=heterogeneity,
verbose=True # whether to print logs in console or not.(default=False)
)
4. Plot the loss function, hetrogeneity values for every iteration saved in hetrogeneity list.
plot_heterogeneity(
heterogeneity,
k
)
5. Have fun..
'''
from sklearn.metrics import pairwise_distances
import numpy as np
TAG = 'K-MEANS-CLUST/ '
def get_initial_centroids(data, k, seed=None):
'''Randomly choose k data points as initial centroids'''
if seed is not None: # useful for obtaining consistent results
np.random.seed(seed)
n = data.shape[0] # number of data points
# Pick K indices from range [0, N).
rand_indices = np.random.randint(0, n, k)
# Keep centroids as dense format, as many entries will be nonzero due to averaging.
# As long as at least one document in a cluster contains a word,
# it will carry a nonzero weight in the TF-IDF vector of the centroid.
centroids = data[rand_indices,:]
return centroids
def centroid_pairwise_dist(X,centroids):
return pairwise_distances(X,centroids,metric='euclidean')
def assign_clusters(data, centroids):
# Compute distances between each data point and the set of centroids:
# Fill in the blank (RHS only)
distances_from_centroids = centroid_pairwise_dist(data,centroids)
# Compute cluster assignments for each data point:
# Fill in the blank (RHS only)
cluster_assignment = np.argmin(distances_from_centroids,axis=1)
return cluster_assignment
def revise_centroids(data, k, cluster_assignment):
new_centroids = []
for i in range(k):
# Select all data points that belong to cluster i. Fill in the blank (RHS only)
member_data_points = data[cluster_assignment==i]
# Compute the mean of the data points. Fill in the blank (RHS only)
centroid = member_data_points.mean(axis=0)
new_centroids.append(centroid)
new_centroids = np.array(new_centroids)
return new_centroids
def compute_heterogeneity(data, k, centroids, cluster_assignment):
heterogeneity = 0.0
for i in range(k):
# Select all data points that belong to cluster i. Fill in the blank (RHS only)
member_data_points = data[cluster_assignment==i, :]
if member_data_points.shape[0] > 0: # check if i-th cluster is non-empty
# Compute distances from centroid to data points (RHS only)
distances = pairwise_distances(member_data_points, [centroids[i]], metric='euclidean')
squared_distances = distances**2
heterogeneity += np.sum(squared_distances)
return heterogeneity
from matplotlib import pyplot as plt
def plot_heterogeneity(heterogeneity, k):
plt.figure(figsize=(7,4))
plt.plot(heterogeneity, linewidth=4)
plt.xlabel('# Iterations')
plt.ylabel('Heterogeneity')
plt.title('Heterogeneity of clustering over time, K={0:d}'.format(k))
plt.rcParams.update({'font.size': 16})
plt.show()
def kmeans(data, k, initial_centroids, maxiter=500, record_heterogeneity=None, verbose=False):
'''This function runs k-means on given data and initial set of centroids.
maxiter: maximum number of iterations to run.(default=500)
record_heterogeneity: (optional) a list, to store the history of heterogeneity as function of iterations
if None, do not store the history.
verbose: if True, print how many data points changed their cluster labels in each iteration'''
centroids = initial_centroids[:]
prev_cluster_assignment = None
for itr in range(maxiter):
if verbose:
print(itr, end='')
# 1. Make cluster assignments using nearest centroids
cluster_assignment = assign_clusters(data,centroids)
# 2. Compute a new centroid for each of the k clusters, averaging all data points assigned to that cluster.
centroids = revise_centroids(data,k, cluster_assignment)
# Check for convergence: if none of the assignments changed, stop
if prev_cluster_assignment is not None and \
(prev_cluster_assignment==cluster_assignment).all():
break
# Print number of new assignments
if prev_cluster_assignment is not None:
num_changed = np.sum(prev_cluster_assignment!=cluster_assignment)
if verbose:
print(' {0:5d} elements changed their cluster assignment.'.format(num_changed))
# Record heterogeneity convergence metric
if record_heterogeneity is not None:
# YOUR CODE HERE
score = compute_heterogeneity(data,k,centroids,cluster_assignment)
record_heterogeneity.append(score)
prev_cluster_assignment = cluster_assignment[:]
return centroids, cluster_assignment
# Mock test below
if False: # change to true to run this test case.
import sklearn.datasets as ds
dataset = ds.load_iris()
k = 3
heterogeneity = []
initial_centroids = get_initial_centroids(dataset['data'], k, seed=0)
centroids, cluster_assignment = kmeans(dataset['data'], k, initial_centroids, maxiter=400,
record_heterogeneity=heterogeneity, verbose=True)
plot_heterogeneity(heterogeneity, k)

View File

@ -0,0 +1,34 @@
__author__ = "Tobias Carryer"
from time import time
class LinearCongruentialGenerator(object):
"""
A pseudorandom number generator.
"""
def __init__( self, multiplier, increment, modulo, seed=int(time()) ):
"""
These parameters are saved and used when nextNumber() is called.
modulo is the largest number that can be generated (exclusive). The most
efficent values are powers of 2. 2^32 is a common value.
"""
self.multiplier = multiplier
self.increment = increment
self.modulo = modulo
self.seed = seed
def next_number( self ):
"""
The smallest number that can be generated is zero.
The largest number that can be generated is modulo-1. modulo is set in the constructor.
"""
self.seed = (self.multiplier * self.seed + self.increment) % self.modulo
return self.seed
if __name__ == "__main__":
# Show the LCG in action.
lcg = LinearCongruentialGenerator(1664525, 1013904223, 2<<31)
while True :
print lcg.next_number()

View File

@ -0,0 +1,49 @@
"""
* Binary Exponentiation for Powers
* This is a method to find a^b in a time complexity of O(log b)
* This is one of the most commonly used methods of finding powers.
* Also useful in cases where solution to (a^b)%c is required,
* where a,b,c can be numbers over the computers calculation limits.
* Done using iteration, can also be done using recursion
* @author chinmoy159
* @version 1.0 dated 10/08/2017
"""
def b_expo(a, b):
res = 1
while b > 0:
if b&1:
res *= a
a *= a
b >>= 1
return res
def b_expo_mod(a, b, c):
res = 1
while b > 0:
if b&1:
res = ((res%c) * (a%c)) % c
a *= a
b >>= 1
return res
"""
* Wondering how this method works !
* It's pretty simple.
* Let's say you need to calculate a ^ b
* RULE 1 : a ^ b = (a*a) ^ (b/2) ---- example : 4 ^ 4 = (4*4) ^ (4/2) = 16 ^ 2
* RULE 2 : IF b is ODD, then ---- a ^ b = a * (a ^ (b - 1)) :: where (b - 1) is even.
* Once b is even, repeat the process to get a ^ b
* Repeat the process till b = 1 OR b = 0, because a^1 = a AND a^0 = 1
*
* As far as the modulo is concerned,
* the fact : (a*b) % c = ((a%c) * (b%c)) % c
* Now apply RULE 1 OR 2 whichever is required.
"""

View File

@ -0,0 +1,50 @@
"""
* Binary Exponentiation with Multiplication
* This is a method to find a*b in a time complexity of O(log b)
* This is one of the most commonly used methods of finding result of multiplication.
* Also useful in cases where solution to (a*b)%c is required,
* where a,b,c can be numbers over the computers calculation limits.
* Done using iteration, can also be done using recursion
* @author chinmoy159
* @version 1.0 dated 10/08/2017
"""
def b_expo(a, b):
res = 0
while b > 0:
if b&1:
res += a
a += a
b >>= 1
return res
def b_expo_mod(a, b, c):
res = 0
while b > 0:
if b&1:
res = ((res%c) + (a%c)) % c
a += a
b >>= 1
return res
"""
* Wondering how this method works !
* It's pretty simple.
* Let's say you need to calculate a ^ b
* RULE 1 : a * b = (a+a) * (b/2) ---- example : 4 * 4 = (4+4) * (4/2) = 8 * 2
* RULE 2 : IF b is ODD, then ---- a * b = a + (a * (b - 1)) :: where (b - 1) is even.
* Once b is even, repeat the process to get a * b
* Repeat the process till b = 1 OR b = 0, because a*1 = a AND a*0 = 0
*
* As far as the modulo is concerned,
* the fact : (a+b) % c = ((a%c) + (b%c)) % c
* Now apply RULE 1 OR 2, whichever is required.
"""

18
other/euclidean_gcd.py Normal file
View File

@ -0,0 +1,18 @@
# https://en.wikipedia.org/wiki/Euclidean_algorithm
def euclidean_gcd(a, b):
while b:
t = b
b = a % b
a = t
return a
def main():
print("GCD(3, 5) = " + str(euclidean_gcd(3, 5)))
print("GCD(5, 3) = " + str(euclidean_gcd(5, 3)))
print("GCD(1, 3) = " + str(euclidean_gcd(1, 3)))
print("GCD(3, 6) = " + str(euclidean_gcd(3, 6)))
print("GCD(6, 3) = " + str(euclidean_gcd(6, 3)))
if __name__ == '__main__':
main()

View File

@ -113,7 +113,7 @@ def binary_search_by_recursion(sorted_collection, item, left, right):
return binary_search_by_recursion(sorted_collection, item, left, midpoint-1)
else:
return binary_search_by_recursion(sorted_collection, item, midpoint+1, right)
def __assert_sorted(collection):
"""Check if collection is sorted, if not - raises :py:class:`ValueError`
@ -137,14 +137,14 @@ def __assert_sorted(collection):
if __name__ == '__main__':
import sys
# For python 2.x and 3.x compatibility: 3.x has not raw_input builtin
# For python 2.x and 3.x compatibility: 3.x has no raw_input builtin
# otherwise 2.x's input builtin function is too "smart"
if sys.version_info.major < 3:
input_function = raw_input
else:
input_function = input
user_input = input_function('Enter numbers separated by coma:\n')
user_input = input_function('Enter numbers separated by comma:\n')
collection = [int(item) for item in user_input.split(',')]
try:
__assert_sorted(collection)

View File

@ -0,0 +1,102 @@
"""
This is pure python implementation of interpolation search algorithm
"""
from __future__ import print_function
import bisect
def interpolation_search(sorted_collection, item):
"""Pure implementation of interpolation search algorithm in Python
Be careful collection must be sorted, otherwise result will be
unpredictable
:param sorted_collection: some sorted collection with comparable items
:param item: item value to search
:return: index of found item or None if item is not found
"""
left = 0
right = len(sorted_collection) - 1
while left <= right:
point = left + ((item - sorted_collection[left]) * (right - left)) // (sorted_collection[right] - sorted_collection[left])
#out of range check
if point<0 or point>=len(sorted_collection):
return None
current_item = sorted_collection[point]
if current_item == item:
return point
else:
if item < current_item:
right = point - 1
else:
left = point + 1
return None
def interpolation_search_by_recursion(sorted_collection, item, left, right):
"""Pure implementation of interpolation search algorithm in Python by recursion
Be careful collection must be sorted, otherwise result will be
unpredictable
First recursion should be started with left=0 and right=(len(sorted_collection)-1)
:param sorted_collection: some sorted collection with comparable items
:param item: item value to search
:return: index of found item or None if item is not found
"""
point = left + ((item - sorted_collection[left]) * (right - left)) // (sorted_collection[right] - sorted_collection[left])
#out of range check
if point<0 or point>=len(sorted_collection):
return None
if sorted_collection[point] == item:
return point
elif sorted_collection[point] > item:
return interpolation_search_by_recursion(sorted_collection, item, left, point-1)
else:
return interpolation_search_by_recursion(sorted_collection, item, point+1, right)
def __assert_sorted(collection):
"""Check if collection is sorted, if not - raises :py:class:`ValueError`
:param collection: collection
:return: True if collection is sorted
:raise: :py:class:`ValueError` if collection is not sorted
Examples:
>>> __assert_sorted([0, 1, 2, 4])
True
>>> __assert_sorted([10, -1, 5])
Traceback (most recent call last):
...
ValueError: Collection must be sorted
"""
if collection != sorted(collection):
raise ValueError('Collection must be sorted')
return True
if __name__ == '__main__':
import sys
# For python 2.x and 3.x compatibility: 3.x has no raw_input builtin
# otherwise 2.x's input builtin function is too "smart"
if sys.version_info.major < 3:
input_function = raw_input
else:
input_function = input
user_input = input_function('Enter numbers separated by comma:\n')
collection = [int(item) for item in user_input.split(',')]
try:
__assert_sorted(collection)
except ValueError:
sys.exit('Sequence must be sorted to apply interpolation search')
target_input = input_function(
'Enter a single number to be found in the list:\n'
)
target = int(target_input)
result = interpolation_search(collection, target)
if result is not None:
print('{} found at positions: {}'.format(target, result))
else:
print('Not found')

View File

@ -41,7 +41,7 @@ def linear_search(sequence, target):
if __name__ == '__main__':
import sys
# For python 2.x and 3.x compatibility: 3.x has not raw_input builtin
# For python 2.x and 3.x compatibility: 3.x has no raw_input builtin
# otherwise 2.x's input builtin function is too "smart"
if sys.version_info.major < 3:
input_function = raw_input

112
searches/ternary_search.py Normal file
View File

@ -0,0 +1,112 @@
'''
This is a type of divide and conquer algorithm which divides the search space into
3 parts and finds the target value based on the property of the array or list
(usually monotonic property).
Time Complexity : O(log3 N)
Space Complexity : O(1)
'''
import sys
# This is the precision for this function which can be altered.
# It is recommended for users to keep this number greater than or equal to 10.
precision = 10
# This is the linear search that will occur after the search space has become smaller.
def lin_search(left, right, A, target):
for i in range(left, right+1):
if(A[i] == target):
return i
# This is the iterative method of the ternary search algorithm.
def ite_ternary_search(A, target):
left = 0
right = len(A) - 1;
while(True):
if(left<right):
if(right-left < precision):
return lin_search(left,right,A,target)
oneThird = (left+right)/3+1;
twoThird = 2*(left+right)/3+1;
if(A[oneThird] == target):
return oneThird
elif(A[twoThird] == target):
return twoThird
elif(target < A[oneThird]):
right = oneThird-1
elif(A[twoThird] < target):
left = twoThird+1
else:
left = oneThird+1
right = twoThird-1
else:
return None
# This is the recursive method of the ternary search algorithm.
def rec_ternary_search(left, right, A, target):
if(left<right):
if(right-left < precision):
return lin_search(left,right,A,target)
oneThird = (left+right)/3+1;
twoThird = 2*(left+right)/3+1;
if(A[oneThird] == target):
return oneThird
elif(A[twoThird] == target):
return twoThird
elif(target < A[oneThird]):
return rec_ternary_search(left, oneThird-1, A, target)
elif(A[twoThird] < target):
return rec_ternary_search(twoThird+1, right, A, target)
else:
return rec_ternary_search(oneThird+1, twoThird-1, A, target)
else:
return None
# This function is to check if the array is sorted.
def __assert_sorted(collection):
if collection != sorted(collection):
raise ValueError('Collection must be sorted')
return True
if __name__ == '__main__':
# For python 2.x and 3.x compatibility: 3.x has not raw_input builtin
# otherwise 2.x's input builtin function is too "smart"
if sys.version_info.major < 3:
input_function = raw_input
else:
input_function = input
user_input = input_function('Enter numbers separated by coma:\n')
collection = [int(item) for item in user_input.split(',')]
try:
__assert_sorted(collection)
except ValueError:
sys.exit('Sequence must be sorted to apply the ternary search')
target_input = input_function(
'Enter a single number to be found in the list:\n'
)
target = int(target_input)
result1 = ite_ternary_search(collection, target)
result2 = rec_ternary_search(0, len(collection)-1, collection, target)
if result2 is not None:
print('Iterative search: {} found at positions: {}'.format(target, result1))
print('Recursive search: {} found at positions: {}'.format(target, result2))
else:
print('Not found')

View File

@ -41,7 +41,7 @@ def bogosort(collection):
if __name__ == '__main__':
import sys
# For python 2.x and 3.x compatibility: 3.x has not raw_input builtin
# For python 2.x and 3.x compatibility: 3.x has no raw_input builtin
# otherwise 2.x's input builtin function is too "smart"
if sys.version_info.major < 3:
input_function = raw_input

View File

@ -41,7 +41,7 @@ def bubble_sort(collection):
if __name__ == '__main__':
import sys
# For python 2.x and 3.x compatibility: 3.x has not raw_input builtin
# For python 2.x and 3.x compatibility: 3.x has no raw_input builtin
# otherwise 2.x's input builtin function is too "smart"
if sys.version_info.major < 3:
input_function = raw_input

View File

@ -23,7 +23,7 @@ def cocktail_shaker_sort(unsorted):
if __name__ == '__main__':
import sys
# For python 2.x and 3.x compatibility: 3.x has not raw_input builtin
# For python 2.x and 3.x compatibility: 3.x has no raw_input builtin
# otherwise 2.x's input builtin function is too "smart"
if sys.version_info.major < 3:
input_function = raw_input

72
sorts/counting_sort.py Normal file
View File

@ -0,0 +1,72 @@
"""
This is pure python implementation of counting sort algorithm
For doctests run following command:
python -m doctest -v counting_sort.py
or
python3 -m doctest -v counting_sort.py
For manual testing run:
python counting_sort.py
"""
from __future__ import print_function
def counting_sort(collection):
"""Pure implementation of counting sort algorithm in Python
:param collection: some mutable ordered collection with heterogeneous
comparable items inside
:return: the same collection ordered by ascending
Examples:
>>> counting_sort([0, 5, 3, 2, 2])
[0, 2, 2, 3, 5]
>>> counting_sort([])
[]
>>> counting_sort([-2, -5, -45])
[-45, -5, -2]
"""
# if the collection is empty, returns empty
if collection == []:
return []
# get some information about the collection
coll_len = len(collection)
coll_max = max(collection)
coll_min = min(collection)
# create the counting array
counting_arr_length = coll_max + 1 - coll_min
counting_arr = [0] * counting_arr_length
# count how much a number appears in the collection
for number in collection:
counting_arr[number - coll_min] += 1
# sum each position with it's predecessors. now, counting_arr[i] tells
# us how many elements <= i has in the collection
for i in range(1, counting_arr_length):
counting_arr[i] = counting_arr[i] + counting_arr[i-1]
# create the output collection
ordered = [0] * coll_len
# place the elements in the output, respecting the original order (stable
# sort) from end to begin, updating counting_arr
for i in reversed(range(0, coll_len)):
ordered[counting_arr[collection[i] - coll_min]-1] = collection[i]
counting_arr[collection[i] - coll_min] -= 1
return ordered
if __name__ == '__main__':
import sys
# For python 2.x and 3.x compatibility: 3.x has not raw_input builtin
# otherwise 2.x's input builtin function is too "smart"
if sys.version_info.major < 3:
input_function = raw_input
else:
input_function = input
user_input = input_function('Enter numbers separated by a comma:\n')
unsorted = [int(item) for item in user_input.split(',')]
print(counting_sort(unsorted))

View File

@ -21,7 +21,7 @@ def gnome_sort(unsorted):
if __name__ == '__main__':
import sys
# For python 2.x and 3.x compatibility: 3.x has not raw_input builtin
# For python 2.x and 3.x compatibility: 3.x has no raw_input builtin
# otherwise 2.x's input builtin function is too "smart"
if sys.version_info.major < 3:
input_function = raw_input

View File

@ -41,7 +41,7 @@ def insertion_sort(collection):
if __name__ == '__main__':
import sys
# For python 2.x and 3.x compatibility: 3.x has not raw_input builtin
# For python 2.x and 3.x compatibility: 3.x has no raw_input builtin
# otherwise 2.x's input builtin function is too "smart"
if sys.version_info.major < 3:
input_function = raw_input

View File

@ -64,7 +64,7 @@ def merge_sort(collection):
if __name__ == '__main__':
import sys
# For python 2.x and 3.x compatibility: 3.x has not raw_input builtin
# For python 2.x and 3.x compatibility: 3.x has no raw_input builtin
# otherwise 2.x's input builtin function is too "smart"
if sys.version_info.major < 3:
input_function = raw_input

View File

@ -42,7 +42,7 @@ def quick_sort(ARRAY):
if __name__ == '__main__':
import sys
# For python 2.x and 3.x compatibility: 3.x has not raw_input builtin
# For python 2.x and 3.x compatibility: 3.x has no raw_input builtin
# otherwise 2.x's input builtin function is too "smart"
if sys.version_info.major < 3:
input_function = raw_input

View File

@ -2,19 +2,20 @@ def radixsort(lst):
RADIX = 10
maxLength = False
tmp , placement = -1, 1
while not maxLength:
maxLength = True
# declare and initialize buckets
buckets = [list() for _ in range( RADIX )]
# split lst between lists
for i in lst:
tmp = int((i / placement) % RADIX)
buckets[tmp].append(i)
if maxLength and tmp > 0:
maxLength = False
# empty lists into lst array
a = 0
for b in range( RADIX ):
@ -22,6 +23,6 @@ def radixsort(lst):
for i in buck:
lst[a] = i
a += 1
# move to next
placement *= RADIX

View File

@ -44,7 +44,7 @@ def selection_sort(collection):
if __name__ == '__main__':
import sys
# For python 2.x and 3.x compatibility: 3.x has not raw_input builtin
# For python 2.x and 3.x compatibility: 3.x has no raw_input builtin
# otherwise 2.x's input builtin function is too "smart"
if sys.version_info.major < 3:
input_function = raw_input

View File

@ -45,7 +45,7 @@ def shell_sort(collection):
if __name__ == '__main__':
import sys
# For python 2.x and 3.x compatibility: 3.x has not raw_input builtin
# For python 2.x and 3.x compatibility: 3.x has no raw_input builtin
# otherwise 2.x's input builtin function is too "smart"
if sys.version_info.major < 3:
input_function = raw_input

81
sorts/timsort.py Normal file
View File

@ -0,0 +1,81 @@
def binary_search(lst, item, start, end):
if start == end:
if lst[start] > item:
return start
else:
return start + 1
if start > end:
return start
mid = (start + end) // 2
if lst[mid] < item:
return binary_search(lst, item, mid + 1, end)
elif lst[mid] > item:
return binary_search(lst, item, start, mid - 1)
else:
return mid
def insertion_sort(lst):
length = len(lst)
for index in range(1, length):
value = lst[index]
pos = binary_search(lst, value, 0, index - 1)
lst = lst[:pos] + [value] + lst[pos:index] + lst[index+1:]
return lst
def merge(left, right):
if not left:
return right
if not right:
return left
if left[0] < right[0]:
return [left[0]] + merge(left[1:], right)
return [right[0]] + merge(left, right[1:])
def timsort(lst):
runs, sorted_runs = [], []
length = len(lst)
new_run = [lst[0]]
sorted_array = []
for i in range(1, length):
if i == length - 1:
new_run.append(lst[i])
runs.append(new_run)
break
if lst[i] < lst[i - 1]:
if not new_run:
runs.append([lst[i - 1]])
new_run.append(lst[i])
else:
runs.append(new_run)
new_run = []
else:
new_run.append(lst[i])
for run in runs:
sorted_runs.append(insertion_sort(run))
for run in sorted_runs:
sorted_array = merge(sorted_array, run)
return sorted_array
def main():
lst = [5,9,10,3,-4,5,178,92,46,-18,0,7]
sorted_lst = timsort(lst)
print(sorted_lst)
if __name__ == '__main__':
main()

View File

@ -84,7 +84,7 @@ if __name__ == '__main__':
import sys
print("\n********* Binary Tree Traversals ************\n")
# For python 2.x and 3.x compatibility: 3.x has not raw_input builtin
# For python 2.x and 3.x compatibility: 3.x has no raw_input builtin
# otherwise 2.x's input builtin function is too "smart"
if sys.version_info.major < 3:
input_function = raw_input