diff --git a/Graphs/A*.py b/Graphs/A*.py new file mode 100644 index 000000000..2ca9476e5 --- /dev/null +++ b/Graphs/A*.py @@ -0,0 +1,101 @@ + +grid = [[0, 1, 0, 0, 0, 0], + [0, 1, 0, 0, 0, 0],#0 are free path whereas 1's are obstacles + [0, 1, 0, 0, 0, 0], + [0, 1, 0, 0, 1, 0], + [0, 0, 0, 0, 1, 0]] + +''' +heuristic = [[9, 8, 7, 6, 5, 4], + [8, 7, 6, 5, 4, 3], + [7, 6, 5, 4, 3, 2], + [6, 5, 4, 3, 2, 1], + [5, 4, 3, 2, 1, 0]]''' + +init = [0, 0] +goal = [len(grid)-1, len(grid[0])-1] #all coordinates are given in format [y,x] +cost = 1 + +#the cost map which pushes the path closer to the goal +heuristic = [[0 for row in range(len(grid[0]))] for col in range(len(grid))] +for i in range(len(grid)): + for j in range(len(grid[0])): + heuristic[i][j] = abs(i - goal[0]) + abs(j - goal[1]) + if grid[i][j] == 1: + heuristic[i][j] = 99 #added extra penalty in the heuristic map + + +#the actions we can take +delta = [[-1, 0 ], # go up + [ 0, -1], # go left + [ 1, 0 ], # go down + [ 0, 1 ]] # go right + + +#function to search the path +def search(grid,init,goal,cost,heuristic): + + closed = [[0 for col in range(len(grid[0]))] for row in range(len(grid))]# the referrence grid + closed[init[0]][init[1]] = 1 + action = [[0 for col in range(len(grid[0]))] for row in range(len(grid))]#the action grid + + x = init[0] + y = init[1] + g = 0 + f = g + heuristic[init[0]][init[0]] + cell = [[f, g, x, y]] + + found = False # flag that is set when search is complete + resign = False # flag set if we can't find expand + + while not found and not resign: + if len(cell) == 0: + resign = True + return "FAIL" + else: + cell.sort()#to choose the least costliest action so as to move closer to the goal + cell.reverse() + next = cell.pop() + x = next[2] + y = next[3] + g = next[1] + f = next[0] + + + if x == goal[0] and y == goal[1]: + found = True + else: + for i in range(len(delta)):#to try out different valid actions + x2 = x + delta[i][0] + y2 = y + delta[i][1] + if x2 >= 0 and x2 < len(grid) 
and y2 >=0 and y2 < len(grid[0]): + if closed[x2][y2] == 0 and grid[x2][y2] == 0: + g2 = g + cost + f2 = g2 + heuristic[x2][y2] + cell.append([f2, g2, x2, y2]) + closed[x2][y2] = 1 + action[x2][y2] = i + invpath = [] + x = goal[0] + y = goal[1] + invpath.append([x, y])#we get the reverse path from here + while x != init[0] or y != init[1]: + x2 = x - delta[action[x][y]][0] + y2 = y - delta[action[x][y]][1] + x = x2 + y = y2 + invpath.append([x, y]) + + path = [] + for i in range(len(invpath)): + path.append(invpath[len(invpath) - 1 - i]) + print "ACTION MAP" + for i in range(len(action)): + print action[i] + + return path + +a = search(grid,init,goal,cost,heuristic) +for i in range(len(a)): + print a[i] + diff --git a/Neural_Network/convolution_neural_network.py b/Neural_Network/convolution_neural_network.py new file mode 100644 index 000000000..d8ab0d2e5 --- /dev/null +++ b/Neural_Network/convolution_neural_network.py @@ -0,0 +1,305 @@ +#-*- coding: utf-8 -*- + +''' + - - - - - -- - - - - - - - - - - - - - - - - - - - - - - + Name - - CNN - Convolution Neural Network For Photo Recognizing + Goal - - Recognize Handing Writting Word Photo + Detail:Total 5 layers neural network + * Convolution layer + * Pooling layer + * Input layer layer of BP + * Hiden layer of BP + * Output layer of BP + Author: Stephen Lee + Github: 245885195@qq.com + Date: 2017.9.20 + - - - - - -- - - - - - - - - - - - - - - - - - - - - - - + ''' + +import numpy as np +import matplotlib.pyplot as plt + +class CNN(): + + def __init__(self,conv1_get,size_p1,bp_num1,bp_num2,bp_num3,rate_w=0.2,rate_t=0.2): + ''' + :param conv1_get: [a,c,d],size, number, step of convolution kernel + :param size_p1: pooling size + :param bp_num1: units number of flatten layer + :param bp_num2: units number of hidden layer + :param bp_num3: units number of output layer + :param rate_w: rate of weight learning + :param rate_t: rate of threshold learning + ''' + self.num_bp1 = bp_num1 + self.num_bp2 = bp_num2 + 
self.num_bp3 = bp_num3 + self.conv1 = conv1_get[:2] + self.step_conv1 = conv1_get[2] + self.size_pooling1 = size_p1 + self.rate_weight = rate_w + self.rate_thre = rate_t + self.w_conv1 = [np.mat(-1*np.random.rand(self.conv1[0],self.conv1[0])+0.5) for i in range(self.conv1[1])] + self.wkj = np.mat(-1 * np.random.rand(self.num_bp3, self.num_bp2) + 0.5) + self.vji = np.mat(-1*np.random.rand(self.num_bp2, self.num_bp1)+0.5) + self.thre_conv1 = -2*np.random.rand(self.conv1[1])+1 + self.thre_bp2 = -2*np.random.rand(self.num_bp2)+1 + self.thre_bp3 = -2*np.random.rand(self.num_bp3)+1 + + + def save_model(self,save_path): + #save model dict with pickle + import pickle + model_dic = {'num_bp1':self.num_bp1, + 'num_bp2':self.num_bp2, + 'num_bp3':self.num_bp3, + 'conv1':self.conv1, + 'step_conv1':self.step_conv1, + 'size_pooling1':self.size_pooling1, + 'rate_weight':self.rate_weight, + 'rate_thre':self.rate_thre, + 'w_conv1':self.w_conv1, + 'wkj':self.wkj, + 'vji':self.vji, + 'thre_conv1':self.thre_conv1, + 'thre_bp2':self.thre_bp2, + 'thre_bp3':self.thre_bp3} + with open(save_path, 'wb') as f: + pickle.dump(model_dic, f) + + print('Model saved: %s'% save_path) + + @classmethod + def ReadModel(cls,model_path): + #read saved model + import pickle + with open(model_path, 'rb') as f: + model_dic = pickle.load(f) + + conv_get= model_dic.get('conv1') + conv_get.append(model_dic.get('step_conv1')) + size_p1 = model_dic.get('size_pooling1') + bp1 = model_dic.get('num_bp1') + bp2 = model_dic.get('num_bp2') + bp3 = model_dic.get('num_bp3') + r_w = model_dic.get('rate_weight') + r_t = model_dic.get('rate_thre') + #create model instance + conv_ins = CNN(conv_get,size_p1,bp1,bp2,bp3,r_w,r_t) + #modify model parameter + conv_ins.w_conv1 = model_dic.get('w_conv1') + conv_ins.wkj = model_dic.get('wkj') + conv_ins.vji = model_dic.get('vji') + conv_ins.thre_conv1 = model_dic.get('thre_conv1') + conv_ins.thre_bp2 = model_dic.get('thre_bp2') + conv_ins.thre_bp3 = model_dic.get('thre_bp3') + 
return conv_ins + + + def sig(self,x): + return 1 / (1 + np.exp(-1*x)) + + def do_round(self,x): + return round(x, 3) + + def convolute(self,data,convs,w_convs,thre_convs,conv_step): + #convolution process + size_conv = convs[0] + num_conv =convs[1] + size_data = np.shape(data)[0] + #get the data slice of original image data, data_focus + data_focus = [] + for i_focus in range(0, size_data - size_conv + 1, conv_step): + for j_focus in range(0, size_data - size_conv + 1, conv_step): + focus = data[i_focus:i_focus + size_conv, j_focus:j_focus + size_conv] + data_focus.append(focus) + #caculate the feature map of every single kernel, and saved as list of matrix + data_featuremap = [] + Size_FeatureMap = int((size_data - size_conv) / conv_step + 1) + for i_map in range(num_conv): + featuremap = [] + for i_focus in range(len(data_focus)): + net_focus = np.sum(np.multiply(data_focus[i_focus], w_convs[i_map])) - thre_convs[i_map] + featuremap.append(self.sig(net_focus)) + featuremap = np.asmatrix(featuremap).reshape(Size_FeatureMap, Size_FeatureMap) + data_featuremap.append(featuremap) + + #expanding the data slice to One dimenssion + focus1_list = [] + for each_focus in data_focus: + focus1_list.extend(self.Expand_Mat(each_focus)) + focus_list = np.asarray(focus1_list) + return focus_list,data_featuremap + + def pooling(self,featuremaps,size_pooling,type='average_pool'): + #pooling process + size_map = len(featuremaps[0]) + size_pooled = int(size_map/size_pooling) + featuremap_pooled = [] + for i_map in range(len(featuremaps)): + map = featuremaps[i_map] + map_pooled = [] + for i_focus in range(0,size_map,size_pooling): + for j_focus in range(0, size_map, size_pooling): + focus = map[i_focus:i_focus + size_pooling, j_focus:j_focus + size_pooling] + if type == 'average_pool': + #average pooling + map_pooled.append(np.average(focus)) + elif type == 'max_pooling': + #max pooling + map_pooled.append(np.max(focus)) + map_pooled = 
np.asmatrix(map_pooled).reshape(size_pooled,size_pooled) + featuremap_pooled.append(map_pooled) + return featuremap_pooled + + def _expand(self,datas): + #expanding three dimension data to one dimension list + data_expanded = [] + for i in range(len(datas)): + shapes = np.shape(datas[i]) + data_listed = datas[i].reshape(1,shapes[0]*shapes[1]) + data_listed = data_listed.getA().tolist()[0] + data_expanded.extend(data_listed) + data_expanded = np.asarray(data_expanded) + return data_expanded + + def _expand_mat(self,data_mat): + #expanding matrix to one dimension list + data_mat = np.asarray(data_mat) + shapes = np.shape(data_mat) + data_expanded = data_mat.reshape(1,shapes[0]*shapes[1]) + return data_expanded + + def _calculate_gradient_from_pool(self,out_map,pd_pool,num_map,size_map,size_pooling): + ''' + calcluate the gradient from the data slice of pool layer + pd_pool: list of matrix + out_map: the shape of data slice(size_map*size_map) + return: pd_all: list of matrix, [num, size_map, size_map] + ''' + pd_all = [] + i_pool = 0 + for i_map in range(num_map): + pd_conv1 = np.ones((size_map, size_map)) + for i in range(0, size_map, size_pooling): + for j in range(0, size_map, size_pooling): + pd_conv1[i:i + size_pooling, j:j + size_pooling] = pd_pool[i_pool] + i_pool = i_pool + 1 + pd_conv2 = np.multiply(pd_conv1,np.multiply(out_map[i_map],(1-out_map[i_map]))) + pd_all.append(pd_conv2) + return pd_all + + def trian(self,patterns,datas_train, datas_teach, n_repeat, error_accuracy,draw_e = bool): + #model traning + print('----------------------Start Training-------------------------') + print(' - - Shape: Train_Data ',np.shape(datas_train)) + print(' - - Shape: Teach_Data ',np.shape(datas_teach)) + rp = 0 + all_mse = [] + mse = 10000 + while rp < n_repeat and mse >= error_accuracy: + alle = 0 + print('-------------Learning Time %d--------------'%rp) + for p in range(len(datas_train)): + #print('------------Learning Image: %d--------------'%p) + data_train = 
np.asmatrix(datas_train[p]) + data_teach = np.asarray(datas_teach[p]) + data_focus1,data_conved1 = self.convolute(data_train,self.conv1,self.w_conv1, + self.thre_conv1,conv_step=self.step_conv1) + data_pooled1 = self.pooling(data_conved1,self.size_pooling1) + shape_featuremap1 = np.shape(data_conved1) + ''' + print(' -----original shape ', np.shape(data_train)) + print(' ---- after convolution ',np.shape(data_conv1)) + print(' -----after pooling ',np.shape(data_pooled1)) + ''' + data_bp_input = self._expand(data_pooled1) + bp_out1 = data_bp_input + + bp_net_j = np.dot(bp_out1,self.vji.T) - self.thre_bp2 + bp_out2 = self.sig(bp_net_j) + bp_net_k = np.dot(bp_out2 ,self.wkj.T) - self.thre_bp3 + bp_out3 = self.sig(bp_net_k) + + #--------------Model Leaning ------------------------ + # calcluate error and gradient--------------- + pd_k_all = np.multiply((data_teach - bp_out3), np.multiply(bp_out3, (1 - bp_out3))) + pd_j_all = np.multiply(np.dot(pd_k_all,self.wkj), np.multiply(bp_out2, (1 - bp_out2))) + pd_i_all = np.dot(pd_j_all,self.vji) + + pd_conv1_pooled = pd_i_all / (self.size_pooling1*self.size_pooling1) + pd_conv1_pooled = pd_conv1_pooled.T.getA().tolist() + pd_conv1_all = self._calculate_gradient_from_pool(data_conved1,pd_conv1_pooled,shape_featuremap1[0], + shape_featuremap1[1],self.size_pooling1) + #weight and threshold learning process--------- + #convolution layer + for k_conv in range(self.conv1[1]): + pd_conv_list = self._expand_mat(pd_conv1_all[k_conv]) + delta_w = self.rate_weight * np.dot(pd_conv_list,data_focus1) + + self.w_conv1[k_conv] = self.w_conv1[k_conv] + delta_w.reshape((self.conv1[0],self.conv1[0])) + + self.thre_conv1[k_conv] = self.thre_conv1[k_conv] - np.sum(pd_conv1_all[k_conv]) * self.rate_thre + #all connected layer + self.wkj = self.wkj + pd_k_all.T * bp_out2 * self.rate_weight + self.vji = self.vji + pd_j_all.T * bp_out1 * self.rate_weight + self.thre_bp3 = self.thre_bp3 - pd_k_all * self.rate_thre + self.thre_bp2 = self.thre_bp2 - 
pd_j_all * self.rate_thre + # calculate the sum error of all single image + errors = np.sum(abs((data_teach - bp_out3))) + alle = alle + errors + #print(' ----Teach ',data_teach) + #print(' ----BP_output ',bp_out3) + rp = rp + 1 + mse = alle/patterns + all_mse.append(mse) + def draw_error(): + yplot = [error_accuracy for i in range(int(n_repeat * 1.2))] + plt.plot(all_mse, '+-') + plt.plot(yplot, 'r--') + plt.xlabel('Learning Times') + plt.ylabel('All_mse') + plt.grid(True, alpha=0.5) + plt.show() + print('------------------Training Complished---------------------') + print(' - - Training epoch: ', rp, ' - - Mse: %.6f' % mse) + if draw_e: + draw_error() + return mse + + def predict(self,datas_test): + #model predict + produce_out = [] + print('-------------------Start Testing-------------------------') + print(' - - Shape: Test_Data ',np.shape(datas_test)) + for p in range(len(datas_test)): + data_test = np.asmatrix(datas_test[p]) + data_focus1, data_conved1 = self.convolute(data_test, self.conv1, self.w_conv1, + self.thre_conv1, conv_step=self.step_conv1) + data_pooled1 = self.pooling(data_conved1, self.size_pooling1) + data_bp_input = self._expand(data_pooled1) + + bp_out1 = data_bp_input + bp_net_j = bp_out1 * self.vji.T - self.thre_bp2 + bp_out2 = self.sig(bp_net_j) + bp_net_k = bp_out2 * self.wkj.T - self.thre_bp3 + bp_out3 = self.sig(bp_net_k) + produce_out.extend(bp_out3.getA().tolist()) + res = [list(map(self.do_round,each)) for each in produce_out] + return np.asarray(res) + + def convolution(self,data): + #return the data of image after convoluting process so we can check it out + data_test = np.asmatrix(data) + data_focus1, data_conved1 = self.convolute(data_test, self.conv1, self.w_conv1, + self.thre_conv1, conv_step=self.step_conv1) + data_pooled1 = self.pooling(data_conved1, self.size_pooling1) + + return data_conved1,data_pooled1 + + +if __name__ == '__main__': + pass + ''' + I will put the example on other file + ''' \ No newline at end of file diff 
--git a/README.md b/README.md index 9589bccd9..70077e98f 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# The Algorithms - Python [![Build Status](https://travis-ci.org/TheAlgorithms/Python.svg)](https://travis-ci.org/TheAlgorithms/Python) +# The Algorithms - Python ### All algorithms implemented in Python (for education) @@ -128,6 +128,13 @@ The method is named after **Julius Caesar**, who used it in his private correspo The encryption step performed by a Caesar cipher is often incorporated as part of more complex schemes, such as the Vigenère cipher, and still has modern application in the ROT13 system. As with all single-alphabet substitution ciphers, the Caesar cipher is easily broken and in modern practice offers essentially no communication security. ###### Source: [Wikipedia](https://en.wikipedia.org/wiki/Caesar_cipher) +### Vigenère +The **Vigenère cipher** is a method of encrypting alphabetic text by using a series of **interwoven Caesar ciphers** based on the letters of a keyword. It is **a form of polyalphabetic substitution**.
+The Vigenère cipher has been reinvented many times. The method was originally described by Giovan Battista Bellaso in his 1553 book La cifra del. Sig. Giovan Battista Bellaso; however, the scheme was later misattributed to Blaise de Vigenère in the 19th century, and is now widely known as the "Vigenère cipher".
+Though the cipher is easy to understand and implement, for three centuries it resisted all attempts to break it; this earned it the description **le chiffre indéchiffrable** (French for 'the indecipherable cipher'). +Many people have tried to implement encryption schemes that are essentially Vigenère ciphers. Friedrich Kasiski was the first to publish a general method of deciphering a Vigenère cipher in 1863. +###### Source: [Wikipedia](https://en.wikipedia.org/wiki/Vigen%C3%A8re_cipher) + ### Transposition In cryptography, a **transposition cipher** is a method of encryption by which the positions held by units of plaintext (which are commonly characters or groups of characters) are shifted according to a regular system, so that the ciphertext constitutes a permutation of the plaintext. That is, the order of the units is changed (the plaintext is reordered).
Mathematically a bijective function is used on the characters' positions to encrypt and an inverse function to decrypt. diff --git a/data_structures/Binary Tree/binary_seach_tree.py b/data_structures/Binary Tree/binary_seach_tree.py index 1dac948ae..0b1726534 100644 --- a/data_structures/Binary Tree/binary_seach_tree.py +++ b/data_structures/Binary Tree/binary_seach_tree.py @@ -8,7 +8,7 @@ class Node: def __init__(self, label): self.label = label self.left = None - self.rigt = None + self.right = None def getLabel(self): return self.label @@ -23,10 +23,10 @@ class Node: self.left = left def getRight(self): - return self.rigt + return self.right def setRight(self, right): - self.rigt = right + self.right = right class BinarySearchTree: diff --git a/data_structures/Graph/P01_BreadthFirstSearch.py b/data_structures/Graph/BreadthFirstSearch.py similarity index 100% rename from data_structures/Graph/P01_BreadthFirstSearch.py rename to data_structures/Graph/BreadthFirstSearch.py diff --git a/data_structures/Graph/Breadth_First_Search.py b/data_structures/Graph/Breadth_First_Search.py deleted file mode 100644 index 1a3fdfd4d..000000000 --- a/data_structures/Graph/Breadth_First_Search.py +++ /dev/null @@ -1,70 +0,0 @@ -class GRAPH: - """docstring for GRAPH""" - def __init__(self, nodes): - self.nodes=nodes - self.graph=[[0]*nodes for i in range (nodes)] - self.visited=[0]*nodes - - - def show(self): - - for i in self.graph: - for j in i: - print(j, end=' ') - print(' ') - def bfs(self,v): - - visited = [False]*self.vertex - visited[v - 1] = True - print('%d visited' % (v)) - - queue = [v - 1] - while len(queue) > 0: - v = queue[0] - for u in range(self.vertex): - if self.graph[v][u] == 1: - if visited[u]== False: - visited[u] = True - queue.append(u) - print('%d visited' % (u +1)) - queue.pop(0) - -g = Graph(10) - -g.add_edge(1,2) -g.add_edge(1,3) -g.add_edge(1,4) -g.add_edge(2,5) -g.add_edge(3,6) -g.add_edge(3,7) -g.add_edge(4,8) -g.add_edge(5,9) -g.add_edge(6,10) 
-g.bfs(4) -======= - print self.graph - - def add_edge(self, i, j): - self.graph[i][j]=1 - self.graph[j][i]=1 - - def bfs(self,s): - queue=[s] - self.visited[s]=1 - while len(queue)!=0: - x=queue.pop(0) - print(x) - for i in range(0,self.nodes): - if self.graph[x][i]==1 and self.visited[i]==0: - queue.append(i) - self.visited[i]=1 - -n=int(input("Enter the number of Nodes : ")) -g=GRAPH(n) -e=int(input("Enter the no of edges : ")) -print("Enter the edges (u v)") -for i in range(0,e): - u,v=map(int, raw_input().split()) - g.add_edge(u,v) -s=int(input("Enter the source node :")) -g.bfs(s) diff --git a/data_structures/Graph/Deep_First_Search.py b/data_structures/Graph/Deep_First_Search.py deleted file mode 100644 index 656ddfbaf..000000000 --- a/data_structures/Graph/Deep_First_Search.py +++ /dev/null @@ -1,32 +0,0 @@ -class GRAPH: - """docstring for GRAPH""" - def __init__(self, nodes): - self.nodes=nodes - self.graph=[[0]*nodes for i in range (nodes)] - self.visited=[0]*nodes - - - def show(self): - print self.graph - - def add_edge(self, i, j): - self.graph[i][j]=1 - self.graph[j][i]=1 - - def dfs(self,s): - self.visited[s]=1 - print(s) - for i in range(0,self.nodes): - if self.visited[i]==0 and self.graph[s][i]==1: - self.dfs(i) - - -n=int(input("Enter the number of Nodes : ")) -g=GRAPH(n) -e=int(input("Enter the no of edges : ")) -print("Enter the edges (u v)") -for i in range(0,e): - u,v=map(int, raw_input().split()) - g.add_edge(u,v) -s=int(input("Enter the source node :")) -g.dfs(s) diff --git a/data_structures/Graph/P02_DepthFirstSearch.py b/data_structures/Graph/DepthFirstSearch.py similarity index 100% rename from data_structures/Graph/P02_DepthFirstSearch.py rename to data_structures/Graph/DepthFirstSearch.py diff --git a/data_structures/Graph/dijkstra_algorithm.py b/data_structures/Graph/dijkstra_algorithm.py new file mode 100644 index 000000000..c43ff37f5 --- /dev/null +++ b/data_structures/Graph/dijkstra_algorithm.py @@ -0,0 +1,211 @@ +# Title: 
Dijkstra's Algorithm for finding single source shortest path from scratch +# Author: Shubham Malik +# References: https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm + +import math +import sys +# For storing the vertex set to retreive node with the lowest distance + + +class PriorityQueue: + # Based on Min Heap + def __init__(self): + self.cur_size = 0 + self.array = [] + self.pos = {} # To store the pos of node in array + + def isEmpty(self): + return self.cur_size == 0 + + def min_heapify(self, idx): + lc = self.left(idx) + rc = self.right(idx) + if lc < self.cur_size and self.array(lc)[0] < self.array(idx)[0]: + smallest = lc + else: + smallest = idx + if rc < self.cur_size and self.array(rc)[0] < self.array(smallest)[0]: + smallest = rc + if smallest != idx: + self.swap(idx, smallest) + self.min_heapify(smallest) + + def insert(self, tup): + # Inserts a node into the Priority Queue + self.pos[tup[1]] = self.cur_size + self.cur_size += 1 + self.array.append((sys.maxsize, tup[1])) + self.decrease_key((sys.maxsize, tup[1]), tup[0]) + + def extract_min(self): + # Removes and returns the min element at top of priority queue + min_node = self.array[0][1] + self.array[0] = self.array[self.cur_size - 1] + self.cur_size -= 1 + self.min_heapify(1) + del self.pos[min_node] + return min_node + + def left(self, i): + # returns the index of left child + return 2 * i + 1 + + def right(self, i): + # returns the index of right child + return 2 * i + 2 + + def par(self, i): + # returns the index of parent + return math.floor(i / 2) + + def swap(self, i, j): + # swaps array elements at indices i and j + # update the pos{} + self.pos[self.array[i][1]] = j + self.pos[self.array[j][1]] = i + temp = self.array[i] + self.array[i] = self.array[j] + self.array[j] = temp + + def decrease_key(self, tup, new_d): + idx = self.pos[tup[1]] + # assuming the new_d is atmost old_d + self.array[idx] = (new_d, tup[1]) + while idx > 0 and self.array[self.par(idx)][0] > self.array[idx][0]: + 
self.swap(idx, self.par(idx)) + idx = self.par(idx) + + +class Graph: + def __init__(self, num): + self.adjList = {} # To store graph: u -> (v,w) + self.num_nodes = num # Number of nodes in graph + # To store the distance from source vertex + self.dist = [0] * self.num_nodes + self.par = [-1] * self.num_nodes # To store the path + + def add_edge(self, u, v, w): + # Edge going from node u to v and v to u with weight w + # u (w)-> v, v (w) -> u + # Check if u already in graph + if u in self.adjList.keys(): + self.adjList[u].append((v, w)) + else: + self.adjList[u] = [(v, w)] + + # Assuming undirected graph + if v in self.adjList.keys(): + self.adjList[v].append((u, w)) + else: + self.adjList[v] = [(u, w)] + + def show_graph(self): + # u -> v(w) + for u in self.adjList: + print(u, '->', ' -> '.join(str("{}({})".format(v, w)) + for v, w in self.adjList[u])) + + def dijkstra(self, src): + # Flush old junk values in par[] + self.par = [-1] * self.num_nodes + # src is the source node + self.dist[src] = 0 + Q = PriorityQueue() + Q.insert((0, src)) # (dist from src, node) + for u in self.adjList.keys(): + if u != src: + self.dist[u] = sys.maxsize # Infinity + self.par[u] = -1 + + while not Q.isEmpty(): + u = Q.extract_min() # Returns node with the min dist from source + # Update the distance of all the neighbours of u and + # if their prev dist was INFINITY then push them in Q + for v, w in self.adjList[u]: + new_dist = self.dist[u] + w + if self.dist[v] > new_dist: + if self.dist[v] == sys.maxsize: + Q.insert((new_dist, v)) + else: + Q.decrease_key((self.dist[v], v), new_dist) + self.dist[v] = new_dist + self.par[v] = u + + # Show the shortest distances from src + self.show_distances(src) + + def show_distances(self, src): + print("Distance from node: {}".format(src)) + for u in range(self.num_nodes): + print('Node {} has distance: {}'.format(u, self.dist[u])) + + def show_path(self, src, dest): + # To show the shortest path from src to dest + # WARNING: Use it *after* 
calling dijkstra + path = [] + cost = 0 + temp = dest + # Backtracking from dest to src + while self.par[temp] != -1: + path.append(temp) + if temp != src: + for v, w in self.adjList[temp]: + if v == self.par[temp]: + cost += w + break + temp = self.par[temp] + path.append(src) + path.reverse() + + print('----Path to reach {} from {}----'.format(dest, src)) + for u in path: + print('{}'.format(u), end=' ') + if u != dest: + print('-> ', end='') + + print('\nTotal cost of path: ', cost) + + +if __name__ == '__main__': + graph = Graph(9) + graph.add_edge(0, 1, 4) + graph.add_edge(0, 7, 8) + graph.add_edge(1, 2, 8) + graph.add_edge(1, 7, 11) + graph.add_edge(2, 3, 7) + graph.add_edge(2, 8, 2) + graph.add_edge(2, 5, 4) + graph.add_edge(3, 4, 9) + graph.add_edge(3, 5, 14) + graph.add_edge(4, 5, 10) + graph.add_edge(5, 6, 2) + graph.add_edge(6, 7, 1) + graph.add_edge(6, 8, 6) + graph.add_edge(7, 8, 7) + graph.show_graph() + graph.dijkstra(0) + graph.show_path(0, 4) + +# OUTPUT +# 0 -> 1(4) -> 7(8) +# 1 -> 0(4) -> 2(8) -> 7(11) +# 7 -> 0(8) -> 1(11) -> 6(1) -> 8(7) +# 2 -> 1(8) -> 3(7) -> 8(2) -> 5(4) +# 3 -> 2(7) -> 4(9) -> 5(14) +# 8 -> 2(2) -> 6(6) -> 7(7) +# 5 -> 2(4) -> 3(14) -> 4(10) -> 6(2) +# 4 -> 3(9) -> 5(10) +# 6 -> 5(2) -> 7(1) -> 8(6) +# Distance from node: 0 +# Node 0 has distance: 0 +# Node 1 has distance: 4 +# Node 2 has distance: 12 +# Node 3 has distance: 19 +# Node 4 has distance: 21 +# Node 5 has distance: 11 +# Node 6 has distance: 9 +# Node 7 has distance: 8 +# Node 8 has distance: 14 +# ----Path to reach 4 from 0---- +# 0 -> 7 -> 6 -> 5 -> 4 +# Total cost of path: 21 diff --git a/data_structures/LinkedList/singly_LinkedList.py b/data_structures/LinkedList/singly_LinkedList.py index 941e8a0fa..c9a3cec27 100644 --- a/data_structures/LinkedList/singly_LinkedList.py +++ b/data_structures/LinkedList/singly_LinkedList.py @@ -3,22 +3,15 @@ class Node:#create a Node self.data=data#given data self.next=None#given next to None class Linked_List: + pass - def 
insert_tail(Head,data):#insert the data at tail - tamp=Head#create a tamp as a head - if(tamp==None):#if linkedlist is empty - newNod=Node()#create newNode Node type and given data and next - newNod.data=data - newNod.next=None - Head=newNod + + def insert_tail(Head,data): + if(Head.next is None): + Head.next = Node(data) else: - while tamp.next!=None:#find the last Node - tamp=tamp.next - newNod = Node()#create a new node - newNod.data = data - newNod.next = None - tamp.next=newNod#put the newnode into last node - return Head#return first node of linked list + insert_tail(Head.next, data) + def insert_head(Head,data): tamp = Head if (tamp == None): @@ -32,16 +25,18 @@ class Linked_List: newNod.next = Head#put the Head at NewNode Next Head=newNod#make a NewNode to Head return Head - def Print(Head):#print every node data - tamp=Node() + + def printList(Head):#print every node data tamp=Head while tamp!=None: print(tamp.data) tamp=tamp.next + def delete_head(Head):#delete from head if Head!=None: Head=Head.next return Head#return new Head + def delete_tail(Head):#delete from tail if Head!=None: tamp = Node() @@ -50,12 +45,6 @@ class Linked_List: tamp = tamp.next tamp.next=None#delete the last element by give next None to 2nd last Element return Head + def isEmpty(Head): - if(Head==None):#check Head is None or Not - return True#return Ture if list is empty - else: - return False#check False if it's not empty - - - - + return Head is None #Return if Head is none \ No newline at end of file diff --git a/dynamic_programming/fastfibonacci.py b/dynamic_programming/fastfibonacci.py new file mode 100644 index 000000000..5957fbe0d --- /dev/null +++ b/dynamic_programming/fastfibonacci.py @@ -0,0 +1,42 @@ +""" +This program calculates the nth Fibonacci number in O(log(n)). +It's possible to calculate F(1000000) in less than a second. 
+""" +import sys + + +# returns F(n) +def fibonacci(n: int): + if n < 0: + raise ValueError("Negative arguments are not supported") + return _fib(n)[0] + + +# returns (F(n), F(n-1)) +def _fib(n: int): + if n == 0: + # (F(0), F(1)) + return (0, 1) + else: + # F(2n) = F(n)[2F(n+1) − F(n)] + # F(2n+1) = F(n+1)^2+F(n)^2 + a, b = _fib(n // 2) + c = a * (b * 2 - a) + d = a * a + b * b + if n % 2 == 0: + return (c, d) + else: + return (d, c + d) + + +if __name__ == "__main__": + args = sys.argv[1:] + if len(args) != 1: + print("Too few or too much parameters given.") + exit(1) + try: + n = int(args[0]) + except ValueError: + print("Could not convert data to an integer.") + exit(1) + print("F(%d) = %d" % (n, fibonacci(n))) diff --git a/dynamic_programming/fibonacci.py b/dynamic_programming/fibonacci.py index 692cb756a..5eaa81b3e 100644 --- a/dynamic_programming/fibonacci.py +++ b/dynamic_programming/fibonacci.py @@ -30,7 +30,7 @@ if __name__ == '__main__': import sys print("\n********* Fibonacci Series Using Dynamic Programming ************\n") - # For python 2.x and 3.x compatibility: 3.x has not raw_input builtin + # For python 2.x and 3.x compatibility: 3.x has no raw_input builtin # otherwise 2.x's input builtin function is too "smart" if sys.version_info.major < 3: input_function = raw_input diff --git a/machine_learning/decision_tree.py b/machine_learning/decision_tree.py new file mode 100644 index 000000000..51f600cac --- /dev/null +++ b/machine_learning/decision_tree.py @@ -0,0 +1,139 @@ +""" +Implementation of a basic regression decision tree. +Input data set: The input data set must be 1-dimensional with continuous labels. +Output: The decision tree maps a real number input to a real number output. 
+""" + +import numpy as np + +class Decision_Tree: + def __init__(self, depth = 5, min_leaf_size = 5): + self.depth = depth + self.decision_boundary = 0 + self.left = None + self.right = None + self.min_leaf_size = min_leaf_size + self.prediction = None + + def mean_squared_error(self, labels, prediction): + """ + mean_squared_error: + @param labels: a one dimensional numpy array + @param prediction: a floating point value + return value: mean_squared_error calculates the error if prediction is used to estimate the labels + """ + if labels.ndim != 1: + print("Error: Input labels must be one dimensional") + + return np.mean((labels - prediction) ** 2) + + def train(self, X, y): + """ + train: + @param X: a one dimensional numpy array + @param y: a one dimensional numpy array. + The contents of y are the labels for the corresponding X values + + train does not have a return value + """ + + """ + this section is to check that the inputs conform to our dimensionality constraints + """ + if X.ndim != 1: + print("Error: Input data set must be one dimensional") + return + if len(X) != len(y): + print("Error: X and y have different lengths") + return + if y.ndim != 1: + print("Error: Data set labels must be one dimensional") + return + + if len(X) < 2 * self.min_leaf_size: + self.prediction = np.mean(y) + return + + if self.depth == 1: + self.prediction = np.mean(y) + return + + best_split = 0 + min_error = self.mean_squared_error(X,np.mean(y)) * 2 + + + """ + loop over all possible splits for the decision tree. find the best split. 
+ if no split exists that is less than 2 * error for the entire array + then the data set is not split and the average for the entire array is used as the predictor + """ + for i in range(len(X)): + if len(X[:i]) < self.min_leaf_size: + continue + elif len(X[i:]) < self.min_leaf_size: + continue + else: + error_left = self.mean_squared_error(X[:i], np.mean(y[:i])) + error_right = self.mean_squared_error(X[i:], np.mean(y[i:])) + error = error_left + error_right + if error < min_error: + best_split = i + min_error = error + + if best_split != 0: + left_X = X[:best_split] + left_y = y[:best_split] + right_X = X[best_split:] + right_y = y[best_split:] + + self.decision_boundary = X[best_split] + self.left = Decision_Tree(depth = self.depth - 1, min_leaf_size = self.min_leaf_size) + self.right = Decision_Tree(depth = self.depth - 1, min_leaf_size = self.min_leaf_size) + self.left.train(left_X, left_y) + self.right.train(right_X, right_y) + else: + self.prediction = np.mean(y) + + return + + def predict(self, x): + """ + predict: + @param x: a floating point value to predict the label of + the prediction function works by recursively calling the predict function + of the appropriate subtrees based on the tree's decision boundary + """ + if self.prediction is not None: + return self.prediction + elif self.left or self.right is not None: + if x >= self.decision_boundary: + return self.right.predict(x) + else: + return self.left.predict(x) + else: + print("Error: Decision tree not yet trained") + return None + +def main(): + """ + In this demonstration we're generating a sample data set from the sin function in numpy. + We then train a decision tree on the data set and use the decision tree to predict the + label of 10 different test values. Then the mean squared error over this test is displayed. 
def euclidean_gcd(a, b):
    """Return the greatest common divisor of the integers a and b.

    Uses the Euclidean algorithm: repeatedly replace (a, b) with
    (b, a % b) until b is zero.

    The result is never negative. Python's % takes the sign of the divisor,
    so without the final abs() a negative b could leave a negative value in
    a (for example (4, -6) would give -2).

    >>> euclidean_gcd(3, 6)
    3
    >>> euclidean_gcd(4, -6)
    2
    >>> euclidean_gcd(0, 0)
    0
    """
    while b:
        a, b = b, a % b
    return abs(a)
def interpolation_search(sorted_collection, item):
    """Pure implementation of interpolation search algorithm in Python
    Be careful collection must be sorted, otherwise result will be
    unpredictable

    The probe position is estimated by linear interpolation between the
    values at both ends of the current range, so the items must support
    subtraction and floor division (e.g. integers).

    :param sorted_collection: some ascending sorted collection of numbers
    :param item: item value to search
    :return: index of found item or None if item is not found
    """
    left = 0
    right = len(sorted_collection) - 1

    while left <= right:
        low_value = sorted_collection[left]
        high_value = sorted_collection[right]

        # Outside the value range of the remaining slice: cannot be present.
        # This also keeps the probe index inside [left, right].
        if item < low_value or item > high_value:
            return None

        # All remaining values are equal, and the check above leaves only
        # item == low_value. Answer directly; interpolating here would
        # divide by zero.
        if low_value == high_value:
            return left

        point = left + ((item - low_value) * (right - left)) // (high_value - low_value)

        current_item = sorted_collection[point]
        if current_item == item:
            return point
        if item < current_item:
            right = point - 1
        else:
            left = point + 1
    return None


def interpolation_search_by_recursion(sorted_collection, item, left, right):
    """Pure implementation of interpolation search algorithm in Python by recursion
    Be careful collection must be sorted, otherwise result will be
    unpredictable
    First recursion should be started with left=0 and right=(len(sorted_collection)-1)

    :param sorted_collection: some ascending sorted collection of numbers
    :param item: item value to search
    :param left: first index of the range still being searched
    :param right: last index (inclusive) of the range still being searched
    :return: index of found item or None if item is not found
    """
    # Exhausted range (also the empty-collection case, left=0 and right=-1).
    if left > right:
        return None

    low_value = sorted_collection[left]
    high_value = sorted_collection[right]

    # Outside the value range of the remaining slice: cannot be present.
    if item < low_value or item > high_value:
        return None

    # Equal endpoints mean item == low_value here; avoid dividing by zero.
    if low_value == high_value:
        return left

    point = left + ((item - low_value) * (right - left)) // (high_value - low_value)

    if sorted_collection[point] == item:
        return point
    if sorted_collection[point] > item:
        return interpolation_search_by_recursion(sorted_collection, item, left, point - 1)
    return interpolation_search_by_recursion(sorted_collection, item, point + 1, right)
+ ValueError: Collection must be sorted + """ + if collection != sorted(collection): + raise ValueError('Collection must be sorted') + return True + + +if __name__ == '__main__': + import sys + # For python 2.x and 3.x compatibility: 3.x has no raw_input builtin + # otherwise 2.x's input builtin function is too "smart" + if sys.version_info.major < 3: + input_function = raw_input + else: + input_function = input + + user_input = input_function('Enter numbers separated by comma:\n') + collection = [int(item) for item in user_input.split(',')] + try: + __assert_sorted(collection) + except ValueError: + sys.exit('Sequence must be sorted to apply interpolation search') + + target_input = input_function( + 'Enter a single number to be found in the list:\n' + ) + target = int(target_input) + result = interpolation_search(collection, target) + if result is not None: + print('{} found at positions: {}'.format(target, result)) + else: + print('Not found') \ No newline at end of file diff --git a/searches/linear_search.py b/searches/linear_search.py index 24479e45b..ce8098b1a 100644 --- a/searches/linear_search.py +++ b/searches/linear_search.py @@ -41,7 +41,7 @@ def linear_search(sequence, target): if __name__ == '__main__': import sys - # For python 2.x and 3.x compatibility: 3.x has not raw_input builtin + # For python 2.x and 3.x compatibility: 3.x has no raw_input builtin # otherwise 2.x's input builtin function is too "smart" if sys.version_info.major < 3: input_function = raw_input diff --git a/searches/quick_select.py b/searches/quick_select.py new file mode 100644 index 000000000..e5e2ce99c --- /dev/null +++ b/searches/quick_select.py @@ -0,0 +1,47 @@ +import collections +import sys +import random +import time +import math +""" +A python implementation of the quick select algorithm, which is efficient for calculating the value that would appear in the index of a list if it would be sorted, even if it is not already sorted +https://en.wikipedia.org/wiki/Quickselect 
def _sort_key(element):
    """Return the value an element is ordered by.

    The original comparison read ``element.address``; that is still honoured
    for objects that have the attribute, while plain values (numbers,
    strings, ...) are compared directly.
    """
    return getattr(element, "address", element)


def _partition(data, pivot):
    """
    Three way partition the data into smaller, equal and greater lists,
    in relationship to the pivot
    :param data: The data to be partitioned (an iterable)
    :param pivot: The value to partition the data on
    :return: Three lists: smaller, equal and greater
    """
    pivot_key = _sort_key(pivot)
    less, equal, greater = [], [], []
    for element in data:
        key = _sort_key(element)
        if key < pivot_key:
            less.append(element)
        elif key > pivot_key:
            greater.append(element)
        else:
            equal.append(element)
    return less, equal, greater


def quickSelect(list, k):
    """
    Return the element that would sit at index k if list were sorted,
    without sorting it. Use k = len(list) // 2 to find the median.

    Defined at module level; the original was indented into _partition after
    its return statement and therefore could never be called.

    :param list: the sequence to select from (not modified); the name shadows
                 the builtin but is kept so keyword callers keep working
    :param k: zero-based rank of the wanted element, 0 <= k < len(list)
    :return: the element of rank k
    :raise: ValueError if k is outside the valid range
    """
    if not 0 <= k < len(list):
        raise ValueError("k must satisfy 0 <= k < len(list)")

    candidates = list
    while True:
        pivot = random.choice(candidates)
        smaller, equal, larger = _partition(candidates, pivot)
        m = len(smaller)
        count = len(equal)

        if k < m:
            # must be in smaller
            candidates = smaller
        elif k < m + count:
            # k falls among the elements equal to the pivot
            return pivot
        else:
            # must be in larger; re-base k onto that list
            k -= m + count
            candidates = larger
def counting_sort(collection):
    """Pure implementation of counting sort algorithm in Python
    :param collection: a sequence of integers (values are used as offsets
        into a counting array, so other item types are not supported)
    :return: a new list with the same items ordered by ascending; the
        input is not modified
    Examples:
    >>> counting_sort([0, 5, 3, 2, 2])
    [0, 2, 2, 3, 5]
    >>> counting_sort([])
    []
    >>> counting_sort([-2, -5, -45])
    [-45, -5, -2]
    >>> counting_sort(())
    []
    """
    # any empty sequence (list, tuple, ...) sorts to an empty list
    if not collection:
        return []

    coll_min = min(collection)
    coll_max = max(collection)

    # one counter per value in [coll_min, coll_max]; subtracting coll_min
    # shifts negative values onto valid indices
    counting_arr = [0] * (coll_max - coll_min + 1)
    for number in collection:
        counting_arr[number - coll_min] += 1

    # prefix sums: counting_arr[v - coll_min] is now the number of elements
    # that are <= v
    for i in range(1, len(counting_arr)):
        counting_arr[i] += counting_arr[i - 1]

    ordered = [0] * len(collection)

    # walk the input from the end so equal items keep their original order
    # (stable sort); each placement uses up one slot of that value's count
    for number in reversed(collection):
        counting_arr[number - coll_min] -= 1
        ordered[counting_arr[number - coll_min]] = number

    return ordered
b/sorts/merge_sort.py index 92a678016..ca8dbc33c 100644 --- a/sorts/merge_sort.py +++ b/sorts/merge_sort.py @@ -64,7 +64,7 @@ def merge_sort(collection): if __name__ == '__main__': import sys - # For python 2.x and 3.x compatibility: 3.x has not raw_input builtin + # For python 2.x and 3.x compatibility: 3.x has no raw_input builtin # otherwise 2.x's input builtin function is too "smart" if sys.version_info.major < 3: input_function = raw_input diff --git a/sorts/quick_sort.py b/sorts/quick_sort.py index 8974e1bd8..52e37b587 100644 --- a/sorts/quick_sort.py +++ b/sorts/quick_sort.py @@ -42,7 +42,7 @@ def quick_sort(ARRAY): if __name__ == '__main__': import sys - # For python 2.x and 3.x compatibility: 3.x has not raw_input builtin + # For python 2.x and 3.x compatibility: 3.x has no raw_input builtin # otherwise 2.x's input builtin function is too "smart" if sys.version_info.major < 3: input_function = raw_input diff --git a/sorts/radix_sort.py b/sorts/radix_sort.py index 82f8a38b4..f19bc10e8 100644 --- a/sorts/radix_sort.py +++ b/sorts/radix_sort.py @@ -2,19 +2,19 @@ def radixsort(lst): RADIX = 10 maxLength = False tmp , placement = -1, 1 - + while not maxLength: maxLength = True # declare and initialize buckets buckets = [list() for _ in range( RADIX )] - + # split lst between lists for i in lst: - tmp = i / placement + tmp = i // placement buckets[tmp % RADIX].append( i ) if maxLength and tmp > 0: maxLength = False - + # empty lists into lst array a = 0 for b in range( RADIX ): @@ -22,6 +22,6 @@ def radixsort(lst): for i in buck: lst[a] = i a += 1 - + # move to next placement *= RADIX diff --git a/sorts/selection_sort.py b/sorts/selection_sort.py index 14bc80463..752496e98 100644 --- a/sorts/selection_sort.py +++ b/sorts/selection_sort.py @@ -44,7 +44,7 @@ def selection_sort(collection): if __name__ == '__main__': import sys - # For python 2.x and 3.x compatibility: 3.x has not raw_input builtin + # For python 2.x and 3.x compatibility: 3.x has no raw_input 
def binary_search(lst, item, start, end):
    """Return the index at which item should be inserted into the sorted
    slice lst[start:end + 1] (end is inclusive).

    If an element equal to item is probed, its index is returned, so item
    ends up directly before that element.
    """
    if start == end:
        return start if lst[start] > item else start + 1
    if start > end:
        return start

    mid = (start + end) // 2
    if lst[mid] < item:
        return binary_search(lst, item, mid + 1, end)
    if lst[mid] > item:
        return binary_search(lst, item, start, mid - 1)
    return mid


def insertion_sort(lst):
    """Return a new ascending list with the items of lst, built by binary
    insertion sort. The input is not modified.
    """
    result = list(lst)

    for index in range(1, len(result)):
        # result[:index] is already sorted; take the next value out and put
        # it back at its position inside that prefix.
        value = result.pop(index)
        pos = binary_search(result, value, 0, index - 1)
        result.insert(pos, value)

    return result


def merge(left, right):
    """Merge two ascending lists into one new ascending list.

    Iterative, so long inputs neither exhaust the recursion limit nor copy
    the remaining items on every step. On ties the item from right is taken
    first, as in the original recursive version.
    """
    merged = []
    i = j = 0
    while i < len(left) and j < len(right):
        if left[i] < right[j]:
            merged.append(left[i])
            i += 1
        else:
            merged.append(right[j])
            j += 1
    # At most one of these still has items left.
    merged.extend(left[i:])
    merged.extend(right[j:])
    return merged


def timsort(lst):
    """Return a new ascending list with the items of lst.

    The input is cut into maximal non-decreasing runs, every run is passed
    through insertion_sort, and the runs are merged one after another.
    An empty input gives an empty list.
    """
    if not lst:
        return []

    # Split into natural runs. A value smaller than its predecessor closes
    # the current run and becomes the first item of the next one, so no
    # element is dropped.
    runs = []
    new_run = [lst[0]]
    for i in range(1, len(lst)):
        if lst[i] < lst[i - 1]:
            runs.append(new_run)
            new_run = [lst[i]]
        else:
            new_run.append(lst[i])
    runs.append(new_run)

    sorted_array = []
    for run in runs:
        sorted_array = merge(sorted_array, insertion_sort(run))

    return sorted_array