diff --git a/Graphs/a_star.py b/Graphs/a_star.py new file mode 100644 index 000000000..2ca9476e5 --- /dev/null +++ b/Graphs/a_star.py @@ -0,0 +1,101 @@ + +grid = [[0, 1, 0, 0, 0, 0], + [0, 1, 0, 0, 0, 0],#0 are free path whereas 1's are obstacles + [0, 1, 0, 0, 0, 0], + [0, 1, 0, 0, 1, 0], + [0, 0, 0, 0, 1, 0]] + +''' +heuristic = [[9, 8, 7, 6, 5, 4], + [8, 7, 6, 5, 4, 3], + [7, 6, 5, 4, 3, 2], + [6, 5, 4, 3, 2, 1], + [5, 4, 3, 2, 1, 0]]''' + +init = [0, 0] +goal = [len(grid)-1, len(grid[0])-1] #all coordinates are given in format [y,x] +cost = 1 + +#the cost map which pushes the path closer to the goal +heuristic = [[0 for row in range(len(grid[0]))] for col in range(len(grid))] +for i in range(len(grid)): + for j in range(len(grid[0])): + heuristic[i][j] = abs(i - goal[0]) + abs(j - goal[1]) + if grid[i][j] == 1: + heuristic[i][j] = 99 #added extra penalty in the heuristic map + + +#the actions we can take +delta = [[-1, 0 ], # go up + [ 0, -1], # go left + [ 1, 0 ], # go down + [ 0, 1 ]] # go right + + +#function to search the path +def search(grid,init,goal,cost,heuristic): + + closed = [[0 for col in range(len(grid[0]))] for row in range(len(grid))]# the referrence grid + closed[init[0]][init[1]] = 1 + action = [[0 for col in range(len(grid[0]))] for row in range(len(grid))]#the action grid + + x = init[0] + y = init[1] + g = 0 + f = g + heuristic[init[0]][init[0]] + cell = [[f, g, x, y]] + + found = False # flag that is set when search is complete + resign = False # flag set if we can't find expand + + while not found and not resign: + if len(cell) == 0: + resign = True + return "FAIL" + else: + cell.sort()#to choose the least costliest action so as to move closer to the goal + cell.reverse() + next = cell.pop() + x = next[2] + y = next[3] + g = next[1] + f = next[0] + + + if x == goal[0] and y == goal[1]: + found = True + else: + for i in range(len(delta)):#to try out different valid actions + x2 = x + delta[i][0] + y2 = y + delta[i][1] + if x2 >= 0 and x2 < 
len(grid) and y2 >=0 and y2 < len(grid[0]): + if closed[x2][y2] == 0 and grid[x2][y2] == 0: + g2 = g + cost + f2 = g2 + heuristic[x2][y2] + cell.append([f2, g2, x2, y2]) + closed[x2][y2] = 1 + action[x2][y2] = i + invpath = [] + x = goal[0] + y = goal[1] + invpath.append([x, y])#we get the reverse path from here + while x != init[0] or y != init[1]: + x2 = x - delta[action[x][y]][0] + y2 = y - delta[action[x][y]][1] + x = x2 + y = y2 + invpath.append([x, y]) + + path = [] + for i in range(len(invpath)): + path.append(invpath[len(invpath) - 1 - i]) + print "ACTION MAP" + for i in range(len(action)): + print action[i] + + return path + +a = search(grid,init,goal,cost,heuristic) +for i in range(len(a)): + print a[i] + diff --git a/Graphs/basic-graphs.py b/Graphs/basic-graphs.py new file mode 100644 index 000000000..fc78e5652 --- /dev/null +++ b/Graphs/basic-graphs.py @@ -0,0 +1,267 @@ +# Accept No. of Nodes and edges +n, m = map(int, raw_input().split(" ")) + +# Initialising Dictionary of edges +g = {} +for i in xrange(n): + g[i + 1] = [] + +""" +-------------------------------------------------------------------------------- + Accepting edges of Unweighted Directed Graphs +-------------------------------------------------------------------------------- +""" +for _ in xrange(m): + x, y = map(int, raw_input().split(" ")) + g[x].append(y) + +""" +-------------------------------------------------------------------------------- + Accepting edges of Unweighted Undirected Graphs +-------------------------------------------------------------------------------- +""" +for _ in xrange(m): + x, y = map(int, raw_input().split(" ")) + g[x].append(y) + g[y].append(x) + +""" +-------------------------------------------------------------------------------- + Accepting edges of Weighted Undirected Graphs +-------------------------------------------------------------------------------- +""" +for _ in xrange(m): + x, y, r = map(int, raw_input().split(" ")) + g[x].append([y, r]) + 
g[y].append([x, r]) + +""" +-------------------------------------------------------------------------------- + Depth First Search. + Args : G - Dictionary of edges + s - Starting Node + Vars : vis - Set of visited nodes + S - Traversal Stack +-------------------------------------------------------------------------------- +""" + + +def dfs(G, s): + vis, S = set([s]), [s] + print s + while S: + flag = 0 + for i in G[S[-1]]: + if i not in vis: + S.append(i) + vis.add(i) + flag = 1 + print i + break + if not flag: + S.pop() + + +""" +-------------------------------------------------------------------------------- + Breadth First Search. + Args : G - Dictionary of edges + s - Starting Node + Vars : vis - Set of visited nodes + Q - Traveral Stack +-------------------------------------------------------------------------------- +""" +from collections import deque + + +def bfs(G, s): + vis, Q = set([s]), deque([s]) + print s + while Q: + u = Q.popleft() + for v in G[u]: + if v not in vis: + vis.add(v) + Q.append(v) + print v + + +""" +-------------------------------------------------------------------------------- + Dijkstra's shortest path Algorithm + Args : G - Dictionary of edges + s - Starting Node + Vars : dist - Dictionary storing shortest distance from s to every other node + known - Set of knows nodes + path - Preceding node in path +-------------------------------------------------------------------------------- +""" + + +def dijk(G, s): + dist, known, path = {s: 0}, set(), {s: 0} + while True: + if len(known) == len(G) - 1: + break + mini = 100000 + for i in dist: + if i not in known and dist[i] < mini: + mini = dist[i] + u = i + known.add(u) + for v in G[u]: + if v[0] not in known: + if dist[u] + v[1] < dist.get(v[0], 100000): + dist[v[0]] = dist[u] + v[1] + path[v[0]] = u + for i in dist: + if i != s: + print dist[i] + + +""" +-------------------------------------------------------------------------------- + Topological Sort 
+-------------------------------------------------------------------------------- +""" +from collections import deque + + +def topo(G, ind=None, Q=[1]): + if ind == None: + ind = [0] * (len(G) + 1) # SInce oth Index is ignored + for u in G: + for v in G[u]: + ind[v] += 1 + Q = deque() + for i in G: + if ind[i] == 0: + Q.append(i) + if len(Q) == 0: + return + v = Q.popleft() + print v + for w in G[v]: + ind[w] -= 1 + if ind[w] == 0: + Q.append(w) + topo(G, ind, Q) + + +""" +-------------------------------------------------------------------------------- + Reading an Adjacency matrix +-------------------------------------------------------------------------------- +""" + + +def adjm(): + n, a = input(), [] + for i in xrange(n): + a.append(map(int, raw_input().split())) + return a, n + + +""" +-------------------------------------------------------------------------------- + Floyd Warshall's algorithm + Args : G - Dictionary of edges + s - Starting Node + Vars : dist - Dictionary storing shortest distance from s to every other node + known - Set of knows nodes + path - Preceding node in path + +-------------------------------------------------------------------------------- +""" + + +def floy((A, n)): + dist = list(A) + path = [[0] * n for i in xrange(n)] + for k in xrange(n): + for i in xrange(n): + for j in xrange(n): + if dist[i][j] > dist[i][k] + dist[k][j]: + dist[i][j] = dist[i][k] + dist[k][j] + path[i][k] = k + print dist + + +""" +-------------------------------------------------------------------------------- + Prim's MST Algorithm + Args : G - Dictionary of edges + s - Starting Node + Vars : dist - Dictionary storing shortest distance from s to nearest node + known - Set of knows nodes + path - Preceding node in path +-------------------------------------------------------------------------------- +""" + + +def prim(G, s): + dist, known, path = {s: 0}, set(), {s: 0} + while True: + if len(known) == len(G) - 1: + break + mini = 100000 + for i in dist: + if i 
not in known and dist[i] < mini: + mini = dist[i] + u = i + known.add(u) + for v in G[u]: + if v[0] not in known: + if v[1] < dist.get(v[0], 100000): + dist[v[0]] = v[1] + path[v[0]] = u + + +""" +-------------------------------------------------------------------------------- + Accepting Edge list + Vars : n - Number of nodes + m - Number of edges + Returns : l - Edge list + n - Number of Nodes +-------------------------------------------------------------------------------- +""" + + +def edglist(): + n, m = map(int, raw_input().split(" ")) + l = [] + for i in xrange(m): + l.append(map(int, raw_input().split(' '))) + return l, n + + +""" +-------------------------------------------------------------------------------- + Kruskal's MST Algorithm + Args : E - Edge list + n - Number of Nodes + Vars : s - Set of all nodes as unique disjoint sets (initially) +-------------------------------------------------------------------------------- +""" + + +def krusk((E, n)): + # Sort edges on the basis of distance + E.sort(reverse=True, key=lambda x: x[2]) + s = [set([i]) for i in range(1, n + 1)] + while True: + if len(s) == 1: + break + print s + x = E.pop() + for i in xrange(len(s)): + if x[0] in s[i]: + break + for j in xrange(len(s)): + if x[1] in s[j]: + if i == j: + break + s[j].update(s[i]) + s.pop(i) + break diff --git a/Graphs/minimum_spanning_tree_kruskal.py b/Graphs/minimum_spanning_tree_kruskal.py new file mode 100644 index 000000000..d26eb70b2 --- /dev/null +++ b/Graphs/minimum_spanning_tree_kruskal.py @@ -0,0 +1,31 @@ +num_nodes, num_edges = list(map(int,input().split())) + +edges = [] + +for i in range(num_edges): + node1, node2, cost = list(map(int,input().split())) + edges.append((i,node1,node2,cost)) + +edges = sorted(edges, key=lambda edge: edge[3]) + +parent = [i for i in range(num_nodes)] + +def find_parent(i): + if(i != parent[i]): + parent[i] = find_parent(parent[i]) + return parent[i] + +minimum_spanning_tree_cost = 0 +minimum_spanning_tree = [] + +for 
edge in edges: + parent_a = find_parent(edge[1]) + parent_b = find_parent(edge[2]) + if(parent_a != parent_b): + minimum_spanning_tree_cost += edge[3] + minimum_spanning_tree.append(edge) + parent[parent_a] = parent_b + +print(minimum_spanning_tree_cost) +for edge in minimum_spanning_tree: + print(edge) diff --git a/Graphs/scc_kosaraju.py b/Graphs/scc_kosaraju.py new file mode 100644 index 000000000..09a05e981 --- /dev/null +++ b/Graphs/scc_kosaraju.py @@ -0,0 +1,45 @@ +# n - no of nodes, m - no of edges +n, m = list(map(int,input().split())) + +g = [[] for i in range(n)] #graph +r = [[] for i in range(n)] #reversed graph +# input graph data (edges) +for i in range(m): + u, v = list(map(int,input().split())) + g[u].append(v) + r[v].append(u) + +stack = [] +visit = [False]*n +scc = [] +component = [] + +def dfs(u): + global g, r, scc, component, visit, stack + if visit[u]: return + visit[u] = True + for v in g[u]: + dfs(v) + stack.append(u) + +def dfs2(u): + global g, r, scc, component, visit, stack + if visit[u]: return + visit[u] = True + component.append(u) + for v in r[u]: + dfs2(v) + +def kosaraju(): + global g, r, scc, component, visit, stack + for i in range(n): + dfs(i) + visit = [False]*n + for i in stack[::-1]: + if visit[i]: continue + component = [] + dfs2(i) + scc.append(component) + return scc + +print(kosaraju()) diff --git a/Multi_Hueristic_Astar.py b/Multi_Hueristic_Astar.py new file mode 100644 index 000000000..03652d35a --- /dev/null +++ b/Multi_Hueristic_Astar.py @@ -0,0 +1,262 @@ +import heapq +import numpy as np +import math +import copy + + +class PriorityQueue: + def __init__(self): + self.elements = [] + self.set = set() + + def minkey(self): + if not self.empty(): + return self.elements[0][0] + else: + return float('inf') + + def empty(self): + return len(self.elements) == 0 + + def put(self, item, priority): + if item not in self.set: + heapq.heappush(self.elements, (priority, item)) + self.set.add(item) + else: + # update + # 
print("update", item) + temp = [] + (pri, x) = heapq.heappop(self.elements) + while x != item: + temp.append((pri, x)) + (pri, x) = heapq.heappop(self.elements) + temp.append((priority, item)) + for (pro, xxx) in temp: + heapq.heappush(self.elements, (pro, xxx)) + + def remove_element(self, item): + if item in self.set: + self.set.remove(item) + temp = [] + (pro, x) = heapq.heappop(self.elements) + while x != item: + temp.append((pro, x)) + (pro, x) = heapq.heappop(self.elements) + for (prito, yyy) in temp: + heapq.heappush(self.elements, (prito, yyy)) + + def top_show(self): + return self.elements[0][1] + + def get(self): + (priority, item) = heapq.heappop(self.elements) + self.set.remove(item) + return (priority, item) + +def consistent_hueristic(P, goal): + # euclidean distance + a = np.array(P) + b = np.array(goal) + return np.linalg.norm(a - b) + +def hueristic_2(P, goal): + # integer division by time variable + return consistent_hueristic(P, goal) // t + +def hueristic_1(P, goal): + # manhattan distance + return abs(P[0] - goal[0]) + abs(P[1] - goal[1]) + +def key(start, i, goal, g_function): + ans = g_function[start] + W1 * hueristics[i](start, goal) + return ans + +def do_something(back_pointer, goal, start): + grid = np.chararray((n, n)) + for i in range(n): + for j in range(n): + grid[i][j] = '*' + + for i in range(n): + for j in range(n): + if (j, (n-1)-i) in blocks: + grid[i][j] = "#" + + grid[0][(n-1)] = "-" + x = back_pointer[goal] + while x != start: + (x_c, y_c) = x + # print(x) + grid[(n-1)-y_c][x_c] = "-" + x = back_pointer[x] + grid[(n-1)][0] = "-" + + + for i in xrange(n): + for j in range(n): + if (i, j) == (0, n-1): + print grid[i][j], + print "<-- End position", + else: + print grid[i][j], + print + print("^") + print("Start position") + print + print("# is an obstacle") + print("- is the path taken by algorithm") + print("PATH TAKEN BY THE ALGORITHM IS:-") + x = back_pointer[goal] + while x != start: + print x, + x = back_pointer[x] + print 
x + quit() + +def valid(p): + if p[0] < 0 or p[0] > n-1: + return False + if p[1] < 0 or p[1] > n-1: + return False + return True + +def expand_state(s, j, visited, g_function, close_list_anchor, close_list_inad, open_list, back_pointer): + for itera in range(n_hueristic): + open_list[itera].remove_element(s) + # print("s", s) + # print("j", j) + (x, y) = s + left = (x-1, y) + right = (x+1, y) + up = (x, y+1) + down = (x, y-1) + + for neighbours in [left, right, up, down]: + if neighbours not in blocks: + if valid(neighbours) and neighbours not in visited: + # print("neighbour", neighbours) + visited.add(neighbours) + back_pointer[neighbours] = -1 + g_function[neighbours] = float('inf') + + if valid(neighbours) and g_function[neighbours] > g_function[s] + 1: + g_function[neighbours] = g_function[s] + 1 + back_pointer[neighbours] = s + if neighbours not in close_list_anchor: + open_list[0].put(neighbours, key(neighbours, 0, goal, g_function)) + if neighbours not in close_list_inad: + for var in range(1,n_hueristic): + if key(neighbours, var, goal, g_function) <= W2 * key(neighbours, 0, goal, g_function): + # print("why not plssssssssss") + open_list[j].put(neighbours, key(neighbours, var, goal, g_function)) + + + # print + +def make_common_ground(): + some_list = [] + # block 1 + for x in range(1, 5): + for y in range(1, 6): + some_list.append((x, y)) + + # line + for x in range(15, 20): + some_list.append((x, 17)) + + # block 2 big + for x in range(10, 19): + for y in range(1, 15): + some_list.append((x, y)) + + # L block + for x in range(1, 4): + for y in range(12, 19): + some_list.append((x, y)) + for x in range(3, 13): + for y in range(16, 19): + some_list.append((x, y)) + return some_list + +hueristics = {0: consistent_hueristic, 1: hueristic_1, 2: hueristic_2} + +blocks_blk = [(0, 1),(1, 1),(2, 1),(3, 1),(4, 1),(5, 1),(6, 1),(7, 1),(8, 1),(9, 1),(10, 1),(11, 1),(12, 1),(13, 1),(14, 1),(15, 1),(16, 1),(17, 1),(18, 1), (19, 1)] +blocks_no = [] +blocks_all = 
make_common_ground() + + + + +blocks = blocks_blk +# hyper parameters +W1 = 1 +W2 = 1 +n = 20 +n_hueristic = 3 # one consistent and two other inconsistent + +# start and end destination +start = (0, 0) +goal = (n-1, n-1) + +t = 1 +def multi_a_star(start, goal, n_hueristic): + g_function = {start: 0, goal: float('inf')} + back_pointer = {start:-1, goal:-1} + open_list = [] + visited = set() + + for i in range(n_hueristic): + open_list.append(PriorityQueue()) + open_list[i].put(start, key(start, i, goal, g_function)) + + close_list_anchor = [] + close_list_inad = [] + while open_list[0].minkey() < float('inf'): + for i in range(1, n_hueristic): + # print("i", i) + # print(open_list[0].minkey(), open_list[i].minkey()) + if open_list[i].minkey() <= W2 * open_list[0].minkey(): + global t + t += 1 + # print("less prio") + if g_function[goal] <= open_list[i].minkey(): + if g_function[goal] < float('inf'): + do_something(back_pointer, goal, start) + else: + _, get_s = open_list[i].top_show() + visited.add(get_s) + expand_state(get_s, i, visited, g_function, close_list_anchor, close_list_inad, open_list, back_pointer) + close_list_inad.append(get_s) + else: + # print("more prio") + if g_function[goal] <= open_list[0].minkey(): + if g_function[goal] < float('inf'): + do_something(back_pointer, goal, start) + else: + # print("hoolla") + get_s = open_list[0].top_show() + visited.add(get_s) + expand_state(get_s, 0, visited, g_function, close_list_anchor, close_list_inad, open_list, back_pointer) + close_list_anchor.append(get_s) + print("No path found to goal") + print + for i in range(n-1,-1, -1): + for j in range(n): + if (j, i) in blocks: + print '#', + elif (j, i) in back_pointer: + if (j, i) == (n-1, n-1): + print '*', + else: + print '-', + else: + print '*', + if (j, i) == (n-1, n-1): + print '<-- End position', + print + print("^") + print("Start position") + print + print("# is an obstacle") + print("- is the path taken by algorithm") +multi_a_star(start, goal, 
n_hueristic) \ No newline at end of file diff --git a/Neural_Network/convolution_neural_network.py b/Neural_Network/convolution_neural_network.py new file mode 100644 index 000000000..d8ab0d2e5 --- /dev/null +++ b/Neural_Network/convolution_neural_network.py @@ -0,0 +1,305 @@ +#-*- coding: utf-8 -*- + +''' + - - - - - -- - - - - - - - - - - - - - - - - - - - - - - + Name - - CNN - Convolution Neural Network For Photo Recognizing + Goal - - Recognize Handing Writting Word Photo + Detail:Total 5 layers neural network + * Convolution layer + * Pooling layer + * Input layer layer of BP + * Hiden layer of BP + * Output layer of BP + Author: Stephen Lee + Github: 245885195@qq.com + Date: 2017.9.20 + - - - - - -- - - - - - - - - - - - - - - - - - - - - - - + ''' + +import numpy as np +import matplotlib.pyplot as plt + +class CNN(): + + def __init__(self,conv1_get,size_p1,bp_num1,bp_num2,bp_num3,rate_w=0.2,rate_t=0.2): + ''' + :param conv1_get: [a,c,d],size, number, step of convolution kernel + :param size_p1: pooling size + :param bp_num1: units number of flatten layer + :param bp_num2: units number of hidden layer + :param bp_num3: units number of output layer + :param rate_w: rate of weight learning + :param rate_t: rate of threshold learning + ''' + self.num_bp1 = bp_num1 + self.num_bp2 = bp_num2 + self.num_bp3 = bp_num3 + self.conv1 = conv1_get[:2] + self.step_conv1 = conv1_get[2] + self.size_pooling1 = size_p1 + self.rate_weight = rate_w + self.rate_thre = rate_t + self.w_conv1 = [np.mat(-1*np.random.rand(self.conv1[0],self.conv1[0])+0.5) for i in range(self.conv1[1])] + self.wkj = np.mat(-1 * np.random.rand(self.num_bp3, self.num_bp2) + 0.5) + self.vji = np.mat(-1*np.random.rand(self.num_bp2, self.num_bp1)+0.5) + self.thre_conv1 = -2*np.random.rand(self.conv1[1])+1 + self.thre_bp2 = -2*np.random.rand(self.num_bp2)+1 + self.thre_bp3 = -2*np.random.rand(self.num_bp3)+1 + + + def save_model(self,save_path): + #save model dict with pickle + import pickle + model_dic = 
{'num_bp1':self.num_bp1, + 'num_bp2':self.num_bp2, + 'num_bp3':self.num_bp3, + 'conv1':self.conv1, + 'step_conv1':self.step_conv1, + 'size_pooling1':self.size_pooling1, + 'rate_weight':self.rate_weight, + 'rate_thre':self.rate_thre, + 'w_conv1':self.w_conv1, + 'wkj':self.wkj, + 'vji':self.vji, + 'thre_conv1':self.thre_conv1, + 'thre_bp2':self.thre_bp2, + 'thre_bp3':self.thre_bp3} + with open(save_path, 'wb') as f: + pickle.dump(model_dic, f) + + print('Model saved: %s'% save_path) + + @classmethod + def ReadModel(cls,model_path): + #read saved model + import pickle + with open(model_path, 'rb') as f: + model_dic = pickle.load(f) + + conv_get= model_dic.get('conv1') + conv_get.append(model_dic.get('step_conv1')) + size_p1 = model_dic.get('size_pooling1') + bp1 = model_dic.get('num_bp1') + bp2 = model_dic.get('num_bp2') + bp3 = model_dic.get('num_bp3') + r_w = model_dic.get('rate_weight') + r_t = model_dic.get('rate_thre') + #create model instance + conv_ins = CNN(conv_get,size_p1,bp1,bp2,bp3,r_w,r_t) + #modify model parameter + conv_ins.w_conv1 = model_dic.get('w_conv1') + conv_ins.wkj = model_dic.get('wkj') + conv_ins.vji = model_dic.get('vji') + conv_ins.thre_conv1 = model_dic.get('thre_conv1') + conv_ins.thre_bp2 = model_dic.get('thre_bp2') + conv_ins.thre_bp3 = model_dic.get('thre_bp3') + return conv_ins + + + def sig(self,x): + return 1 / (1 + np.exp(-1*x)) + + def do_round(self,x): + return round(x, 3) + + def convolute(self,data,convs,w_convs,thre_convs,conv_step): + #convolution process + size_conv = convs[0] + num_conv =convs[1] + size_data = np.shape(data)[0] + #get the data slice of original image data, data_focus + data_focus = [] + for i_focus in range(0, size_data - size_conv + 1, conv_step): + for j_focus in range(0, size_data - size_conv + 1, conv_step): + focus = data[i_focus:i_focus + size_conv, j_focus:j_focus + size_conv] + data_focus.append(focus) + #caculate the feature map of every single kernel, and saved as list of matrix + data_featuremap = 
[] + Size_FeatureMap = int((size_data - size_conv) / conv_step + 1) + for i_map in range(num_conv): + featuremap = [] + for i_focus in range(len(data_focus)): + net_focus = np.sum(np.multiply(data_focus[i_focus], w_convs[i_map])) - thre_convs[i_map] + featuremap.append(self.sig(net_focus)) + featuremap = np.asmatrix(featuremap).reshape(Size_FeatureMap, Size_FeatureMap) + data_featuremap.append(featuremap) + + #expanding the data slice to One dimenssion + focus1_list = [] + for each_focus in data_focus: + focus1_list.extend(self.Expand_Mat(each_focus)) + focus_list = np.asarray(focus1_list) + return focus_list,data_featuremap + + def pooling(self,featuremaps,size_pooling,type='average_pool'): + #pooling process + size_map = len(featuremaps[0]) + size_pooled = int(size_map/size_pooling) + featuremap_pooled = [] + for i_map in range(len(featuremaps)): + map = featuremaps[i_map] + map_pooled = [] + for i_focus in range(0,size_map,size_pooling): + for j_focus in range(0, size_map, size_pooling): + focus = map[i_focus:i_focus + size_pooling, j_focus:j_focus + size_pooling] + if type == 'average_pool': + #average pooling + map_pooled.append(np.average(focus)) + elif type == 'max_pooling': + #max pooling + map_pooled.append(np.max(focus)) + map_pooled = np.asmatrix(map_pooled).reshape(size_pooled,size_pooled) + featuremap_pooled.append(map_pooled) + return featuremap_pooled + + def _expand(self,datas): + #expanding three dimension data to one dimension list + data_expanded = [] + for i in range(len(datas)): + shapes = np.shape(datas[i]) + data_listed = datas[i].reshape(1,shapes[0]*shapes[1]) + data_listed = data_listed.getA().tolist()[0] + data_expanded.extend(data_listed) + data_expanded = np.asarray(data_expanded) + return data_expanded + + def _expand_mat(self,data_mat): + #expanding matrix to one dimension list + data_mat = np.asarray(data_mat) + shapes = np.shape(data_mat) + data_expanded = data_mat.reshape(1,shapes[0]*shapes[1]) + return data_expanded + + def 
_calculate_gradient_from_pool(self,out_map,pd_pool,num_map,size_map,size_pooling): + ''' + calcluate the gradient from the data slice of pool layer + pd_pool: list of matrix + out_map: the shape of data slice(size_map*size_map) + return: pd_all: list of matrix, [num, size_map, size_map] + ''' + pd_all = [] + i_pool = 0 + for i_map in range(num_map): + pd_conv1 = np.ones((size_map, size_map)) + for i in range(0, size_map, size_pooling): + for j in range(0, size_map, size_pooling): + pd_conv1[i:i + size_pooling, j:j + size_pooling] = pd_pool[i_pool] + i_pool = i_pool + 1 + pd_conv2 = np.multiply(pd_conv1,np.multiply(out_map[i_map],(1-out_map[i_map]))) + pd_all.append(pd_conv2) + return pd_all + + def trian(self,patterns,datas_train, datas_teach, n_repeat, error_accuracy,draw_e = bool): + #model traning + print('----------------------Start Training-------------------------') + print(' - - Shape: Train_Data ',np.shape(datas_train)) + print(' - - Shape: Teach_Data ',np.shape(datas_teach)) + rp = 0 + all_mse = [] + mse = 10000 + while rp < n_repeat and mse >= error_accuracy: + alle = 0 + print('-------------Learning Time %d--------------'%rp) + for p in range(len(datas_train)): + #print('------------Learning Image: %d--------------'%p) + data_train = np.asmatrix(datas_train[p]) + data_teach = np.asarray(datas_teach[p]) + data_focus1,data_conved1 = self.convolute(data_train,self.conv1,self.w_conv1, + self.thre_conv1,conv_step=self.step_conv1) + data_pooled1 = self.pooling(data_conved1,self.size_pooling1) + shape_featuremap1 = np.shape(data_conved1) + ''' + print(' -----original shape ', np.shape(data_train)) + print(' ---- after convolution ',np.shape(data_conv1)) + print(' -----after pooling ',np.shape(data_pooled1)) + ''' + data_bp_input = self._expand(data_pooled1) + bp_out1 = data_bp_input + + bp_net_j = np.dot(bp_out1,self.vji.T) - self.thre_bp2 + bp_out2 = self.sig(bp_net_j) + bp_net_k = np.dot(bp_out2 ,self.wkj.T) - self.thre_bp3 + bp_out3 = self.sig(bp_net_k) + + 
#--------------Model Leaning ------------------------ + # calcluate error and gradient--------------- + pd_k_all = np.multiply((data_teach - bp_out3), np.multiply(bp_out3, (1 - bp_out3))) + pd_j_all = np.multiply(np.dot(pd_k_all,self.wkj), np.multiply(bp_out2, (1 - bp_out2))) + pd_i_all = np.dot(pd_j_all,self.vji) + + pd_conv1_pooled = pd_i_all / (self.size_pooling1*self.size_pooling1) + pd_conv1_pooled = pd_conv1_pooled.T.getA().tolist() + pd_conv1_all = self._calculate_gradient_from_pool(data_conved1,pd_conv1_pooled,shape_featuremap1[0], + shape_featuremap1[1],self.size_pooling1) + #weight and threshold learning process--------- + #convolution layer + for k_conv in range(self.conv1[1]): + pd_conv_list = self._expand_mat(pd_conv1_all[k_conv]) + delta_w = self.rate_weight * np.dot(pd_conv_list,data_focus1) + + self.w_conv1[k_conv] = self.w_conv1[k_conv] + delta_w.reshape((self.conv1[0],self.conv1[0])) + + self.thre_conv1[k_conv] = self.thre_conv1[k_conv] - np.sum(pd_conv1_all[k_conv]) * self.rate_thre + #all connected layer + self.wkj = self.wkj + pd_k_all.T * bp_out2 * self.rate_weight + self.vji = self.vji + pd_j_all.T * bp_out1 * self.rate_weight + self.thre_bp3 = self.thre_bp3 - pd_k_all * self.rate_thre + self.thre_bp2 = self.thre_bp2 - pd_j_all * self.rate_thre + # calculate the sum error of all single image + errors = np.sum(abs((data_teach - bp_out3))) + alle = alle + errors + #print(' ----Teach ',data_teach) + #print(' ----BP_output ',bp_out3) + rp = rp + 1 + mse = alle/patterns + all_mse.append(mse) + def draw_error(): + yplot = [error_accuracy for i in range(int(n_repeat * 1.2))] + plt.plot(all_mse, '+-') + plt.plot(yplot, 'r--') + plt.xlabel('Learning Times') + plt.ylabel('All_mse') + plt.grid(True, alpha=0.5) + plt.show() + print('------------------Training Complished---------------------') + print(' - - Training epoch: ', rp, ' - - Mse: %.6f' % mse) + if draw_e: + draw_error() + return mse + + def predict(self,datas_test): + #model predict + 
produce_out = [] + print('-------------------Start Testing-------------------------') + print(' - - Shape: Test_Data ',np.shape(datas_test)) + for p in range(len(datas_test)): + data_test = np.asmatrix(datas_test[p]) + data_focus1, data_conved1 = self.convolute(data_test, self.conv1, self.w_conv1, + self.thre_conv1, conv_step=self.step_conv1) + data_pooled1 = self.pooling(data_conved1, self.size_pooling1) + data_bp_input = self._expand(data_pooled1) + + bp_out1 = data_bp_input + bp_net_j = bp_out1 * self.vji.T - self.thre_bp2 + bp_out2 = self.sig(bp_net_j) + bp_net_k = bp_out2 * self.wkj.T - self.thre_bp3 + bp_out3 = self.sig(bp_net_k) + produce_out.extend(bp_out3.getA().tolist()) + res = [list(map(self.do_round,each)) for each in produce_out] + return np.asarray(res) + + def convolution(self,data): + #return the data of image after convoluting process so we can check it out + data_test = np.asmatrix(data) + data_focus1, data_conved1 = self.convolute(data_test, self.conv1, self.w_conv1, + self.thre_conv1, conv_step=self.step_conv1) + data_pooled1 = self.pooling(data_conved1, self.size_pooling1) + + return data_conved1,data_pooled1 + + +if __name__ == '__main__': + pass + ''' + I will put the example on other file + ''' \ No newline at end of file diff --git a/Project Euler/Problem 01/sol1.py b/Project Euler/Problem 01/sol1.py new file mode 100644 index 000000000..512154e29 --- /dev/null +++ b/Project Euler/Problem 01/sol1.py @@ -0,0 +1,12 @@ +''' +Problem Statement: +If we list all the natural numbers below 10 that are multiples of 3 or 5, +we get 3,5,6 and 9. The sum of these multiples is 23. +Find the sum of all the multiples of 3 or 5 below N. 
+''' +n = int(raw_input().strip()) +sum=0; +for a in range(3,n): + if(a%3==0 or a%5==0): + sum+=a +print sum; \ No newline at end of file diff --git a/Project Euler/Problem 01/sol2.py b/Project Euler/Problem 01/sol2.py new file mode 100644 index 000000000..5e368c220 --- /dev/null +++ b/Project Euler/Problem 01/sol2.py @@ -0,0 +1,15 @@ +''' +Problem Statement: +If we list all the natural numbers below 10 that are multiples of 3 or 5, +we get 3,5,6 and 9. The sum of these multiples is 23. +Find the sum of all the multiples of 3 or 5 below N. +''' +n = int(raw_input().strip()) +sum = 0 +terms = (n-1)/3 +sum+= ((terms)*(6+(terms-1)*3))/2 #sum of an A.P. +terms = (n-1)/5 +sum+= ((terms)*(10+(terms-1)*5))/2 +terms = (n-1)/15 +sum-= ((terms)*(30+(terms-1)*15))/2 +print sum \ No newline at end of file diff --git a/Project Euler/Problem 01/sol3.py b/Project Euler/Problem 01/sol3.py new file mode 100644 index 000000000..0caa30a53 --- /dev/null +++ b/Project Euler/Problem 01/sol3.py @@ -0,0 +1,42 @@ +''' +Problem Statement: +If we list all the natural numbers below 10 that are multiples of 3 or 5, +we get 3,5,6 and 9. The sum of these multiples is 23. +Find the sum of all the multiples of 3 or 5 below N. +''' +''' +This solution is based on the pattern that the successive numbers in the series follow: 0+3,+2,+1,+3,+1,+2,+3. 
+''' +n = int(raw_input().strip()) +sum=0; +num=0; +while(1): + num+=3 + if(num>=n): + break + sum+=num + num+=2 + if(num>=n): + break + sum+=num + num+=1 + if(num>=n): + break + sum+=num + num+=3 + if(num>=n): + break + sum+=num + num+=1 + if(num>=n): + break + sum+=num + num+=2 + if(num>=n): + break + sum+=num + num+=3 + if(num>=n): + break + sum+=num +print sum; \ No newline at end of file diff --git a/Project Euler/Problem 02/sol1.py b/Project Euler/Problem 02/sol1.py new file mode 100644 index 000000000..6cf520767 --- /dev/null +++ b/Project Euler/Problem 02/sol1.py @@ -0,0 +1,18 @@ +''' +Problem: +Each new term in the Fibonacci sequence is generated by adding the previous two terms. By starting with 1 and 2, +the first 10 terms will be: + 1,2,3,5,8,13,21,34,55,89,.. +By considering the terms in the Fibonacci sequence whose values do not exceed n, find the sum of the even-valued terms. +e.g. for n=10, we have {2,8}, sum is 10. +''' + +n = int(raw_input().strip()) +i=1; j=2; sum=0 +while(j<=n): + if((j&1)==0): #can also use (j%2==0) + sum+=j + temp=i + i=j + j=temp+i +print sum \ No newline at end of file diff --git a/Project Euler/Problem 03/sol1.py b/Project Euler/Problem 03/sol1.py new file mode 100644 index 000000000..bd3e237e7 --- /dev/null +++ b/Project Euler/Problem 03/sol1.py @@ -0,0 +1,38 @@ +''' +Problem: +The prime factors of 13195 are 5,7,13 and 29. What is the largest prime factor of a given number N? +e.g. for 10, largest prime factor = 5. For 17, largest prime factor = 17. 
+''' + +import math + +def isprime(no): + if(no==2): + return True + elif (no%2==0): + return False + sq = int(math.sqrt(no))+1 + for i in range(3,sq,2): + if(no%i==0): + return False + return True + +max=0 +n=int(input()) +if(isprime(n)): + print n +else: + while (n%2==0): + n=n/2 + if(isprime(n)): + print n + else: + n1 = int(math.sqrt(n))+1 + for i in range(3,n1,2): + if(n%i==0): + if(isprime(n/i)): + max=n/i + break + elif(isprime(i)): + max=i + print max diff --git a/Project Euler/Problem 03/sol2.py b/Project Euler/Problem 03/sol2.py new file mode 100644 index 000000000..2577892c4 --- /dev/null +++ b/Project Euler/Problem 03/sol2.py @@ -0,0 +1,16 @@ +''' +Problem: +The prime factors of 13195 are 5,7,13 and 29. What is the largest prime factor of a given number N? +e.g. for 10, largest prime factor = 5. For 17, largest prime factor = 17. +''' +n=int(input()) +prime=1 +i=2 +while(i*i<=n): + while(n%i==0): + prime=i + n/=i + i+=1 +if(n>1): + prime=n +print prime diff --git a/Project Euler/Problem 04/sol1.py b/Project Euler/Problem 04/sol1.py new file mode 100644 index 000000000..f8ed832d8 --- /dev/null +++ b/Project Euler/Problem 04/sol1.py @@ -0,0 +1,15 @@ +''' +Problem: +A palindromic number reads the same both ways. The largest palindrome made from the product of two 2-digit numbers is 9009 = 91 x 99. +Find the largest palindrome made from the product of two 3-digit numbers which is less than N. +''' +n=int(input()) +for i in range(n-1,10000,-1): + temp=str(i) + if(temp==temp[::-1]): + j=999 + while(j!=99): + if((i%j==0) and (len(str(i/j))==3)): + print i + exit(0) + j-=1 diff --git a/Project Euler/Problem 04/sol2.py b/Project Euler/Problem 04/sol2.py new file mode 100644 index 000000000..4d2006242 --- /dev/null +++ b/Project Euler/Problem 04/sol2.py @@ -0,0 +1,18 @@ +''' +Problem: +A palindromic number reads the same both ways. The largest palindrome made from the product of two 2-digit numbers is 9009 = 91 x 99. 
+Find the largest palindrome made from the product of two 3-digit numbers which is less than N. +''' +arr = [] +for i in range(999,100,-1): + for j in range(999,100,-1): + t = str(i*j) + if t == t[::-1]: + arr.append(i*j) +arr.sort() + +n=int(input()) +for i in arr[::-1]: + if(i ### All algorithms implemented in Python (for education) @@ -128,6 +128,13 @@ The method is named after **Julius Caesar**, who used it in his private correspo The encryption step performed by a Caesar cipher is often incorporated as part of more complex schemes, such as the Vigenère cipher, and still has modern application in the ROT13 system. As with all single-alphabet substitution ciphers, the Caesar cipher is easily broken and in modern practice offers essentially no communication security. ###### Source: [Wikipedia](https://en.wikipedia.org/wiki/Caesar_cipher) +### Vigenère +The **Vigenère cipher** is a method of encrypting alphabetic text by using a series of **interwoven Caesar ciphers** based on the letters of a keyword. It is **a form of polyalphabetic substitution**.
+The Vigenère cipher has been reinvented many times. The method was originally described by Giovan Battista Bellaso in his 1553 book La cifra del. Sig. Giovan Battista Bellaso; however, the scheme was later misattributed to Blaise de Vigenère in the 19th century, and is now widely known as the "Vigenère cipher".
+Though the cipher is easy to understand and implement, for three centuries it resisted all attempts to break it; this earned it the description **le chiffre indéchiffrable**(French for 'the indecipherable cipher'). +Many people have tried to implement encryption schemes that are essentially Vigenère ciphers. Friedrich Kasiski was the first to publish a general method of deciphering a Vigenère cipher in 1863. +###### Source: [Wikipedia](https://en.wikipedia.org/wiki/Vigen%C3%A8re_cipher) + ### Transposition In cryptography, a **transposition cipher** is a method of encryption by which the positions held by units of plaintext (which are commonly characters or groups of characters) are shifted according to a regular system, so that the ciphertext constitutes a permutation of the plaintext. That is, the order of the units is changed (the plaintext is reordered).
Mathematically a bijective function is used on the characters' positions to encrypt and an inverse function to decrypt. diff --git a/ciphers/rot13.py b/ciphers/rot13.py new file mode 100644 index 000000000..208de4890 --- /dev/null +++ b/ciphers/rot13.py @@ -0,0 +1,24 @@ +def dencrypt(s, n): + out = '' + for c in s: + if c >= 'A' and c <= 'Z': + out += chr(ord('A') + (ord(c) - ord('A') + n) % 26) + elif c >= 'a' and c <= 'z': + out += chr(ord('a') + (ord(c) - ord('a') + n) % 26) + else: + out += c + return out + + +def main(): + s0 = 'HELLO' + + s1 = dencrypt(s0, 13) + print(s1) # URYYB + + s2 = dencrypt(s1, 13) + print(s2) # HELLO + + +if __name__ == '__main__': + main() diff --git a/data_structures/AVL/AVL.py b/data_structures/AVL/AVL.py index 0e3008dc1..9717f1b6e 100644 --- a/data_structures/AVL/AVL.py +++ b/data_structures/AVL/AVL.py @@ -7,40 +7,42 @@ class Node: def __init__(self, label): self.label = label - self.left = None - self.rigt = None - self.parent = None + self._parent = None + self._left = None + self._right = None self.height = 0 - def getLabel(self): - return self.label + @property + def right(self): + return self._right - def setLabel(self, label): - self.label = label + @right.setter + def right(self, node): + if node is not None: + node._parent = self + self._right = node - def getLeft(self): - return self.left + @property + def left(self): + return self._left - def setLeft(self, left): - self.left = left + @left.setter + def left(self, node): + if node is not None: + node._parent = self + self._left = node - def getRight(self): - return self.rigt + @property + def parent(self): + return self._parent - def setRight(self, right): - self.rigt = right - - def getParent(self): - return self.parent - - def setParent(self, parent): - self.parent = parent - - def setHeight(self, height): - self.height = height - - def getHeight(self, height): - return self.height + @parent.setter + def parent(self, node): + if node is not None: + self._parent = node + 
self.height = self.parent.height + 1 + else: + self.height = 0 class AVL: @@ -51,8 +53,10 @@ class AVL: def insert(self, value): node = Node(value) + if self.root is None: self.root = node + self.root.height = 0 self.size = 1 else: # Same as Binary Tree @@ -64,63 +68,77 @@ class AVL: dad_node = curr_node - if node.getLabel() < curr_node.getLabel(): - curr_node = curr_node.getLeft() + if node.label < curr_node.label: + curr_node = curr_node.left else: - curr_node = curr_node.getRight() + curr_node = curr_node.right else: - if node.getLabel() < dad_node.getLabel(): - dad_node.setLeft(node) - dad_node.setHeight(dad_node.getHeight() + 1) - - if (dad_node.getRight().getHeight() - - dad_node.getLeft.getHeight() > 1): - self.rebalance(dad_node) - + node.height = dad_node.height + dad_node.height += 1 + if node.label < dad_node.label: + dad_node.left = node else: - dad_node.setRight(node) - dad_node.setHeight(dad_node.getHeight() + 1) - - if (dad_node.getRight().getHeight() - - dad_node.getLeft.getHeight() > 1): - self.rebalance(dad_node) + dad_node.right = node + self.rebalance(node) + self.size += 1 break def rebalance(self, node): - if (node.getRight().getHeight() - - node.getLeft.getHeight() > 1): - if (node.getRight().getHeight() > - node.getLeft.getHeight()): - pass - else: - pass - pass - elif (node.getRight().getHeight() - - node.getLeft.getHeight() > 2): - if (node.getRight().getHeight() > - node.getLeft.getHeight()): - pass - else: - pass - pass - pass + n = node + + while n is not None: + height_right = n.height + height_left = n.height + + if n.right is not None: + height_right = n.right.height + + if n.left is not None: + height_left = n.left.height + + if abs(height_left - height_right) > 1: + if height_left > height_right: + left_child = n.left + if left_child is not None: + h_right = (right_child.right.height + if (right_child.right is not None) else 0) + h_left = (right_child.left.height + if (right_child.left is not None) else 0) + if (h_left > h_right): 
+ self.rotate_left(n) + break + else: + self.double_rotate_right(n) + break + else: + right_child = n.right + if right_child is not None: + h_right = (right_child.right.height + if (right_child.right is not None) else 0) + h_left = (right_child.left.height + if (right_child.left is not None) else 0) + if (h_left > h_right): + self.double_rotate_left(n) + break + else: + self.rotate_right(n) + break + n = n.parent def rotate_left(self, node): - # TODO: is this pythonic enought? - aux = node.getLabel() - node = aux.getRight() - node.setHeight(node.getHeight() - 1) - node.setLeft(Node(aux)) - node.getLeft().setHeight(node.getHeight() + 1) - node.getRight().setHeight(node.getRight().getHeight() - 1) + aux = node.parent.label + node.parent.label = node.label + node.parent.right = Node(aux) + node.parent.right.height = node.parent.height + 1 + node.parent.left = node.right + def rotate_right(self, node): - aux = node.getLabel() - node = aux.getLeft() - node.setHeight(node.getHeight() - 1) - node.setRight(Node(aux)) - node.getLeft().setHeight(node.getHeight() + 1) - node.getLeft().setHeight(node.getLeft().getHeight() - 1) + aux = node.parent.label + node.parent.label = node.label + node.parent.left = Node(aux) + node.parent.left.height = node.parent.height + 1 + node.parent.right = node.right def double_rotate_left(self, node): self.rotate_right(node.getRight().getRight()) @@ -129,3 +147,34 @@ class AVL: def double_rotate_right(self, node): self.rotate_left(node.getLeft().getLeft()) self.rotate_right(node) + + def empty(self): + if self.root is None: + return True + return False + + def preShow(self, curr_node): + if curr_node is not None: + self.preShow(curr_node.left) + print(curr_node.label, end=" ") + self.preShow(curr_node.right) + + def preorder(self, curr_node): + if curr_node is not None: + self.preShow(curr_node.left) + self.preShow(curr_node.right) + print(curr_node.label, end=" ") + + def getRoot(self): + return self.root + +t = AVL() +t.insert(1) +t.insert(2) 
+t.insert(3) +# t.preShow(t.root) +# print("\n") +# t.insert(4) +# t.insert(5) +# t.preShow(t.root) +# t.preorden(t.root) diff --git a/data_structures/Arrays b/data_structures/Arrays new file mode 100644 index 000000000..03eaefac1 --- /dev/null +++ b/data_structures/Arrays @@ -0,0 +1 @@ +Arrays implimentation using python programming. diff --git a/data_structures/Binary Tree/FenwickTree.py b/data_structures/Binary Tree/FenwickTree.py new file mode 100644 index 000000000..02e2d6151 --- /dev/null +++ b/data_structures/Binary Tree/FenwickTree.py @@ -0,0 +1,28 @@ +class FenwickTree: + + def __init__(self, SIZE): # create fenwick tree with size SIZE + self.Size = SIZE + self.ft = [0 for i in range (0,SIZE)] + + def update(self, i, val): # update data (adding) in index i in O(lg N) + while (i < self.Size): + self.ft[i] += val + i += i & (-i) + + def query(self, i): # query cumulative data from index 0 to i in O(lg N) + ret = 0 + while (i > 0): + ret += self.ft[i] + i -= i & (-i) + return ret + +if __name__ == '__main__': + f = FenwickTree(100) + f.update(1,20) + f.update(4,4) + print (f.query(1)) + print (f.query(3)) + print (f.query(4)) + f.update(2,-5) + print (f.query(1)) + print (f.query(3)) diff --git a/data_structures/Binary Tree/LazySegmentTree.py b/data_structures/Binary Tree/LazySegmentTree.py new file mode 100644 index 000000000..bbd880a06 --- /dev/null +++ b/data_structures/Binary Tree/LazySegmentTree.py @@ -0,0 +1,90 @@ +import math + +class SegmentTree: + + def __init__(self, N): + self.N = N + self.st = [0 for i in range(0,4*N)] # approximate the overall size of segment tree with array N + self.lazy = [0 for i in range(0,4*N)] # create array to store lazy update + self.flag = [0 for i in range(0,4*N)] # flag for lazy update + + def left(self, idx): + return idx*2 + + def right(self, idx): + return idx*2 + 1 + + def build(self, idx, l, r, A): + if l==r: + self.st[idx] = A[l-1] + else : + mid = (l+r)//2 + self.build(self.left(idx),l,mid, A) + 
self.build(self.right(idx),mid+1,r, A) + self.st[idx] = max(self.st[self.left(idx)] , self.st[self.right(idx)]) + + # update with O(lg N) (Normal segment tree without lazy update will take O(Nlg N) for each update) + def update(self, idx, l, r, a, b, val): # update(1, 1, N, a, b, v) for update val v to [a,b] + if self.flag[idx] == True: + self.st[idx] = self.lazy[idx] + self.flag[idx] = False + if l!=r: + self.lazy[self.left(idx)] = self.lazy[idx] + self.lazy[self.right(idx)] = self.lazy[idx] + self.flag[self.left(idx)] = True + self.flag[self.right(idx)] = True + + if r < a or l > b: + return True + if l >= a and r <= b : + self.st[idx] = val + if l!=r: + self.lazy[self.left(idx)] = val + self.lazy[self.right(idx)] = val + self.flag[self.left(idx)] = True + self.flag[self.right(idx)] = True + return True + mid = (l+r)//2 + self.update(self.left(idx),l,mid,a,b,val) + self.update(self.right(idx),mid+1,r,a,b,val) + self.st[idx] = max(self.st[self.left(idx)] , self.st[self.right(idx)]) + return True + + # query with O(lg N) + def query(self, idx, l, r, a, b): #query(1, 1, N, a, b) for query max of [a,b] + if self.flag[idx] == True: + self.st[idx] = self.lazy[idx] + self.flag[idx] = False + if l != r: + self.lazy[self.left(idx)] = self.lazy[idx] + self.lazy[self.right(idx)] = self.lazy[idx] + self.flag[self.left(idx)] = True + self.flag[self.right(idx)] = True + if r < a or l > b: + return -math.inf + if l >= a and r <= b: + return self.st[idx] + mid = (l+r)//2 + q1 = self.query(self.left(idx),l,mid,a,b) + q2 = self.query(self.right(idx),mid+1,r,a,b) + return max(q1,q2) + + def showData(self): + showList = [] + for i in range(1,N+1): + showList += [self.query(1, 1, self.N, i, i)] + print (showList) + + +if __name__ == '__main__': + A = [1,2,-4,7,3,-5,6,11,-20,9,14,15,5,2,-8] + N = 15 + segt = SegmentTree(N) + segt.build(1,1,N,A) + print (segt.query(1,1,N,4,6)) + print (segt.query(1,1,N,7,11)) + print (segt.query(1,1,N,7,12)) + segt.update(1,1,N,1,3,111) + print 
(segt.query(1,1,N,1,15)) + segt.update(1,1,N,7,8,235) + segt.showData() diff --git a/data_structures/Binary Tree/SegmentTree.py b/data_structures/Binary Tree/SegmentTree.py new file mode 100644 index 000000000..cf47ca7d7 --- /dev/null +++ b/data_structures/Binary Tree/SegmentTree.py @@ -0,0 +1,64 @@ +import math + +class SegmentTree: + + def __init__(self, N): + self.N = N + self.st = [0 for i in range(0,4*N)] # approximate the overall size of segment tree with array N + + def left(self, idx): + return idx*2 + + def right(self, idx): + return idx*2 + 1 + + def build(self, idx, l, r, A): + if l==r: + self.st[idx] = A[l-1] + else : + mid = (l+r)//2 + self.build(self.left(idx),l,mid, A) + self.build(self.right(idx),mid+1,r, A) + self.st[idx] = max(self.st[self.left(idx)] , self.st[self.right(idx)]) + + def update(self, idx, l, r, a, b, val): # update(1, 1, N, a, b, v) for update val v to [a,b] + if r < a or l > b: + return True + if l == r : + self.st[idx] = val + return True + mid = (l+r)//2 + self.update(self.left(idx),l,mid,a,b,val) + self.update(self.right(idx),mid+1,r,a,b,val) + self.st[idx] = max(self.st[self.left(idx)] , self.st[self.right(idx)]) + return True + + def query(self, idx, l, r, a, b): #query(1, 1, N, a, b) for query max of [a,b] + if r < a or l > b: + return -math.inf + if l >= a and r <= b: + return self.st[idx] + mid = (l+r)//2 + q1 = self.query(self.left(idx),l,mid,a,b) + q2 = self.query(self.right(idx),mid+1,r,a,b) + return max(q1,q2) + + def showData(self): + showList = [] + for i in range(1,N+1): + showList += [self.query(1, 1, self.N, i, i)] + print (showList) + + +if __name__ == '__main__': + A = [1,2,-4,7,3,-5,6,11,-20,9,14,15,5,2,-8] + N = 15 + segt = SegmentTree(N) + segt.build(1,1,N,A) + print (segt.query(1,1,N,4,6)) + print (segt.query(1,1,N,7,11)) + print (segt.query(1,1,N,7,12)) + segt.update(1,1,N,1,3,111) + print (segt.query(1,1,N,1,15)) + segt.update(1,1,N,7,8,235) + segt.showData() diff --git a/data_structures/Binary 
Tree/binary_seach_tree.py b/data_structures/Binary Tree/binary_seach_tree.py deleted file mode 100644 index 0b1726534..000000000 --- a/data_structures/Binary Tree/binary_seach_tree.py +++ /dev/null @@ -1,103 +0,0 @@ -''' -A binary search Tree -''' - - -class Node: - - def __init__(self, label): - self.label = label - self.left = None - self.right = None - - def getLabel(self): - return self.label - - def setLabel(self, label): - self.label = label - - def getLeft(self): - return self.left - - def setLeft(self, left): - self.left = left - - def getRight(self): - return self.right - - def setRight(self, right): - self.right = right - - -class BinarySearchTree: - - def __init__(self): - self.root = None - - def insert(self, label): - - # Create a new Node - - node = Node(label) - - if self.empty(): - self.root = node - else: - dad_node = None - curr_node = self.root - - while True: - if curr_node is not None: - - dad_node = curr_node - - if node.getLabel() < curr_node.getLabel(): - curr_node = curr_node.getLeft() - else: - curr_node = curr_node.getRight() - else: - if node.getLabel() < dad_node.getLabel(): - dad_node.setLeft(node) - else: - dad_node.setRight(node) - break - - def empty(self): - if self.root is None: - return True - return False - - def preShow(self, curr_node): - if curr_node is None: - print(curr_node.getLabel(), end=" ") - - self.preShow(curr_node.getLeft()) - self.preShow(curr_node.getRight()) - - def getRoot(self): - return self.root - - -''' -Example - 8 - / \ - 3 10 - / \ \ - 1 6 14 - / \ / - 4 7 13 -''' - -t = BinarySearchTree() -t.insert(8) -t.insert(3) -t.insert(1) -t.insert(6) -t.insert(4) -t.insert(7) -t.insert(10) -t.insert(14) -t.insert(13) - -t.preShow(t.getRoot()) diff --git a/data_structures/Binary Tree/binary_search_tree.py b/data_structures/Binary Tree/binary_search_tree.py new file mode 100644 index 000000000..5290f685b --- /dev/null +++ b/data_structures/Binary Tree/binary_search_tree.py @@ -0,0 +1,257 @@ +''' +A binary search Tree 
+''' +class Node: + + def __init__(self, label, parent): + self.label = label + self.left = None + self.right = None + #Added in order to delete a node easier + self.parent = parent + + def getLabel(self): + return self.label + + def setLabel(self, label): + self.label = label + + def getLeft(self): + return self.left + + def setLeft(self, left): + self.left = left + + def getRight(self): + return self.right + + def setRight(self, right): + self.right = right + + def getParent(self): + return self.parent + + def setParent(self, parent): + self.parent = parent + +class BinarySearchTree: + + def __init__(self): + self.root = None + + def insert(self, label): + # Create a new Node + new_node = Node(label, None) + # If Tree is empty + if self.empty(): + self.root = new_node + else: + #If Tree is not empty + curr_node = self.root + #While we don't get to a leaf + while curr_node is not None: + #We keep reference of the parent node + parent_node = curr_node + #If node label is less than current node + if new_node.getLabel() < curr_node.getLabel(): + #We go left + curr_node = curr_node.getLeft() + else: + #Else we go right + curr_node = curr_node.getRight() + #We insert the new node in a leaf + if new_node.getLabel() < parent_node.getLabel(): + parent_node.setLeft(new_node) + else: + parent_node.setRight(new_node) + #Set parent to the new node + new_node.setParent(parent_node) + + def delete(self, label): + if (not self.empty()): + #Look for the node with that label + node = self.getNode(label) + #If the node exists + if(node is not None): + #If it has no children + if(node.getLeft() is None and node.getRight() is None): + self.__reassignNodes(node, None) + node = None + #Has only right children + elif(node.getLeft() is None and node.getRight() is not None): + self.__reassignNodes(node, node.getRight()) + #Has only left children + elif(node.getLeft() is not None and node.getRight() is None): + self.__reassignNodes(node, node.getLeft()) + #Has two children + else: + #Gets 
the max value of the left branch + tmpNode = self.getMax(node.getLeft()) + #Deletes the tmpNode + self.delete(tmpNode.getLabel()) + #Assigns the value to the node to delete and keesp tree structure + node.setLabel(tmpNode.getLabel()) + + def getNode(self, label): + curr_node = None + #If the tree is not empty + if(not self.empty()): + #Get tree root + curr_node = self.getRoot() + #While we don't find the node we look for + #I am using lazy evaluation here to avoid NoneType Attribute error + while curr_node is not None and curr_node.getLabel() is not label: + #If node label is less than current node + if label < curr_node.getLabel(): + #We go left + curr_node = curr_node.getLeft() + else: + #Else we go right + curr_node = curr_node.getRight() + return curr_node + + def getMax(self, root = None): + if(root is not None): + curr_node = root + else: + #We go deep on the right branch + curr_node = self.getRoot() + if(not self.empty()): + while(curr_node.getRight() is not None): + curr_node = curr_node.getRight() + return curr_node + + def getMin(self, root = None): + if(root is not None): + curr_node = root + else: + #We go deep on the left branch + curr_node = self.getRoot() + if(not self.empty()): + curr_node = self.getRoot() + while(curr_node.getLeft() is not None): + curr_node = curr_node.getLeft() + return curr_node + + def empty(self): + if self.root is None: + return True + return False + + def __InOrderTraversal(self, curr_node): + nodeList = [] + if curr_node is not None: + nodeList.insert(0, curr_node) + nodeList = nodeList + self.__InOrderTraversal(curr_node.getLeft()) + nodeList = nodeList + self.__InOrderTraversal(curr_node.getRight()) + return nodeList + + def getRoot(self): + return self.root + + def __isRightChildren(self, node): + if(node == node.getParent().getRight()): + return True + return False + + def __reassignNodes(self, node, newChildren): + if(newChildren is not None): + newChildren.setParent(node.getParent()) + if(node.getParent() is not 
None): + #If it is the Right Children + if(self.__isRightChildren(node)): + node.getParent().setRight(newChildren) + else: + #Else it is the left children + node.getParent().setLeft(newChildren) + + #This function traversal the tree. By default it returns an + #In order traversal list. You can pass a function to traversal + #The tree as needed by client code + def traversalTree(self, traversalFunction = None, root = None): + if(traversalFunction is None): + #Returns a list of nodes in preOrder by default + return self.__InOrderTraversal(self.root) + else: + #Returns a list of nodes in the order that the users wants to + return traversalFunction(self.root) + + #Returns an string of all the nodes labels in the list + #In Order Traversal + def __str__(self): + list = self.__InOrderTraversal(self.root) + str = "" + for x in list: + str = str + " " + x.getLabel().__str__() + return str + +def InPreOrder(curr_node): + nodeList = [] + if curr_node is not None: + nodeList = nodeList + InPreOrder(curr_node.getLeft()) + nodeList.insert(0, curr_node.getLabel()) + nodeList = nodeList + InPreOrder(curr_node.getRight()) + return nodeList + +def testBinarySearchTree(): + ''' + Example + 8 + / \ + 3 10 + / \ \ + 1 6 14 + / \ / + 4 7 13 + ''' + + ''' + Example After Deletion + 7 + / \ + 1 4 + + ''' + t = BinarySearchTree() + t.insert(8) + t.insert(3) + t.insert(6) + t.insert(1) + t.insert(10) + t.insert(14) + t.insert(13) + t.insert(4) + t.insert(7) + + #Prints all the elements of the list in order traversal + print(t.__str__()) + + if(t.getNode(6) is not None): + print("The label 6 exists") + else: + print("The label 6 doesn't exist") + + if(t.getNode(-1) is not None): + print("The label -1 exists") + else: + print("The label -1 doesn't exist") + + if(not t.empty()): + print("Max Value: ", t.getMax().getLabel()) + print("Min Value: ", t.getMin().getLabel()) + + t.delete(13) + t.delete(10) + t.delete(8) + t.delete(3) + t.delete(6) + t.delete(14) + + #Gets all the elements of the 
tree In pre order + #And it prints them + list = t.traversalTree(InPreOrder, t.root) + for x in list: + print(x) + +if __name__ == "__main__": + testBinarySearchTree() diff --git a/data_structures/Graph/BreadthFirstSearch.py b/data_structures/Graph/BreadthFirstSearch.py new file mode 100644 index 000000000..16b1b2007 --- /dev/null +++ b/data_structures/Graph/BreadthFirstSearch.py @@ -0,0 +1,61 @@ +# Author: OMKAR PATHAK + +class Graph(): + def __init__(self): + self.vertex = {} + + # for printing the Graph vertexes + def printGraph(self): + for i in self.vertex.keys(): + print(i,' -> ', ' -> '.join([str(j) for j in self.vertex[i]])) + + # for adding the edge beween two vertexes + def addEdge(self, fromVertex, toVertex): + # check if vertex is already present, + if fromVertex in self.vertex.keys(): + self.vertex[fromVertex].append(toVertex) + else: + # else make a new vertex + self.vertex[fromVertex] = [toVertex] + + def BFS(self, startVertex): + # Take a list for stoting already visited vertexes + visited = [False] * len(self.vertex) + + # create a list to store all the vertexes for BFS + queue = [] + + # mark the source node as visited and enqueue it + visited[startVertex] = True + queue.append(startVertex) + + while queue: + startVertex = queue.pop(0) + print(startVertex, end = ' ') + + # mark all adjacent nodes as visited and print them + for i in self.vertex[startVertex]: + if visited[i] == False: + queue.append(i) + visited[i] = True + +if __name__ == '__main__': + g = Graph() + g.addEdge(0, 1) + g.addEdge(0, 2) + g.addEdge(1, 2) + g.addEdge(2, 0) + g.addEdge(2, 3) + g.addEdge(3, 3) + + g.printGraph() + print('BFS:') + g.BFS(2) + + # OUTPUT: + # 0  ->  1 -> 2 + # 1  ->  2 + # 2  ->  0 -> 3 + # 3  ->  3 + # BFS: + # 2 0 3 1 diff --git a/data_structures/Graph/Breadth_First_Search.py b/data_structures/Graph/Breadth_First_Search.py deleted file mode 100644 index 92a6e819b..000000000 --- a/data_structures/Graph/Breadth_First_Search.py +++ /dev/null @@ -1,72 +0,0 @@ 
-class GRAPH: - """docstring for GRAPH""" - def __init__(self, nodes): - self.nodes = nodes - self.graph = [[0]*nodes for i in range (nodes)] - self.visited = [0]*nodes - - - def show(self): - - for i in self.graph: - for j in i: - print(j, end=' ') - print(' ') - def bfs(self,v): - - visited = [False]*self.vertex - visited[v - 1] = True - print('%d visited' % (v)) - - queue = [v - 1] - while len(queue) > 0: - v = queue[0] - for u in range(self.vertex): - if self.graph[v][u] == 1: - if visited[u] is False: - visited[u] = True - queue.append(u) - print('%d visited' % (u +1)) - queue.pop(0) - -g = Graph(10) - -g.add_edge(1,2) -g.add_edge(1,3) -g.add_edge(1,4) -g.add_edge(2,5) -g.add_edge(3,6) -g.add_edge(3,7) -g.add_edge(4,8) -g.add_edge(5,9) -g.add_edge(6,10) -g.bfs(4) - -print(self.graph) - - def add_edge(self, i, j): - self.graph[i][j]=1 - self.graph[j][i]=1 - - def bfs(self, s): - queue = [s] - self.visited[s] = 1 - while len(queue)!= 0: - x = queue.pop(0) - print(x) - for i in range(0, self.nodes): - if self.graph[x][i] == 1 and self.visited[i] == 0: - queue.append(i) - self.visited[i] = 1 - -n = int(input("Enter the number of Nodes : ")) -g = GRAPH(n) -e = int(input("Enter the no of edges : ")) -print("Enter the edges (u v)") - -for i in range(0, e): - u ,v = map(int, raw_input().split()) - g.add_edge(u, v) - -s = int(input("Enter the source node :")) -g.bfs(s) diff --git a/data_structures/Graph/Deep_First_Search.py b/data_structures/Graph/Deep_First_Search.py deleted file mode 100644 index 656ddfbaf..000000000 --- a/data_structures/Graph/Deep_First_Search.py +++ /dev/null @@ -1,32 +0,0 @@ -class GRAPH: - """docstring for GRAPH""" - def __init__(self, nodes): - self.nodes=nodes - self.graph=[[0]*nodes for i in range (nodes)] - self.visited=[0]*nodes - - - def show(self): - print self.graph - - def add_edge(self, i, j): - self.graph[i][j]=1 - self.graph[j][i]=1 - - def dfs(self,s): - self.visited[s]=1 - print(s) - for i in range(0,self.nodes): - if 
self.visited[i]==0 and self.graph[s][i]==1: - self.dfs(i) - - -n=int(input("Enter the number of Nodes : ")) -g=GRAPH(n) -e=int(input("Enter the no of edges : ")) -print("Enter the edges (u v)") -for i in range(0,e): - u,v=map(int, raw_input().split()) - g.add_edge(u,v) -s=int(input("Enter the source node :")) -g.dfs(s) diff --git a/data_structures/Graph/DepthFirstSearch.py b/data_structures/Graph/DepthFirstSearch.py new file mode 100644 index 000000000..94ef3cb86 --- /dev/null +++ b/data_structures/Graph/DepthFirstSearch.py @@ -0,0 +1,61 @@ +# Author: OMKAR PATHAK + +class Graph(): + def __init__(self): + self.vertex = {} + + # for printing the Graph vertexes + def printGraph(self): + print(self.vertex) + for i in self.vertex.keys(): + print(i,' -> ', ' -> '.join([str(j) for j in self.vertex[i]])) + + # for adding the edge beween two vertexes + def addEdge(self, fromVertex, toVertex): + # check if vertex is already present, + if fromVertex in self.vertex.keys(): + self.vertex[fromVertex].append(toVertex) + else: + # else make a new vertex + self.vertex[fromVertex] = [toVertex] + + def DFS(self): + # visited array for storing already visited nodes + visited = [False] * len(self.vertex) + + # call the recursive helper function + for i in range(len(self.vertex)): + if visited[i] == False: + self.DFSRec(i, visited) + + def DFSRec(self, startVertex, visited): + # mark start vertex as visited + visited[startVertex] = True + + print(startVertex, end = ' ') + + # Recur for all the vertexes that are adjacent to this node + for i in self.vertex.keys(): + if visited[i] == False: + self.DFSRec(i, visited) + +if __name__ == '__main__': + g = Graph() + g.addEdge(0, 1) + g.addEdge(0, 2) + g.addEdge(1, 2) + g.addEdge(2, 0) + g.addEdge(2, 3) + g.addEdge(3, 3) + + g.printGraph() + print('DFS:') + g.DFS() + + # OUTPUT: + # 0  ->  1 -> 2 + # 1  ->  2 + # 2  ->  0 -> 3 + # 3  ->  3 + # DFS: + # 0 1 2 3 diff --git a/data_structures/Graph/dijkstra_algorithm.py 
class PriorityQueue:
    """Binary min-heap of (distance, node) tuples with decrease-key support.

    `array` holds the heap (0-based), `pos` maps each queued node to its
    index in `array`, and `cur_size` always equals len(array).
    """

    def __init__(self):
        self.cur_size = 0
        self.array = []
        self.pos = {}  # node -> index of that node inside self.array

    def isEmpty(self):
        """Return True when no node is queued."""
        return self.cur_size == 0

    def min_heapify(self, idx):
        """Sift the entry at `idx` down until the heap property holds."""
        lc = self.left(idx)
        rc = self.right(idx)
        smallest = idx
        if lc < self.cur_size and self.array[lc][0] < self.array[smallest][0]:
            smallest = lc
        if rc < self.cur_size and self.array[rc][0] < self.array[smallest][0]:
            smallest = rc
        if smallest != idx:
            self.swap(idx, smallest)
            self.min_heapify(smallest)

    def insert(self, tup):
        """Insert `tup` = (distance, node)."""
        node = tup[1]
        self.pos[node] = self.cur_size
        self.cur_size += 1
        # Append with "infinite" priority, then lower it so that the
        # sift-up logic lives in exactly one place (decrease_key).
        self.array.append((sys.maxsize, node))
        self.decrease_key((sys.maxsize, node), tup[0])

    def extract_min(self):
        """Remove and return the node with the smallest distance.

        Raises IndexError when the queue is empty.
        """
        min_node = self.array[0][1]
        last = self.array.pop()
        self.cur_size -= 1
        del self.pos[min_node]
        if self.cur_size:
            # Move the last leaf to the root and restore the heap from there.
            self.array[0] = last
            self.pos[last[1]] = 0
            self.min_heapify(0)
        return min_node

    def left(self, i):
        """Return the index of the left child of `i`."""
        return 2 * i + 1

    def right(self, i):
        """Return the index of the right child of `i`."""
        return 2 * i + 2

    def par(self, i):
        """Return the index of the parent of `i` (0-based heap)."""
        return (i - 1) // 2

    def swap(self, i, j):
        """Swap the entries at indices `i` and `j`, keeping `pos` in sync."""
        self.pos[self.array[i][1]] = j
        self.pos[self.array[j][1]] = i
        self.array[i], self.array[j] = self.array[j], self.array[i]

    def decrease_key(self, tup, new_d):
        """Lower the distance of node `tup[1]` to `new_d` and sift it up.

        `new_d` is assumed to be at most the node's current distance.
        """
        idx = self.pos[tup[1]]
        self.array[idx] = (new_d, tup[1])
        while idx > 0 and self.array[self.par(idx)][0] > self.array[idx][0]:
            parent = self.par(idx)
            self.swap(idx, parent)
            idx = parent


class Graph:
    """Undirected weighted graph on nodes 0..num-1 with Dijkstra's algorithm."""

    def __init__(self, num):
        self.adjList = {}  # u -> list of (v, w)
        self.num_nodes = num
        self.dist = [0] * self.num_nodes   # distance from the last source
        self.par = [-1] * self.num_nodes   # predecessor on the shortest path

    def add_edge(self, u, v, w):
        """Add the undirected edge u <-> v with weight w."""
        self.adjList.setdefault(u, []).append((v, w))
        self.adjList.setdefault(v, []).append((u, w))

    def show_graph(self):
        """Print every adjacency list as `u -> v(w) -> ...`."""
        for u in self.adjList:
            print(u, '->', ' -> '.join(str("{}({})".format(v, w))
                                       for v, w in self.adjList[u]))

    def dijkstra(self, src):
        """Compute shortest distances from `src`, then print them.

        Fills `self.dist` (sys.maxsize marks an unreachable node) and
        `self.par`. Edge weights are assumed to be non-negative.
        """
        self.par = [-1] * self.num_nodes
        # Reset every node, including isolated ones that never appear in
        # adjList, so stale or zero distances cannot leak through.
        self.dist = [sys.maxsize] * self.num_nodes
        self.dist[src] = 0
        Q = PriorityQueue()
        Q.insert((0, src))  # (dist from src, node)

        while not Q.isEmpty():
            u = Q.extract_min()
            for v, w in self.adjList.get(u, ()):
                new_dist = self.dist[u] + w
                if self.dist[v] > new_dist:
                    if self.dist[v] == sys.maxsize:
                        # First time v is reached: it is not queued yet.
                        Q.insert((new_dist, v))
                    else:
                        Q.decrease_key((self.dist[v], v), new_dist)
                    self.dist[v] = new_dist
                    self.par[v] = u

        self.show_distances(src)

    def show_distances(self, src):
        """Print the distance of every node from `src`."""
        print("Distance from node: {}".format(src))
        for u in range(self.num_nodes):
            print('Node {} has distance: {}'.format(u, self.dist[u]))

    def show_path(self, src, dest):
        """Print the shortest path from `src` to `dest` and its total cost.

        Must be called after dijkstra(src).
        """
        path = []
        cost = 0
        temp = dest
        # Walk the predecessor chain back towards the source.
        while self.par[temp] != -1:
            path.append(temp)
            parent = self.par[temp]
            # With parallel edges, the lightest one is the edge Dijkstra used.
            cost += min(w for v, w in self.adjList[temp] if v == parent)
            temp = parent

        if temp != src:
            # The chain did not end at src: dest is unreachable from it.
            print('No path from {} to {}'.format(src, dest))
            return

        path.append(src)
        path.reverse()

        print('----Path to reach {} from {}----'.format(dest, src))
        for u in path:
            print('{}'.format(u), end=' ')
            if u != dest:
                print('-> ', end='')

        print('\nTotal cost of path: ', cost)
def reverse(Head):
    """Reverse the linked list starting at `Head` in place.

    Returns the new head (the former tail), or None for an empty list.
    Rebinding the local `Head` alone would never reach the caller, so the
    new head is returned explicitly.
    """
    prev = None
    current = Head

    while current is not None:
        # The right-hand side is evaluated first, so this flips the link
        # and advances both cursors in one step.
        current.next, prev, current = prev, current, current.next

    return prev
myStack.pop() - index += 1 - - if balanced == 1 and myStack.isEmpty(): - return True - else: - return False - -if __name__ == '__main__': - print(parseParenthesis('((()))')) # True - print(parseParenthesis('((())')) # False diff --git a/data_structures/Stacks/Infix_To_Postfix_Conversion.py b/data_structures/Stacks/Infix_To_Postfix_Conversion.py deleted file mode 100644 index e33926a3d..000000000 --- a/data_structures/Stacks/Infix_To_Postfix_Conversion.py +++ /dev/null @@ -1,48 +0,0 @@ -# Author: OMKAR PATHAK - -import Stack - -def isOperand(char): - return (ord(char) >= ord('a') and ord(char) <= ord('z')) or (ord(char) >= ord('A') and ord(char) <= ord('Z')) - -def precedence(char): - if char == '+' or char == '-': - return 1 - elif char == '*' or char == '/': - return 2 - elif char == '^': - return 3 - else: - return -1 - -def infixToPostfix(myExp, myStack): - postFix = [] - for i in range(len(myExp)): - if (isOperand(myExp[i])): - postFix.append(myExp[i]) - elif(myExp[i] == '('): - myStack.push(myExp[i]) - elif(myExp[i] == ')'): - topOperator = myStack.pop() - while(not myStack.isEmpty() and topOperator != '('): - postFix.append(topOperator) - topOperator = myStack.pop() - else: - while (not myStack.isEmpty()) and (precedence(myExp[i]) <= precedence(myStack.peek())): - postFix.append(myStack.pop()) - myStack.push(myExp[i]) - - while(not myStack.isEmpty()): - postFix.append(myStack.pop()) - return ' '.join(postFix) - -if __name__ == '__main__': - myExp = 'a+b*(c^d-e)^(f+g*h)-i' - myExp = [i for i in myExp] - print('Infix:',' '.join(myExp)) - myStack = Stack.Stack(len(myExp)) - print('Postfix:',infixToPostfix(myExp, myStack)) - - # OUTPUT: - # Infix: a + b * ( c ^ d - e ) ^ ( f + g * h ) - i - # Postfix: a b c d ^ e - f g h * + ^ * + i - diff --git a/data_structures/Stacks/Stack.py b/data_structures/Stacks/Stack.py deleted file mode 100644 index 41bbdc9d2..000000000 --- a/data_structures/Stacks/Stack.py +++ /dev/null @@ -1,50 +0,0 @@ -# Author: OMKAR PATHAK - -class 
def balanced_parentheses(parentheses):
    """Use a stack to check if a string of parentheses is balanced.

    Returns True when every '(' is closed by a later ')' and no ')' appears
    without a matching '('. Characters other than '(' and ')' are ignored.
    """
    stack = []
    for parenthesis in parentheses:
        if parenthesis == '(':
            stack.append(parenthesis)
        elif parenthesis == ')':
            if not stack:
                # A closer with nothing open can never be balanced.
                return False
            stack.pop()
    # Balanced only if nothing is left open.
    return not stack
def is_operand(char):
    """Return True if `char` is an ASCII letter or digit."""
    return char in string.ascii_letters or char in string.digits


def precedence(char):
    """ Return integer value representing an operator's precedence, or
    order of operation. Unknown characters get -1.

    https://en.wikipedia.org/wiki/Order_of_operations
    """
    dictionary = {'+': 1, '-': 1,
                  '*': 2, '/': 2,
                  '^': 3}
    return dictionary.get(char, -1)


def infix_to_postfix(expression):
    """ Convert infix notation to postfix notation using the Shunting-yard
    algorithm.

    Operands are single ASCII letters or digits; whitespace is skipped.
    '^' is right-associative, all other operators are left-associative.
    Raises ValueError on mismatched parentheses.

    https://en.wikipedia.org/wiki/Shunting-yard_algorithm
    https://en.wikipedia.org/wiki/Infix_notation
    https://en.wikipedia.org/wiki/Reverse_Polish_notation
    """
    stack = []
    postfix = []
    for char in expression:
        if char.isspace():
            continue
        if is_operand(char):
            postfix.append(char)
        elif char == '(':
            stack.append(char)
        elif char == ')':
            while stack and stack[-1] != '(':
                postfix.append(stack.pop())
            # The matching '(' must be on top now; otherwise it is missing.
            if not stack:
                raise ValueError('Mismatched parentheses')
            stack.pop()
        else:
            current = precedence(char)
            # Never pop past an open parenthesis, whatever the operator.
            while stack and stack[-1] != '(':
                top = precedence(stack[-1])
                # Equal precedence pops only for left-associative operators.
                if top > current or (top == current and char != '^'):
                    postfix.append(stack.pop())
                else:
                    break
            stack.append(char)

    while stack:
        top = stack.pop()
        if top == '(':
            # An opener that was never closed.
            raise ValueError('Mismatched parentheses')
        postfix.append(top)
    return ' '.join(postfix)
class StackOverflowError(Exception):
    """Raised by Stack.push() when the stack already holds `limit` items."""


class Stack(object):
    """ A stack is an abstract data type that serves as a collection of
    elements with two principal operations: push() and pop(). push() adds an
    element to the top of the stack, and pop() removes an element from the top
    of a stack. The order in which elements come off of a stack are
    Last In, First Out (LIFO).

    https://en.wikipedia.org/wiki/Stack_(abstract_data_type)
    """

    def __init__(self, limit=10):
        self.stack = []
        self.limit = limit  # maximum number of elements push() accepts

    def __bool__(self):
        """A stack is truthy when it holds at least one element."""
        return bool(self.stack)

    def __str__(self):
        return str(self.stack)

    def push(self, data):
        """ Push an element to the top of the stack.

        Raises StackOverflowError when the stack is already at its limit.
        """
        if len(self.stack) >= self.limit:
            raise StackOverflowError
        self.stack.append(data)

    def pop(self):
        """ Pop an element off of the top of the stack.

        Raises IndexError when the stack is empty.
        """
        if not self.stack:
            raise IndexError('pop from an empty stack')
        return self.stack.pop()

    def peek(self):
        """ Peek at the top-most element of the stack (None when empty)."""
        if self.stack:
            return self.stack[-1]
        return None

    def is_empty(self):
        """ Check if a stack is empty."""
        return not bool(self.stack)

    def size(self):
        """ Return the size of the stack."""
        return len(self.stack)
class UnionFind():
    """
    https://en.wikipedia.org/wiki/Disjoint-set_data_structure

    The union-find is a disjoint-set data structure.

    You can merge two sets and tell whether two elements belong to the
    same set.

    It's used on the Kruskal Algorithm
    (https://en.wikipedia.org/wiki/Kruskal%27s_algorithm)

    The elements are in range [0, size]
    """
    def __init__(self, size):
        if size <= 0:
            raise ValueError("size should be greater than 0")

        self.size = size

        # size + 1 slots because elements live in the closed range [0, size].
        # Every set begins with only itself.
        self.root = list(range(size + 1))

        # Rank of each root, used for the union-by-rank heuristic.
        self.weight = [0] * (size + 1)

    def union(self, u, v):
        """
        Union of the sets containing u and v.
        Raises ValueError if u or v is outside [0, size].
        """
        self._validate_element_range(u, "u")
        self._validate_element_range(v, "v")

        if u == v:
            return

        rootu = self._root(u)
        rootv = self._root(v)
        if rootu == rootv:
            # Already merged; touching the rank here would inflate it.
            return

        # Union by rank: hang the shallower tree under the deeper one.
        if self.weight[rootu] < self.weight[rootv]:
            rootu, rootv = rootv, rootu
        self.root[rootv] = rootu
        if self.weight[rootu] == self.weight[rootv]:
            self.weight[rootu] += 1

    def same_set(self, u, v):
        """
        Return true if the elements u and v belong to the same set.
        Raises ValueError if u or v is outside [0, size].
        """
        self._validate_element_range(u, "u")
        self._validate_element_range(v, "v")

        return self._root(u) == self._root(v)

    def _root(self, u):
        """
        Get the root of the set containing u, compressing the path so
        every node visited points directly at that root afterwards.
        """
        top = u
        while top != self.root[top]:
            top = self.root[top]

        # Second pass: re-point every node on the walked path at the root.
        while u != top:
            self.root[u], u = top, self.root[u]

        return top

    def _validate_element_range(self, u, element_name):
        """
        Raises ValueError if element is not in range
        """
        if u < 0 or u > self.size:
            msg = ("element {0} with value {1} "
                   "should be in range [0~{2}]")\
                  .format(element_name, u, self.size)
            raise ValueError(msg)
class Graph:
    """Directed weighted graph on nodes 0..N-1 with Floyd-Warshall all-pairs
    shortest paths."""

    def __init__(self, N=0):
        self.N = N
        # W[u][v]: weight of the direct edge u -> v (math.inf when absent).
        self.W = [[math.inf] * N for _ in range(N)]
        # dp[i][j]: shortest known distance from i to j.
        self.dp = [[math.inf] * N for _ in range(N)]
        for i in range(N):
            # A node is at distance 0 from itself.
            self.dp[i][i] = 0

    def addEdge(self, u, v, w):
        """Add (or overwrite) the directed edge u -> v with weight w."""
        self.W[u][v] = w
        # A positive self-loop must not make a node farther from itself.
        self.dp[u][v] = min(w, 0) if u == v else w

    def floyd_warshall(self):
        """Relax every pair (i, j) through every intermediate node k."""
        for k in range(self.N):
            for i in range(self.N):
                for j in range(self.N):
                    self.dp[i][j] = min(self.dp[i][j],
                                        self.dp[i][k] + self.dp[k][j])

    def showMin(self, u, v):
        """Return the shortest distance from u to v found so far."""
        return self.dp[u][v]
+https://www.hackerrank.com/challenges/coin-change/problem +""" +def dp_count(S, m, n): + table = [0] * (n + 1) + + # Base case (If given value is 0) + table[0] = 1 + + # Pick all coins one by one and update table[] values + # after the index greater than or equal to the value of the + # picked coin + for i in range(0, m): + for j in range(S[i], n + 1): + table[j] += table[j - S[i]] + + return table[n] + +if __name__ == '__main__': + print dp_count([1, 2, 3], 3, 4) # answer 4 + print dp_count([2, 5, 3, 6], 4, 10) # answer 5 diff --git a/dynamic_programming/fastfibonacci.py b/dynamic_programming/fastfibonacci.py new file mode 100644 index 000000000..5957fbe0d --- /dev/null +++ b/dynamic_programming/fastfibonacci.py @@ -0,0 +1,42 @@ +""" +This program calculates the nth Fibonacci number in O(log(n)). +It's possible to calculate F(1000000) in less than a second. +""" +import sys + + +# returns F(n) +def fibonacci(n: int): + if n < 0: + raise ValueError("Negative arguments are not supported") + return _fib(n)[0] + + +# returns (F(n), F(n-1)) +def _fib(n: int): + if n == 0: + # (F(0), F(1)) + return (0, 1) + else: + # F(2n) = F(n)[2F(n+1) − F(n)] + # F(2n+1) = F(n+1)^2+F(n)^2 + a, b = _fib(n // 2) + c = a * (b * 2 - a) + d = a * a + b * b + if n % 2 == 0: + return (c, d) + else: + return (d, c + d) + + +if __name__ == "__main__": + args = sys.argv[1:] + if len(args) != 1: + print("Too few or too much parameters given.") + exit(1) + try: + n = int(args[0]) + except ValueError: + print("Could not convert data to an integer.") + exit(1) + print("F(%d) = %d" % (n, fibonacci(n))) diff --git a/dynamic_programming/fibonacci.py b/dynamic_programming/fibonacci.py index 692cb756a..5eaa81b3e 100644 --- a/dynamic_programming/fibonacci.py +++ b/dynamic_programming/fibonacci.py @@ -30,7 +30,7 @@ if __name__ == '__main__': import sys print("\n********* Fibonacci Series Using Dynamic Programming ************\n") - # For python 2.x and 3.x compatibility: 3.x has not raw_input builtin + 
def CeilIndex(v, l, r, key):
    """Binary-search the smallest index in (l, r] whose value is >= key.

    Assumes v[l+1..r] is sorted ascending and v[r] >= key.
    """
    while r - l > 1:
        # Floor division keeps m an int on Python 3 as well as Python 2.
        m = (l + r) // 2
        if v[m] >= key:
            r = m
        else:
            l = m

    return r


def LongestIncreasingSubsequenceLength(v):
    """Return the length of the longest strictly increasing subsequence of v."""
    if len(v) == 0:
        return 0

    # tail[i] = smallest possible last element of an increasing
    # subsequence of length i + 1 seen so far.
    tail = [0] * len(v)
    length = 1

    tail[0] = v[0]

    for value in v[1:]:
        if value < tail[0]:
            # New smallest value: best start for length-1 subsequences.
            tail[0] = value
        elif value > tail[length - 1]:
            # Extends the longest subsequence found so far.
            tail[length] = value
            length += 1
        else:
            # Replace the first tail that is >= value to keep tails minimal.
            tail[CeilIndex(tail, -1, length - 1, value)] = value

    return length
def get_initial_centroids(data, k, seed=None):
    '''Randomly choose k data points as initial centroids.

    The points are distinct rows of `data` whenever k does not exceed the
    number of rows; duplicated centroids would leave a cluster empty.
    '''
    if seed is not None:  # useful for obtaining consistent results
        np.random.seed(seed)
    n = data.shape[0]  # number of data points

    if k <= n:
        # Pick k different indices from range [0, n).
        rand_indices = np.random.choice(n, k, replace=False)
    else:
        # More centroids than points: repeats are unavoidable.
        rand_indices = np.random.randint(0, n, k)

    centroids = data[rand_indices, :]

    return centroids


def centroid_pairwise_dist(X, centroids):
    '''Return the matrix of euclidean distances between rows of X and centroids.'''
    return pairwise_distances(X, centroids, metric='euclidean')


def assign_clusters(data, centroids):
    '''Return, for every data point, the index of its nearest centroid.'''
    distances_from_centroids = centroid_pairwise_dist(data, centroids)
    cluster_assignment = np.argmin(distances_from_centroids, axis=1)

    return cluster_assignment


def revise_centroids(data, k, cluster_assignment, previous_centroids=None):
    '''Return the mean of the points assigned to each of the k clusters.

    previous_centroids: optional array of the current centroids. When given,
        a cluster with no members keeps its previous centroid instead of
        becoming NaN (the mean of an empty selection).
    '''
    new_centroids = []
    for i in range(k):
        # Select all data points that belong to cluster i.
        member_data_points = data[cluster_assignment == i]
        if member_data_points.shape[0] == 0 and previous_centroids is not None:
            centroid = previous_centroids[i]
        else:
            centroid = member_data_points.mean(axis=0)
        new_centroids.append(centroid)
    new_centroids = np.array(new_centroids)

    return new_centroids


def compute_heterogeneity(data, k, centroids, cluster_assignment):
    '''Return the sum of squared distances of every point to its centroid.'''
    heterogeneity = 0.0
    for i in range(k):
        # Select all data points that belong to cluster i.
        member_data_points = data[cluster_assignment == i, :]

        if member_data_points.shape[0] > 0:  # skip empty clusters
            distances = pairwise_distances(member_data_points, [centroids[i]], metric='euclidean')
            squared_distances = distances**2
            heterogeneity += np.sum(squared_distances)

    return heterogeneity


def plot_heterogeneity(heterogeneity, k):
    '''Plot the recorded heterogeneity values against the iteration number.'''
    # Imported here so the clustering functions work without matplotlib.
    from matplotlib import pyplot as plt

    plt.figure(figsize=(7, 4))
    plt.plot(heterogeneity, linewidth=4)
    plt.xlabel('# Iterations')
    plt.ylabel('Heterogeneity')
    plt.title('Heterogeneity of clustering over time, K={0:d}'.format(k))
    plt.rcParams.update({'font.size': 16})
    plt.show()


def kmeans(data, k, initial_centroids, maxiter=500, record_heterogeneity=None, verbose=False):
    '''This function runs k-means on given data and initial set of centroids.
       maxiter: maximum number of iterations to run.(default=500)
       record_heterogeneity: (optional) a list, to store the history of heterogeneity as function of iterations
                             if None, do not store the history.
       verbose: if True, print how many data points changed their cluster labels in each iteration

       Returns (centroids, cluster_assignment); cluster_assignment is None
       when maxiter is 0.'''
    centroids = initial_centroids[:]
    prev_cluster_assignment = None
    cluster_assignment = None  # stays None if the loop never runs

    for itr in range(maxiter):
        if verbose:
            print(itr, end='')

        # 1. Make cluster assignments using nearest centroids
        cluster_assignment = assign_clusters(data, centroids)

        # 2. Compute a new centroid for each of the k clusters; an empty
        #    cluster keeps its current centroid.
        centroids = revise_centroids(data, k, cluster_assignment, centroids)

        # Check for convergence: if none of the assignments changed, stop
        if prev_cluster_assignment is not None and \
                (prev_cluster_assignment == cluster_assignment).all():
            break

        # Print number of new assignments
        if prev_cluster_assignment is not None:
            num_changed = np.sum(prev_cluster_assignment != cluster_assignment)
            if verbose:
                print('    {0:5d} elements changed their cluster assignment.'.format(num_changed))

        # Record heterogeneity convergence metric
        if record_heterogeneity is not None:
            score = compute_heterogeneity(data, k, centroids, cluster_assignment)
            record_heterogeneity.append(score)

        prev_cluster_assignment = cluster_assignment[:]

    return centroids, cluster_assignment
def b_expo(a, b):
    """Return a ** b for a non-negative integer exponent b.

    Uses binary exponentiation: O(log b) multiplications.
    Raises ValueError for a negative exponent.
    """
    if b < 0:
        raise ValueError("exponent must be non-negative")

    res = 1
    while b > 0:
        if b & 1:
            # The current bit of b is set: fold this power of a in.
            res *= a

        a *= a
        b >>= 1

    return res


def b_expo_mod(a, b, c):
    """Return (a ** b) % c for a non-negative integer exponent b.

    The base is reduced modulo c after every squaring, so intermediate
    values never grow beyond c * c.
    Raises ValueError for a negative exponent.
    """
    if b < 0:
        raise ValueError("exponent must be non-negative")

    res = 1 % c  # 0 when c == 1, matching (a ** 0) % 1
    a %= c
    while b > 0:
        if b & 1:
            res = (res * a) % c

        a = (a * a) % c
        b >>= 1

    return res
def b_expo_mod(a, b, c):
    """Return ``(a * b) % c`` using binary (double-and-add) multiplication.

    The product is built from the bits of ``b``: ``a`` is doubled once per
    bit and added to the result whenever the current bit is set.  All
    intermediate values are reduced modulo ``c``.

    :param a: first factor (any integer)
    :param b: second factor (any integer, may be negative)
    :param c: modulus, must be non-zero
    :return: ``(a * b) % c``
    :raise ZeroDivisionError: if ``c`` is 0
    """
    # The bit loop only works for a non-negative multiplier; move the sign
    # onto ``a`` so that a * b is unchanged.
    if b < 0:
        a, b = -a, -b

    res = 0
    a %= c
    while b > 0:
        if b & 1:
            res = (res + a) % c

        a = (a + a) % c
        b >>= 1

    return res
def euclidean_gcd(a, b):
    """Return the greatest common divisor of integers ``a`` and ``b``.

    Uses the iterative Euclidean algorithm: the pair ``(a, b)`` is replaced
    by ``(b, a % b)`` until the remainder is zero.

    The result is always non-negative, also for negative inputs, and
    ``euclidean_gcd(0, 0)`` is 0.
    """
    while b:
        a, b = b, a % b
    # With negative inputs the loop can end on a negative value (Python's %
    # takes the sign of the divisor); the gcd is defined as non-negative.
    return abs(a)
def run(canvas):
    ''' Compute the next generation of a Game-of-Life population.

    The input is not modified; a new canvas is built and returned.

    @Args:
    --
    canvas : 2-D list (rows of truthy/falsy cells) holding the current population.

    @returns:
    --
    A new 2-D list of bools with the next generation.
    '''
    grid = np.array(canvas, dtype=bool)
    if grid.size == 0:
        # nothing to evolve
        return grid.tolist()

    rows, cols = grid.shape
    next_gen = np.zeros(grid.shape, dtype=bool)
    for r in range(rows):
        for c in range(cols):
            # Clamp the window start at 0: a negative slice start would be
            # read as "from the end" and give an empty window for border cells.
            window = grid[max(r - 1, 0):r + 2, max(c - 1, 0):c + 2]
            next_gen[r, c] = __judge_point(grid[r, c], window)

    return next_gen.tolist()


def __judge_point(pt, neighbours):
    ''' Decide the next state of one cell.

    @Args:
    --
    pt : current state of the cell (truthy = alive).
    neighbours : window of cells around the cell, INCLUDING the cell itself.

    @returns:
    --
    True if the cell is alive in the next generation, else False.
    '''
    # the window contains the focus cell, so take it out of the count.
    alive = int(np.sum(neighbours)) - (1 if pt else 0)

    if pt:
        # survives with two or three live neighbours, dies otherwise.
        return alive == 2 or alive == 3
    # a dead cell becomes alive with exactly three live neighbours.
    return alive == 3
def is_balanced(S):
    """Return True if every bracket in S is closed in the right order.

    Only the characters ``()[]{}`` are inspected; any other character is
    ignored.  An empty string is balanced.
    """
    closing_for = {'(': ')', '[': ']', '{': '}'}
    closed_brackets = set(closing_for.values())
    stack = []

    for char in S:
        if char in closing_for:
            stack.append(char)

        elif char in closed_brackets:
            # a closer with nothing open, or one closing the wrong opener.
            if not stack or closing_for[stack.pop()] != char:
                return False

    # anything still on the stack was never closed.
    return not stack
def getMid(p1, p2):
    """Return the midpoint of the segment p1-p2 as an (x, y) tuple."""
    mid_x = (p1[0] + p2[0]) / 2
    mid_y = (p1[1] + p2[1]) / 2
    return (mid_x, mid_y)
def twoSum(nums, target):
    """
    Return the indices of the two numbers in nums that add up to target.

    The list is scanned once; for every value the index of its complement
    (target - value) is looked up among the values seen so far.

    :type nums: List[int]
    :type target: int
    :rtype: List[int] -- ``[i, j]`` with ``i < j``, or ``False`` if no pair exists
    """
    chk_map = {}  # value -> index of the values already visited
    for index, val in enumerate(nums):
        compl = target - val
        if compl in chk_map:
            return [chk_map[compl], index]
        chk_map[val] = index
    return False
def interpolation_search_by_recursion(sorted_collection, item, left, right):
    """Pure implementation of interpolation search algorithm in Python by recursion
    Be careful collection must be sorted, otherwise result will be
    unpredictable
    First recursion should be started with left=0 and right=(len(sorted_collection)-1)
    :param sorted_collection: some sorted collection with comparable items
    :param item: item value to search
    :param left: index of the first element of the range to search
    :param right: index of the last element of the range to search (inclusive)
    :return: index of found item or None if item is not found
    """
    # empty range: nothing left to inspect (also covers an empty collection).
    if left > right:
        return None

    low = sorted_collection[left]
    high = sorted_collection[right]

    # An item outside [low, high] cannot be in the range. Rejecting it here
    # also keeps the probe below inside [left, right], so every recursive
    # call works on a strictly smaller range.
    if item < low or item > high:
        return None

    # All values in the range are equal (and equal to item because of the
    # check above); interpolating would divide by zero.
    if low == high:
        return left

    point = left + ((item - low) * (right - left)) // (high - low)

    if sorted_collection[point] == item:
        return point
    elif sorted_collection[point] > item:
        return interpolation_search_by_recursion(sorted_collection, item, left, point-1)
    else:
        return interpolation_search_by_recursion(sorted_collection, item, point+1, right)
Examples: + >>> __assert_sorted([0, 1, 2, 4]) + True + >>> __assert_sorted([10, -1, 5]) + Traceback (most recent call last): + ... + ValueError: Collection must be sorted + """ + if collection != sorted(collection): + raise ValueError('Collection must be sorted') + return True + + +if __name__ == '__main__': + import sys + # For python 2.x and 3.x compatibility: 3.x has no raw_input builtin + # otherwise 2.x's input builtin function is too "smart" + if sys.version_info.major < 3: + input_function = raw_input + else: + input_function = input + + user_input = input_function('Enter numbers separated by comma:\n') + collection = [int(item) for item in user_input.split(',')] + try: + __assert_sorted(collection) + except ValueError: + sys.exit('Sequence must be sorted to apply interpolation search') + + target_input = input_function( + 'Enter a single number to be found in the list:\n' + ) + target = int(target_input) + result = interpolation_search(collection, target) + if result is not None: + print('{} found at positions: {}'.format(target, result)) + else: + print('Not found') \ No newline at end of file diff --git a/searches/linear_search.py b/searches/linear_search.py index 24479e45b..ce8098b1a 100644 --- a/searches/linear_search.py +++ b/searches/linear_search.py @@ -41,7 +41,7 @@ def linear_search(sequence, target): if __name__ == '__main__': import sys - # For python 2.x and 3.x compatibility: 3.x has not raw_input builtin + # For python 2.x and 3.x compatibility: 3.x has no raw_input builtin # otherwise 2.x's input builtin function is too "smart" if sys.version_info.major < 3: input_function = raw_input diff --git a/searches/ternary_search.py b/searches/ternary_search.py new file mode 100644 index 000000000..3b1c75314 --- /dev/null +++ b/searches/ternary_search.py @@ -0,0 +1,112 @@ +''' +This is a type of divide and conquer algorithm which divides the search space into +3 parts and finds the target value based on the property of the array or list +(usually 
# Linear scan of A[left..right] (both ends inclusive), used once the search
# space has become small. Gives the index of target, or None if it is absent.
def lin_search(left, right, A, target):
    idx = left
    while idx <= right:
        if A[idx] == target:
            return idx
        idx += 1
    return None
# Sort the characters of the given string (or sequence of single
# characters) with counting sort and return them as a list, because
# strings are immutable.
def countSort(arr):
    if not arr:
        return []

    # One counter per character code. 256 covers the single-byte range;
    # grow the table when the input holds characters with a larger code.
    table_size = max(256, max(ord(ch) for ch in arr) + 1)
    count = [0] * table_size

    # Store count of each character
    for ch in arr:
        count[ord(ch)] += 1

    # Prefix sums: count[i] becomes the number of characters with a code
    # <= i, i.e. one past the last output slot for character i.
    # Start at 1: count[0] has no predecessor.
    for code in range(1, table_size):
        count[code] += count[code - 1]

    # The output is as long as the input.
    output = [""] * len(arr)

    # Walk the input backwards so equal characters keep their relative
    # order (stable sort).
    for ch in reversed(arr):
        count[ord(ch)] -= 1
        output[count[ord(ch)]] = ch

    return output
class NWayMerge(object):
    """Merge strategy that picks the buffer holding the smallest value."""

    def select(self, choices):
        """Return the key (index) of the smallest value in choices.

        choices is either a dict mapping buffer index -> value (the keys do
        not have to be contiguous) or a plain sequence of values.
        On ties the smallest index wins.  Returns -1 if choices is empty.
        """
        if hasattr(choices, 'keys'):
            # visit the keys in ascending order so ties are deterministic.
            indexes = sorted(choices)
        else:
            indexes = range(len(choices))

        min_index = -1
        min_str = None

        for i in indexes:
            if min_str is None or choices[i] < min_str:
                # remember the value too, otherwise every later candidate
                # would win the comparison.
                min_index = i
                min_str = choices[i]

        return min_index
class ExternalSort(object):
    """Sort a text file by splitting it into sorted blocks and merging them."""

    def __init__(self, block_size):
        # block_size: number of bytes handed to FileSplitter.split() per block
        self.block_size = block_size

    def sort(self, filename, sort_key=None):
        """Sort the lines of filename and write them to ``filename + '.out'``.

        sort_key is passed on to the per-block sort; None means plain
        line comparison.
        """
        num_blocks = self.get_number_blocks(filename, self.block_size)
        splitter = FileSplitter(filename)

        try:
            splitter.split(self.block_size, sort_key)

            merger = FileMerger(NWayMerge())
            # Integer division: the value is used as the buffering argument
            # of open(), which must be an int.
            buffer_size = self.block_size // (num_blocks + 1)
            merger.merge(splitter.get_block_filenames(), filename + '.out', buffer_size)
        finally:
            # remove the temporary block files even when split/merge fails.
            splitter.cleanup()

    def get_number_blocks(self, filename, block_size):
        """Return the number of blocks (an int) needed to cover filename."""
        return (os.stat(filename).st_size // block_size) + 1
+ parser.add_argument('filename', + metavar='', + nargs=1, + help='name of file to sort') + args = parser.parse_args() + + sorter = ExternalSort(parse_memory(args.mem)) + sorter.sort(args.filename[0]) + + +if __name__ == '__main__': +main() \ No newline at end of file diff --git a/sorts/gnome_sort.py b/sorts/gnome_sort.py index b353e31aa..4f04ff384 100644 --- a/sorts/gnome_sort.py +++ b/sorts/gnome_sort.py @@ -21,7 +21,7 @@ def gnome_sort(unsorted): if __name__ == '__main__': import sys - # For python 2.x and 3.x compatibility: 3.x has not raw_input builtin + # For python 2.x and 3.x compatibility: 3.x has no raw_input builtin # otherwise 2.x's input builtin function is too "smart" if sys.version_info.major < 3: input_function = raw_input diff --git a/sorts/insertion_sort.py b/sorts/insertion_sort.py index caaa9305c..33bd27c8f 100644 --- a/sorts/insertion_sort.py +++ b/sorts/insertion_sort.py @@ -41,7 +41,7 @@ def insertion_sort(collection): if __name__ == '__main__': import sys - # For python 2.x and 3.x compatibility: 3.x has not raw_input builtin + # For python 2.x and 3.x compatibility: 3.x has no raw_input builtin # otherwise 2.x's input builtin function is too "smart" if sys.version_info.major < 3: input_function = raw_input diff --git a/sorts/merge_sort.py b/sorts/merge_sort.py index 92a678016..ca8dbc33c 100644 --- a/sorts/merge_sort.py +++ b/sorts/merge_sort.py @@ -64,7 +64,7 @@ def merge_sort(collection): if __name__ == '__main__': import sys - # For python 2.x and 3.x compatibility: 3.x has not raw_input builtin + # For python 2.x and 3.x compatibility: 3.x has no raw_input builtin # otherwise 2.x's input builtin function is too "smart" if sys.version_info.major < 3: input_function = raw_input diff --git a/sorts/quick_sort.py b/sorts/quick_sort.py index 8974e1bd8..52e37b587 100644 --- a/sorts/quick_sort.py +++ b/sorts/quick_sort.py @@ -42,7 +42,7 @@ def quick_sort(ARRAY): if __name__ == '__main__': import sys - # For python 2.x and 3.x compatibility: 3.x 
has not raw_input builtin + # For python 2.x and 3.x compatibility: 3.x has no raw_input builtin # otherwise 2.x's input builtin function is too "smart" if sys.version_info.major < 3: input_function = raw_input diff --git a/sorts/radix_sort.py b/sorts/radix_sort.py index 82f8a38b4..b0b4b41ab 100644 --- a/sorts/radix_sort.py +++ b/sorts/radix_sort.py @@ -2,19 +2,20 @@ def radixsort(lst): RADIX = 10 maxLength = False tmp , placement = -1, 1 - + while not maxLength: maxLength = True # declare and initialize buckets buckets = [list() for _ in range( RADIX )] - + # split lst between lists - for i in lst: - tmp = i / placement - buckets[tmp % RADIX].append( i ) + for i in lst: + tmp = int((i / placement) % RADIX) + buckets[tmp].append(i) + if maxLength and tmp > 0: maxLength = False - + # empty lists into lst array a = 0 for b in range( RADIX ): @@ -22,6 +23,6 @@ def radixsort(lst): for i in buck: lst[a] = i a += 1 - + # move to next placement *= RADIX diff --git a/sorts/selection_sort.py b/sorts/selection_sort.py index 14bc80463..752496e98 100644 --- a/sorts/selection_sort.py +++ b/sorts/selection_sort.py @@ -44,7 +44,7 @@ def selection_sort(collection): if __name__ == '__main__': import sys - # For python 2.x and 3.x compatibility: 3.x has not raw_input builtin + # For python 2.x and 3.x compatibility: 3.x has no raw_input builtin # otherwise 2.x's input builtin function is too "smart" if sys.version_info.major < 3: input_function = raw_input diff --git a/sorts/shell_sort.py b/sorts/shell_sort.py index fdb98a570..de3d84f72 100644 --- a/sorts/shell_sort.py +++ b/sorts/shell_sort.py @@ -45,7 +45,7 @@ def shell_sort(collection): if __name__ == '__main__': import sys - # For python 2.x and 3.x compatibility: 3.x has not raw_input builtin + # For python 2.x and 3.x compatibility: 3.x has no raw_input builtin # otherwise 2.x's input builtin function is too "smart" if sys.version_info.major < 3: input_function = raw_input diff --git a/sorts/timsort.py b/sorts/timsort.py new 
def binary_search(lst, item, start, end):
    """Return the index at which item belongs in the sorted slice lst[start..end]."""
    while start < end:
        mid = (start + end) // 2
        if lst[mid] < item:
            start = mid + 1
        elif lst[mid] > item:
            end = mid - 1
        else:
            return mid

    if start > end:
        return start
    # one candidate left: insert before it if it is larger, else after it.
    return start if lst[start] > item else start + 1


def insertion_sort(lst):
    """Return a sorted copy of lst using binary insertion sort."""
    result = list(lst)

    for index in range(1, len(result)):
        # result[:index] is sorted; find where result[index] belongs in it.
        pos = binary_search(result, result[index], 0, index - 1)
        result.insert(pos, result.pop(index))

    return result


def merge(left, right):
    """Merge two sorted lists into a new sorted list.

    Iterative, so the length of the inputs is not limited by the recursion
    depth.  On ties the element from left comes first.
    """
    merged = []
    i = j = 0

    while i < len(left) and j < len(right):
        if right[j] < left[i]:
            merged.append(right[j])
            j += 1
        else:
            merged.append(left[i])
            i += 1

    # at most one of the two tails is non-empty.
    merged.extend(left[i:])
    merged.extend(right[j:])
    return merged


def timsort(lst):
    """Return a sorted copy of lst.

    The input is cut into maximal non-decreasing runs, every run is passed
    through insertion_sort and the runs are then merged.  lst itself is not
    modified.  An empty input gives an empty list.
    """
    if not lst:
        return []

    # Split into runs: a new run starts wherever an element is smaller than
    # its predecessor.  Every element lands in exactly one run.
    runs = []
    new_run = [lst[0]]
    for i in range(1, len(lst)):
        if lst[i] < lst[i - 1]:
            runs.append(new_run)
            new_run = [lst[i]]
        else:
            new_run.append(lst[i])
    runs.append(new_run)

    sorted_runs = [insertion_sort(run) for run in runs]

    # Merge neighbouring runs pairwise until one is left; this keeps the
    # work at O(n log r) for r runs instead of re-copying one growing list.
    while len(sorted_runs) > 1:
        merged_runs = []
        for i in range(0, len(sorted_runs), 2):
            if i + 1 < len(sorted_runs):
                merged_runs.append(merge(sorted_runs[i], sorted_runs[i + 1]))
            else:
                merged_runs.append(sorted_runs[i])
        sorted_runs = merged_runs

    return sorted_runs[0]
Binary Tree Traversals ************\n") - # For python 2.x and 3.x compatibility: 3.x has not raw_input builtin + # For python 2.x and 3.x compatibility: 3.x has no raw_input builtin # otherwise 2.x's input builtin function is too "smart" if sys.version_info.major < 3: input_function = raw_input