From c313a7bb4f68bf99b14dda1dbae3280ac949f237 Mon Sep 17 00:00:00 2001
From: Francisco Matias
Date: Tue, 20 Jun 2017 22:08:03 -0300
Subject: [PATCH 01/22] Update .travis.yml

---
 .travis.yml | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 3e45c05a1..fe3ec15a6 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -6,10 +6,9 @@ python:
   - "3.5"
   - "3.6"
   - "3.6-dev"
-  - "3.7-dev"
-  - "nightly"
+
 install:
   - if [ "$TRAVIS_PYTHON_VERSION" == "3.2" ]; then travis_retry pip install coverage==3.7.1; fi
   - if [ "$TRAVIS_PYTHON_VERSION" != "3.2" ]; then travis_retry pip install coverage; fi
   - "pip install pytest pytest-cov"
-script: py.test --doctest-modules --cov ./
\ No newline at end of file
+script: py.test --doctest-modules --cov ./

From 16b0d62f2843b51f3bd983ea574b6ba553e391ae Mon Sep 17 00:00:00 2001
From: Zach Wild
Date: Wed, 21 Jun 2017 19:11:31 -0400
Subject: [PATCH 02/22] Add topological_sort.py

---
 sorts/topological_sort.py | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100644 sorts/topological_sort.py

diff --git a/sorts/topological_sort.py b/sorts/topological_sort.py
new file mode 100644
index 000000000..5de1cc2b7
--- /dev/null
+++ b/sorts/topological_sort.py
@@ -0,0 +1,32 @@
+#      a
+#     / \
+#    b   c
+#   / \
+#  d   e
+edges = {'a': ['c', 'b'], 'b': ['d', 'e'], 'c': [], 'd': [], 'e': []}
+vertices = ['a', 'b', 'c', 'd', 'e']
+
+
+def topological_sort(start, visited, sort):
+    """Perform topological sort on a directed acyclic graph."""
+    current = start
+    # add current to visited
+    visited.append(current)
+    neighbors = edges[current]
+    for neighbor in neighbors:
+        # if neighbor not in visited, visit
+        if neighbor not in visited:
+            sort = topological_sort(neighbor, visited, sort)
+    # if all neighbors visited add current to sort
+    sort.append(current)
+    # if all vertices haven't been visited select a new one to visit
+    if len(visited) != len(vertices):
+        for vertex in vertices:
+            if vertex not in visited:
+                sort = topological_sort(vertex, visited, sort)
+    # return sort
+    return sort
+
+
+# note: the list is built in reverse topological order (dependents first)
+sort = topological_sort('a', [], [])
+print(sort)

From 64d29ef2ad0f084624c41d74249dde986c50c56d Mon Sep 17 00:00:00 2001
From: = <201551009@iiitvadodara.ac.in>
Date: Fri, 23 Jun 2017 02:09:42 +0530
Subject: [PATCH 03/22] -Added Gradient Descent Algorithm

---
 machine_learning/gradient_descent.py | 121 +++++++++++++++++++++++++++
 1 file changed, 121 insertions(+)
 create mode 100644 machine_learning/gradient_descent.py

diff --git a/machine_learning/gradient_descent.py b/machine_learning/gradient_descent.py
new file mode 100644
index 000000000..1e771b072
--- /dev/null
+++ b/machine_learning/gradient_descent.py
@@ -0,0 +1,121 @@
+"""
+Implementation of gradient descent algorithm for minimizing cost of a linear hypothesis function.
+"""
+import numpy
+
+# List of input, output pairs
+train_data = (((5, 2, 3), 15), ((6, 5, 9), 25),
+              ((11, 12, 13), 41), ((1, 1, 1), 8), ((11, 12, 13), 41))
+test_data = (((515, 22, 13), 555), ((61, 35, 49), 150))
+parameter_vector = [2, 4, 1, 5]
+m = len(train_data)
+LEARNING_RATE = 0.009
+
+
+def _error(example_no, data_set='train'):
+    """
+    :param data_set: train data or test data
+    :param example_no: example number whose error has to be checked
+    :return: error in the example pointed to by example number.
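+    Here the error is simply the hypothesis value for the example minus
+    its actual output.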
+    """
+    return calculate_hypothesis_value(example_no, data_set) - output(example_no, data_set)
+
+
+def _hypothesis_value(data_input_tuple):
+    """
+    Calculates hypothesis function value for a given input
+    :param data_input_tuple: Input tuple of a particular example
+    :return: Value of hypothesis function at that point.
+    Note that there is a 'bias input' whose value is fixed as 1.
+    It is not explicitly mentioned in the input data, but ML hypothesis
+    functions use it, so we have to take care of it separately; the line
+    just after the loop below does that.
+    """
+    hyp_val = 0
+    for i in range(len(parameter_vector) - 1):
+        hyp_val += data_input_tuple[i]*parameter_vector[i+1]
+    hyp_val += parameter_vector[0]
+    return hyp_val
+
+
+def output(example_no, data_set):
+    """
+    :param data_set: test data or train data
+    :param example_no: example whose output is to be fetched
+    :return: output for that example
+    """
+    if data_set == 'train':
+        return train_data[example_no][1]
+    elif data_set == 'test':
+        return test_data[example_no][1]
+
+
+def calculate_hypothesis_value(example_no, data_set):
+    """
+    Calculates hypothesis value for a given example
+    :param data_set: test data or train_data
+    :param example_no: example whose hypothesis value is to be calculated
+    :return: hypothesis value for that example
+    """
+    if data_set == "train":
+        return _hypothesis_value(train_data[example_no][0])
+    elif data_set == "test":
+        return _hypothesis_value(test_data[example_no][0])
+
+
+def summation_of_cost_derivative(index, end=m):
+    """
+    Calculates the sum of the cost function derivative
+    :param index: index with respect to which the derivative is calculated
+    :param end: value where summation ends, default is m, the number of examples
+    :return: Returns the summation of the cost derivative
+    Note: If index is -1, this means we are calculating the summation wrt the bias parameter.
+    """
+    summation_value = 0
+    for i in range(end):
+        if index == -1:
+            summation_value += _error(i)
+        else:
+            summation_value += _error(i)*train_data[i][0][index]
+    return summation_value
+
+
+def get_cost_derivative(index):
+    """
+    :param index: index of the parameter vector with respect to which the derivative is to be calculated
+    :return: derivative wrt that index
+    Note: If index is -1, this means we are calculating the summation wrt the bias parameter.
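+    In symbols: derivative = (1 / m) * sum of (hypothesis(x) - y) * x_index
+    over the m training examples, where x_index is taken as 1 for the bias
+    parameter. That is exactly summation_of_cost_derivative(index, m) / m.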
+    """
+    cost_derivative_value = summation_of_cost_derivative(index, m)/m
+    return cost_derivative_value
+
+
+def run_gradient_descent():
+    global parameter_vector
+    # Tune these values to set a tolerance value for predicted output
+    absolute_error_limit = 0.000002
+    relative_error_limit = 0
+    j = 0
+    while True:
+        j += 1
+        temp_parameter_vector = [0, 0, 0, 0]
+        for i in range(0, len(parameter_vector)):
+            cost_derivative = get_cost_derivative(i-1)
+            temp_parameter_vector[i] = parameter_vector[i] - \
+                LEARNING_RATE*cost_derivative
+        if numpy.allclose(parameter_vector, temp_parameter_vector,
+                          atol=absolute_error_limit, rtol=relative_error_limit):
+            break
+        parameter_vector = temp_parameter_vector
+    print("Number of iterations:", j)
+
+
+def test_gradient_descent():
+    for i in range(len(test_data)):
+        print("Actual output value:", output(i, 'test'))
+        print("Hypothesis output:", calculate_hypothesis_value(i, 'test'))
+
+
+if __name__ == '__main__':
+    run_gradient_descent()
+    print("\nTesting gradient descent for a linear hypothesis function.\n")
+    test_gradient_descent()

From 1727d79d97cae1bcf773d070f324208860bf9c52 Mon Sep 17 00:00:00 2001
From: yashLadha
Date: Tue, 27 Jun 2017 17:56:27 +0530
Subject: [PATCH 04/22] Added Linear regression

---
 machine_learning/linear_regression.py | 108 ++++++++++++++++++++++++++
 1 file changed, 108 insertions(+)
 create mode 100644 machine_learning/linear_regression.py

diff --git a/machine_learning/linear_regression.py b/machine_learning/linear_regression.py
new file mode 100644
index 000000000..fd33c3580
--- /dev/null
+++ b/machine_learning/linear_regression.py
@@ -0,0 +1,108 @@
+"""
+Linear regression is the most basic type of regression commonly used for
+predictive analysis. The idea is pretty simple: we have a dataset and we have
+features associated with it. The features should be chosen very cautiously,
+as they determine how well our model will be able to make future predictions.
+We try to set these feature weights, over many iterations, so that they best
+fit our dataset. In this particular code, I used a CSGO dataset (ADR vs
+Rating). We try to best fit a line through the dataset and estimate the
+parameters.
+"""
+
+import requests
+import numpy as np
+
+
+def collect_dataset():
+    """ Collect dataset of CSGO
+        The dataset contains ADR vs Rating of a Player
+        :return : dataset obtained from the link, as matrix
+    """
+    response = requests.get('https://raw.githubusercontent.com/yashLadha/' +
+                            'The_Math_of_Intelligence/master/Week1/ADRvs' +
+                            'Rating.csv')
+    lines = response.text.splitlines()
+    data = []
+    for item in lines:
+        item = item.split(',')
+        data.append(item)
+    data.pop(0)  # This is for removing the labels from the list
+    dataset = np.matrix(data)
+    return dataset
+
+
+def run_steep_gradient_descent(data_x, data_y,
+                               len_data, alpha, theta):
+    """ Run steep gradient descent and update the feature vector accordingly
+        :param data_x   : contains the dataset
+        :param data_y   : contains the output associated with each data-entry
+        :param len_data : length of the data
+        :param alpha    : learning rate of the model
+        :param theta    : feature vector (weights for our model)
+        :return         : updated features, using
+                          curr_features - alpha * gradient (w.r.t. feature)
+    """
+    n = len_data
+
+    prod = np.dot(theta, data_x.transpose())
+    prod -= data_y.transpose()
+    sum_grad = np.dot(prod, data_x)
+    theta = theta - (alpha / n) * sum_grad
+    return theta
+
+
+def sum_of_square_error(data_x, data_y, len_data, theta):
+    """ Return sum of square error for error calculation
+        :param data_x   : contains our dataset
+        :param data_y   : contains the output (result vector)
+        :param len_data : len of the dataset
+        :param theta    : contains the feature vector
+        :return         : sum of square error computed from given features
+    """
+    error = 0.0
+    prod = np.dot(theta, data_x.transpose())
+    prod -= data_y.transpose()
+    sum_elem = np.sum(np.square(prod))
+    error = sum_elem / (2 * len_data)
+    return error
+
+
+def run_linear_regression(data_x, data_y):
+    """ Implement Linear regression over the dataset
+        :param data_x : contains our dataset
+        :param data_y : contains the output (result vector)
+        :return       : feature vector for the line of best fit
+    """
+    iterations = 100000
+    alpha = 0.0001550
+
+    no_features = data_x.shape[1]
+    len_data = data_x.shape[0] - 1
+
+    theta = np.zeros((1, no_features))
+
+    for i in range(0, iterations):
+        theta = run_steep_gradient_descent(data_x, data_y,
+                                           len_data, alpha, theta)
+        error = sum_of_square_error(data_x, data_y, len_data, theta)
+        print('At Iteration %d - Error is %.5f ' % (i + 1, error))
+
+    return theta
+
+
+def main():
+    """ Driver function """
+    data = collect_dataset()
+
+    len_data = data.shape[0]
+    data_x = np.c_[np.ones(len_data), data[:, :-1]].astype(float)
+    data_y = data[:, -1].astype(float)
+
+    theta = run_linear_regression(data_x, data_y)
+    len_result = theta.shape[1]
+    print('Resultant Feature vector : ')
+    for i in range(0, len_result):
+        print('%.5f' % (theta[0, i]))
+
+
+if __name__ == '__main__':
+    main()

From a3972dd9b4b0675d52cd1dcdf9fc2c92d677f0eb Mon Sep 17 00:00:00 2001
From: Rafael
Date: Fri, 30 Jun 2017 21:12:10 +0200
Subject: [PATCH 05/22] fix indent error

---
 data_structures/Binary Tree/binary_seach_tree.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/data_structures/Binary Tree/binary_seach_tree.py b/data_structures/Binary Tree/binary_seach_tree.py
index b6aac2990..1f14514b5 100644
--- a/data_structures/Binary Tree/binary_seach_tree.py
+++ b/data_structures/Binary Tree/binary_seach_tree.py
@@ -12,7 +12,7 @@ class Node:
         return self.label
 
     def setLabel(self, label):
-        self.label = label
+        self.label = label
 
     def getLeft(self):
         return self.left

From 817c27462bec1a87577cdf91f9cd193253d00554 Mon Sep 17 00:00:00 2001
From: Rafael
Date: Tue, 4 Jul 2017 18:46:46 +0200
Subject: [PATCH 06/22] fix some style errors

---
 data_structures/Binary Tree/binary_seach_tree.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/data_structures/Binary Tree/binary_seach_tree.py b/data_structures/Binary Tree/binary_seach_tree.py
index 1f14514b5..1dac948ae 100644
--- a/data_structures/Binary Tree/binary_seach_tree.py
+++ b/data_structures/Binary Tree/binary_seach_tree.py
@@ -1,6 +1,8 @@
 '''
 A binary search Tree
 '''
+
+
 class Node:
 
     def __init__(self, label):
 
     def insert(self, label):
 
-        #Create a new Node
+        # Create a new Node
 
         node = Node(label)
 
             curr_node = self.root
 
             while True:
-                if curr_node != None:
+                if curr_node is not None:
 
                     dad_node = curr_node
 
                     break
 
     def empty(self):
-        if self.root == None:
+        if self.root is None:
             return True
         return False
 
     def preShow(self, curr_node):
-        if curr_node != None:
+        if curr_node is not None:
             print(curr_node.getLabel(), end=" ")
 
             self.preShow(curr_node.getLeft())

From 3ead193f0eb74e92cb6f719b7618b47e89b35fa6 Mon Sep 17 00:00:00 2001
From: Rafael
Date: Tue, 4 Jul 2017 18:48:27 +0200
Subject: [PATCH 07/22] Done Node and Insert method

---
 data_structures/AVL/AVL.py | 106 +++++++++++++++++++++++++++++++
 1 file changed, 106 insertions(+)
 create mode 100644 data_structures/AVL/AVL.py

diff --git a/data_structures/AVL/AVL.py b/data_structures/AVL/AVL.py
new file mode 100644
index 000000000..1c59862a6
--- /dev/null
+++ b/data_structures/AVL/AVL.py
@@ -0,0 +1,106 @@
+'''
+An AVL tree
+'''
+
+
+class Node:
+
+    def __init__(self, label):
+        self.label = label
+        self.left = None
+        self.right = None
+        self.parent = None
+        self.height = 0
+
+    def getLabel(self):
+        return self.label
+
+    def setLabel(self, label):
+        self.label = label
+
+    def getLeft(self):
+        return self.left
+
+    def setLeft(self, left):
+        self.left = left
+
+    def getRight(self):
+        return self.right
+
+    def setRight(self, right):
+        self.right = right
+
+    def getParent(self):
+        return self.parent
+
+    def setParent(self, parent):
+        self.parent = parent
+
+    def setHeight(self, height):
+        self.height = height
+
+    def getHeight(self):
+        return self.height
+
+
+class AVL:
+
+    def __init__(self):
+        self.root = None
+        self.size = 0
+
+    # def __init__(self, root):
+    #     self.root = root
+    #     self.size = 1
+
+    def insert(self, value):
+        node = Node(value)
+        if self.root is None:
+            self.root = node
+            self.size = 1
+        else:
+            # Same as Binary Tree
+            dad_node = None
+            curr_node = self.root
+
+            while True:
+                if curr_node is not None:
+
+                    dad_node = curr_node
+
+                    if node.getLabel() < curr_node.getLabel():
+                        curr_node = curr_node.getLeft()
+                    else:
+                        curr_node = curr_node.getRight()
+                else:
+                    if node.getLabel() < dad_node.getLabel():
+                        dad_node.setLeft(node)
+                        dad_node.setHeight(dad_node.getHeight() + 1)
+
+                        if (dad_node.getRight().getHeight() -
+                                dad_node.getLeft().getHeight() > 1):
+                            self.rebalance(dad_node)
+
+                    else:
+                        dad_node.setRight(node)
+                        dad_node.setHeight(dad_node.getHeight() + 1)
+
+                        if (dad_node.getRight().getHeight() -
+                                dad_node.getLeft().getHeight() > 1):
+                            self.rebalance(dad_node)
+                    break
+
+    def rebalance(self, node):
+        pass
+
+    def rotate_left(self, node):
+        pass
+
+    def rotate_right(self, node):
+        pass
+
+    def double_rotate_left(self, node):
+        pass
+
+    def double_rotate_righs(self, node):
+        pass

From 23ac6bc1b539aede6b5b245756a8b500f8e2c936 Mon Sep 17 00:00:00 2001
From: ZivLi
Date: Wed, 5 Jul 2017 16:40:18 +0800
Subject: [PATCH 08/22] Add binary search implementation by recursion

---
 searches/binary_search.py | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/searches/binary_search.py b/searches/binary_search.py
index f9e27d3ff..13b54f498 100644
--- a/searches/binary_search.py
+++ b/searches/binary_search.py
@@ -80,6 +80,39 @@ def binary_search_std_lib(sorted_collection, item):
         return index
     return None
 
+def binary_search_by_recursion(sorted_collection, item, left, right):
+
+    """Pure implementation of binary search algorithm in Python by recursion
+
+    Be careful: the collection must be sorted, otherwise the result will be
+    unpredictable.
+    The first recursion should be started with left=0 and right=(len(sorted_collection)-1)
+
+    :param sorted_collection: some sorted collection with comparable items
+    :param item: item value to search
+    :return: index of found item or None if item is not found
+
+    Examples:
+    >>> binary_search_by_recursion([0, 5, 7, 10, 15], 0, 0, 4)
+    0
+
+    >>> binary_search_by_recursion([0, 5, 7, 10, 15], 15, 0, 4)
+    4
+
+    >>> binary_search_by_recursion([0, 5, 7, 10, 15], 5, 0, 4)
+    1
+
+    >>> binary_search_by_recursion([0, 5, 7, 10, 15], 6, 0, 4)
+
+    """
+    if right < left:
+        return None
+
+    midpoint = left + (right - left) // 2
+
+    if sorted_collection[midpoint] == item:
+        return midpoint
+    elif sorted_collection[midpoint] > item:
+        return binary_search_by_recursion(sorted_collection, item, left, midpoint-1)
+    else:
+        return binary_search_by_recursion(sorted_collection, item, midpoint+1, right)
 
 def __assert_sorted(collection):
     """Check if collection is sorted, if not - raises :py:class:`ValueError`

From 0b7d3a5c970b59bebb8111dd65d0dd1c8f3ec01a Mon Sep 17 00:00:00 2001
From: Rafael
Date: Wed, 5 Jul 2017 20:37:47 +0200
Subject: [PATCH 09/22] Rotate left and right done

---
 data_structures/AVL/AVL.py | 35 +++++++++++++++++++++++++++++------
 1 file changed, 29 insertions(+), 6 deletions(-)

diff --git a/data_structures/AVL/AVL.py b/data_structures/AVL/AVL.py
index 1c59862a6..ab09743ee 100644
--- a/data_structures/AVL/AVL.py
+++ b/data_structures/AVL/AVL.py
@@ -49,10 +49,6 @@ class AVL:
         self.root = None
         self.size = 0
 
-    # def __init__(self, root):
-    #     self.root = root
-    #     self.size = 1
-
     def insert(self, value):
         node = Node(value)
         if self.root is None:
                     break
 
     def rebalance(self, node):
+        if (node.getRight().getHeight() -
+                node.getLeft().getHeight() > 1):
+            if (node.getRight().getHeight() >
+                    node.getLeft().getHeight()):
+                pass
+            else:
+                pass
+            pass
+        elif (node.getRight().getHeight() -
+                node.getLeft().getHeight() > 2):
+            if (node.getRight().getHeight() >
+                    node.getLeft().getHeight()):
+                pass
+            else:
+                pass
+            pass
         pass
 
     def rotate_left(self, node):
-        pass
+        # TODO: is this pythonic enough?
+        aux = node.getLabel()
+        node = node.getRight()
+        node.setHeight(node.getHeight() - 1)
+        node.setLeft(Node(aux))
+        node.getLeft().setHeight(node.getHeight() + 1)
+        node.getRight().setHeight(node.getRight().getHeight() - 1)
 
     def rotate_right(self, node):
-        pass
+        aux = node.getLabel()
+        node = node.getLeft()
+        node.setHeight(node.getHeight() - 1)
+        node.setRight(Node(aux))
+        node.getLeft().setHeight(node.getHeight() + 1)
+        node.getLeft().setHeight(node.getLeft().getHeight() - 1)
 
     def double_rotate_left(self, node):
         pass

From f65fe8c858636c0e7d29e7f9561d3109f984978b Mon Sep 17 00:00:00 2001
From: Rafael
Date: Thu, 6 Jul 2017 21:13:56 +0200
Subject: [PATCH 10/22] more rotations

---
 data_structures/AVL/AVL.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/data_structures/AVL/AVL.py b/data_structures/AVL/AVL.py
index ab09743ee..0e3008dc1 100644
--- a/data_structures/AVL/AVL.py
+++ b/data_structures/AVL/AVL.py
@@ -123,7 +123,9 @@ class AVL:
             node.getLeft().setHeight(node.getLeft().getHeight() - 1)
 
     def double_rotate_left(self, node):
-        pass
+        self.rotate_right(node.getRight().getRight())
+        self.rotate_left(node)
 
-    def double_rotate_righs(self, node):
-        pass
+    def double_rotate_right(self, node):
+        self.rotate_left(node.getLeft().getLeft())
+        self.rotate_right(node)

From 6c3b27424693342fbdaa94d54fa35158463c74a7 Mon Sep 17 00:00:00 2001
From: Omkar Pathak
Date: Sun, 16 Jul 2017 11:25:32 +0530
Subject: [PATCH 11/22] Python implementation of Stack

---
 data_structures/Stacks/Stack.py | 50 +++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)
 create mode 100644 data_structures/Stacks/Stack.py

diff --git a/data_structures/Stacks/Stack.py b/data_structures/Stacks/Stack.py
new file mode 100644
index 000000000..41bbdc9d2
--- /dev/null
+++ b/data_structures/Stacks/Stack.py
@@ -0,0 +1,50 @@
+# Author: OMKAR PATHAK
+
+class Stack(object):
+    def __init__(self, limit = 10):
+        self.stack = []
+        self.limit = limit
+
+    # for printing the stack contents
+    def __str__(self):
+        return ' '.join([str(i) for i in self.stack])
+
+    # for pushing an element on to the stack
+    def push(self, data):
+        if len(self.stack) >= self.limit:
+            print('Stack Overflow')
+        else:
+            self.stack.append(data)
+
+    # for popping the uppermost element
+    def pop(self):
+        if len(self.stack) <= 0:
+            return -1
+        else:
+            return self.stack.pop()
+
+    # for peeking the top-most element of the stack
+    def peek(self):
+        if len(self.stack) <= 0:
+            return -1
+        else:
+            return self.stack[len(self.stack) - 1]
+
+    # to check if stack is empty
+    def isEmpty(self):
+        return self.stack == []
+
+    # for checking the size of stack
+    def size(self):
+        return len(self.stack)
+
+if __name__ == '__main__':
+    myStack = Stack()
+    for i in range(10):
+        myStack.push(i)
+    print(myStack)
+    myStack.pop()       # popping the top element
+    print(myStack)
+    myStack.peek()      # returns the top element
+    myStack.isEmpty()
+    myStack.size()

From ce3e91a420333a08b054b57675f699605d1b2452 Mon Sep 17 00:00:00 2001
From: Omkar Pathak
Date: Sun, 16 Jul 2017 11:25:50 +0530
Subject: [PATCH 12/22] Infix to Postfix conversion using Stack

---
 .../Stacks/Infix_To_Postfix_Conversion.py     | 48 +++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 data_structures/Stacks/Infix_To_Postfix_Conversion.py

diff --git a/data_structures/Stacks/Infix_To_Postfix_Conversion.py b/data_structures/Stacks/Infix_To_Postfix_Conversion.py
new file mode 100644
index 000000000..e33926a3d
--- /dev/null
+++ b/data_structures/Stacks/Infix_To_Postfix_Conversion.py
@@ -0,0 +1,48 @@
+# Author: OMKAR PATHAK
+
+import Stack
+
+def isOperand(char):
+    return (ord(char) >= ord('a') and ord(char) <= ord('z')) or (ord(char) >= ord('A') and ord(char) <= ord('Z'))
+
+def precedence(char):
+    if char == '+' or char == '-':
+        return 1
+    elif char == '*' or char == '/':
+        return 2
+    elif char == '^':
+        return 3
+    else:
+        return -1
+
+def infixToPostfix(myExp, myStack):
+    postFix = []
+    for i in range(len(myExp)):
+        if (isOperand(myExp[i])):
+            postFix.append(myExp[i])
+        elif(myExp[i] == '('):
+            myStack.push(myExp[i])
+        elif(myExp[i] == ')'):
+            topOperator = myStack.pop()
+            while(not myStack.isEmpty() and topOperator != '('):
+                postFix.append(topOperator)
+                topOperator = myStack.pop()
+        else:
+            while (not myStack.isEmpty()) and (precedence(myExp[i]) <= precedence(myStack.peek())):
+                postFix.append(myStack.pop())
+            myStack.push(myExp[i])
+
+    while(not myStack.isEmpty()):
+        postFix.append(myStack.pop())
+    return ' '.join(postFix)
+
+if __name__ == '__main__':
+    myExp = 'a+b*(c^d-e)^(f+g*h)-i'
+    myExp = [i for i in myExp]
+    print('Infix:',' '.join(myExp))
+    myStack = Stack.Stack(len(myExp))
+    print('Postfix:',infixToPostfix(myExp, myStack))
+
+    # OUTPUT:
+    # Infix: a + b * ( c ^ d - e ) ^ ( f + g * h ) - i
+    # Postfix: a b c d ^ e - f g h * + ^ * + i -

From 2af624ff017b0f1d9bf1d9cb26a8eb022f6fdd1e Mon Sep 17 00:00:00 2001
From: Omkar Pathak
Date: Sun, 16 Jul 2017 11:26:11 +0530
Subject: [PATCH 13/22] Checking balanced parentheses using Stack

---
 .../Stacks/Balanced_Parentheses.py | 27 +++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 data_structures/Stacks/Balanced_Parentheses.py

diff --git a/data_structures/Stacks/Balanced_Parentheses.py b/data_structures/Stacks/Balanced_Parentheses.py
new file mode 100644
index 000000000..6b7740380
--- /dev/null
+++ b/data_structures/Stacks/Balanced_Parentheses.py
@@ -0,0 +1,27 @@
+# Author: OMKAR PATHAK
+
+import Stack
+
+def parseParenthesis(string):
+    balanced = 1
+    index = 0
+    myStack = Stack.Stack(len(string))
+    while (index < len(string)) and (balanced == 1):
+        check = string[index]
+        if check == '(':
+            myStack.push(check)
+        else:
+            if myStack.isEmpty():
+                balanced = 0
+            else:
+                myStack.pop()
+        index += 1
+
+    if balanced == 1 and myStack.isEmpty():
+        return True
+    else:
+        return False
+
+if __name__ == '__main__':
+    print(parseParenthesis('((()))'))   # True
+    print(parseParenthesis('((())'))    # False

From 37705510e5a9aaacd7b10187fa34e9c09ec61762 Mon Sep 17 00:00:00 2001
From: AnupKumarPanwar <1anuppanwar@gmail.com>
Date: Tue, 18 Jul 2017 16:17:48 +0530
Subject: [PATCH 14/22] DFS & BFS in graph

---
 Graphs/Breadth_First_Search.py | 68 +++++++++++++++------------------
 Graphs/Deep_First_Search.py    | 51 +++++++++++++------------
 2 files changed, 54 insertions(+), 65 deletions(-)

diff --git a/Graphs/Breadth_First_Search.py b/Graphs/Breadth_First_Search.py
index f3d9fd778..5af471027 100644
--- a/Graphs/Breadth_First_Search.py
+++ b/Graphs/Breadth_First_Search.py
@@ -1,45 +1,35 @@
-class Graph:
+class GRAPH:
+    """docstring for GRAPH"""
+    def __init__(self, nodes):
+        self.nodes=nodes
+        self.graph=[[0]*nodes for i in range (nodes)]
+        self.visited=[0]*nodes
 
-    def __init__(self, vertex):
-        self.vertex = vertex
-        self.graph = [[0] * vertex for i in range(vertex) ]
-
-    def add_edge(self, u, v):
-        self.graph[u - 1][v - 1] = 1
-        self.graph[v - 1][u - 1] = 1
 
     def show(self):
+        print(self.graph)
 
-        for i in self.graph:
-            for j in i:
-                print(j, end=' ')
-            print(' ')
-    def bfs(self,v):
+    def add_edge(self, i, j):
+        self.graph[i][j]=1
+        self.graph[j][i]=1
 
-        visited = [False]*self.vertex
-        visited[v - 1] = True
-        print('%d visited' % (v))
+    def bfs(self,s):
+        queue=[s]
+        while len(queue)!=0:
+            x=queue.pop(0)
+            print(x)
+            self.visited[x]=1
+            for i in range(0,self.nodes):
+                if self.graph[x][i]==1 and self.visited[i]==0:
+                    queue.append(i)
+                    self.visited[i]=1
 
-        queue = [v - 1]
-        while len(queue) > 0:
-            v = queue[0]
-            for u in range(self.vertex):
-                if self.graph[v][u] == 1:
-                    if visited[u]== False:
-                        visited[u] = True
-                        queue.append(u)
-                        print('%d visited' % (u +1))
-            queue.pop(0)
-
-g = Graph(10)
-
-g.add_edge(1,2)
-g.add_edge(1,3)
-g.add_edge(1,4)
-g.add_edge(2,5)
-g.add_edge(3,6)
-g.add_edge(3,7)
-g.add_edge(4,8)
-g.add_edge(5,9)
-g.add_edge(6,10)
-g.bfs(1)
+n=int(input("Enter the number of Nodes : "))
+g=GRAPH(n)
+e=int(input("Enter the no of edges : "))
+print("Enter the edges (u v)")
+for i in range(0,e):
+    u,v=map(int, input().split())
+    g.add_edge(u,v)
+s=int(input("Enter the source node :"))
+g.bfs(s)
diff --git a/Graphs/Deep_First_Search.py b/Graphs/Deep_First_Search.py
index 51c0eb6b8..656ddfbaf 100644
--- a/Graphs/Deep_First_Search.py
+++ b/Graphs/Deep_First_Search.py
@@ -1,33 +1,32 @@
-class Graph:
+class GRAPH:
+    """docstring for GRAPH"""
+    def __init__(self, nodes):
+        self.nodes=nodes
+        self.graph=[[0]*nodes for i in range (nodes)]
+        self.visited=[0]*nodes
 
-    def __init__(self, vertex):
-        self.vertex = vertex
-        self.graph = [[0] * vertex for i in range(vertex) ]
-        self.visited = [False] * vertex
 
-    def add_edge(self, u, v):
-        self.graph[u - 1][v - 1] = 1
-        self.graph[v - 1][u - 1] = 1
     def show(self):
+        print(self.graph)
 
-        for i in self.graph:
-            for j in i:
-                print(j, end=' ')
-            print(' ')
+    def add_edge(self, i, j):
+        self.graph[i][j]=1
+        self.graph[j][i]=1
 
-
-    def dfs(self, u):
-        self.visited[u - 1] = True
-        print('%d visited' % u)
-        for i in range(1, self.vertex + 1):
-            if self.graph[u - 1][i - 1] == 1 and self.visited[i - 1] == False:
+    def dfs(self,s):
+        self.visited[s]=1
+        print(s)
+        for i in range(0,self.nodes):
+            if self.visited[i]==0 and self.graph[s][i]==1:
                 self.dfs(i)
+
 
-g = Graph(5)
-g.add_edge(1,4)
-g.add_edge(4,2)
-g.add_edge(4,5)
-g.add_edge(2,5)
-g.add_edge(5,3)
-g.dfs(1)
+n=int(input("Enter the number of Nodes : "))
+g=GRAPH(n)
+e=int(input("Enter the no of edges : "))
+print("Enter the edges (u v)")
+for i in range(0,e):
+    u,v=map(int, input().split())
+    g.add_edge(u,v)
+s=int(input("Enter the source node :"))
+g.dfs(s)

From 0f2edefc2f2c2793d1bdc731feecfddbcc910e7e Mon Sep 17 00:00:00 2001
From: Omkar Pathak
Date: Thu, 20 Jul 2017 06:51:04 +0530
Subject: [PATCH 15/22] Python Graph implementation

---
 data_structures/Graph/Graph.py                | 40 ++++++++++++
 .../Graph/P01_BreadthFirstSearch.py           | 61 +++++++++++++++++++
 data_structures/Graph/P02_DepthFirstSearch.py | 61 +++++++++++++++++++
 3 files changed, 162 insertions(+)
 create mode 100644 data_structures/Graph/Graph.py
 create mode 100644 data_structures/Graph/P01_BreadthFirstSearch.py
 create mode 100644 data_structures/Graph/P02_DepthFirstSearch.py

diff --git a/data_structures/Graph/Graph.py b/data_structures/Graph/Graph.py
new file mode 100644
index 000000000..0fa3f5593
--- /dev/null
+++ b/data_structures/Graph/Graph.py
@@ -0,0 +1,40 @@
+# Author: OMKAR PATHAK
+
+# We can use Python's dictionary for constructing the graph
+
+class AdjacencyList(object):
+    def __init__(self):
+        self.List = {}
+
+    def addEdge(self, fromVertex, toVertex):
+        # check if vertex is already present
+        if fromVertex in self.List.keys():
+            self.List[fromVertex].append(toVertex)
+        else:
+            self.List[fromVertex] = [toVertex]
+
+    def printList(self):
+        for i in self.List:
+            print(i,'->',' -> '.join([str(j) for j in self.List[i]]))
+
+if __name__ == '__main__':
+    al = AdjacencyList()
+    al.addEdge(0, 1)
+    al.addEdge(0, 4)
+    al.addEdge(4, 1)
+    al.addEdge(4, 3)
+    al.addEdge(1, 0)
+    al.addEdge(1, 4)
+    al.addEdge(1, 3)
+    al.addEdge(1, 2)
+    al.addEdge(2, 3)
+    al.addEdge(3, 4)
+
+    al.printList()
+
+    # OUTPUT:
+    # 0 -> 1 -> 4
+    # 1 -> 0 -> 4 -> 3 -> 2
+    # 2 -> 3
+    # 3 -> 4
+    # 4 -> 1 -> 3
diff --git a/data_structures/Graph/P01_BreadthFirstSearch.py b/data_structures/Graph/P01_BreadthFirstSearch.py
new file mode 100644
index 000000000..16b1b2007
--- /dev/null
+++ b/data_structures/Graph/P01_BreadthFirstSearch.py
@@ -0,0 +1,61 @@
+# Author: OMKAR PATHAK
+
+class Graph():
+    def __init__(self):
+        self.vertex = {}
+
+    # for printing the Graph vertexes
+    def printGraph(self):
+        for i in self.vertex.keys():
+            print(i,' -> ', ' -> '.join([str(j) for j in self.vertex[i]]))
+
+    # for adding the edge between two vertexes
+    def addEdge(self, fromVertex, toVertex):
+        # check if vertex is already present,
+        if fromVertex in self.vertex.keys():
+            self.vertex[fromVertex].append(toVertex)
+        else:
+            # else make a new vertex
+            self.vertex[fromVertex] = [toVertex]
+
+    def BFS(self, startVertex):
+        # Take a list for storing already visited vertexes
+        visited = [False] * len(self.vertex)
+
+        # create a list to store all the vertexes for BFS
+        queue = []
+
+        # mark the source node as visited and enqueue it
+        visited[startVertex] = True
+        queue.append(startVertex)
+
+        while queue:
+            startVertex = queue.pop(0)
+            print(startVertex, end = ' ')
+
+            # mark all adjacent unvisited nodes as visited and enqueue them
+            for i in self.vertex[startVertex]:
+                if visited[i] == False:
+                    queue.append(i)
+                    visited[i] = True
+
+if __name__ == '__main__':
+    g = Graph()
+    g.addEdge(0, 1)
+    g.addEdge(0, 2)
+    g.addEdge(1, 2)
+    g.addEdge(2, 0)
+    g.addEdge(2, 3)
+    g.addEdge(3, 3)
+
+    g.printGraph()
+    print('BFS:')
+    g.BFS(2)
+
+    # OUTPUT:
+    # 0  ->  1 -> 2
+    # 1  ->  2
+    # 2  ->  0 -> 3
+    # 3  ->  3
+    # BFS:
+    # 2 0 3 1
diff --git a/data_structures/Graph/P02_DepthFirstSearch.py b/data_structures/Graph/P02_DepthFirstSearch.py
new file mode 100644
index 000000000..94ef3cb86
--- /dev/null
+++ b/data_structures/Graph/P02_DepthFirstSearch.py
@@ -0,0 +1,61 @@
+# Author: OMKAR PATHAK
+
+class Graph():
+    def __init__(self):
+        self.vertex = {}
+
+    # for printing the Graph vertexes
+    def printGraph(self):
+        print(self.vertex)
+        for i in self.vertex.keys():
+            print(i,' -> ', ' -> '.join([str(j) for j in self.vertex[i]]))
+
+    # for adding the edge between two vertexes
+    def addEdge(self, fromVertex, toVertex):
+        # check if vertex is already present,
+        if fromVertex in self.vertex.keys():
+            self.vertex[fromVertex].append(toVertex)
+        else:
+            # else make a new vertex
+            self.vertex[fromVertex] = [toVertex]
+
+    def DFS(self):
+        # visited array for storing already visited nodes
+        visited = [False] * len(self.vertex)
+
+        # call the recursive helper function
+        for i in range(len(self.vertex)):
+            if visited[i] == False:
+                self.DFSRec(i, visited)
+
+    def DFSRec(self, startVertex, visited):
+        # mark start vertex as visited
+        visited[startVertex] = True
+
+        print(startVertex, end = ' ')
+
+        # Recur for all the vertexes that are adjacent to this node
+        for i in self.vertex[startVertex]:
+            if visited[i] == False:
+                self.DFSRec(i, visited)
+
+if __name__ == '__main__':
+    g = Graph()
+    g.addEdge(0, 1)
+    g.addEdge(0, 2)
+    g.addEdge(1, 2)
+    g.addEdge(2, 0)
+    g.addEdge(2, 3)
+    g.addEdge(3, 3)
+
+    g.printGraph()
+    print('DFS:')
+    g.DFS()
+
+    # OUTPUT:
+    # 0  ->  1 -> 2
+    # 1  ->  2
+    # 2  ->  0 -> 3
+    # 3  ->  3
+    # DFS:
+    # 0 1 2 3

From e8a36b12cecaa2df05d430fc1eee13fb9928ad56 Mon Sep 17 00:00:00 2001
From: Omkar Pathak
Date: Thu, 20 Jul 2017 06:59:42 +0530
Subject: [PATCH 16/22] Added Bucket Sort implementation

---
 sorts/bucket_sort.py | 55 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 sorts/bucket_sort.py

diff --git a/sorts/bucket_sort.py b/sorts/bucket_sort.py
new file mode 100644
index 000000000..a8fe614cc
--- /dev/null
+++ b/sorts/bucket_sort.py
@@ -0,0 +1,55 @@
+# Author: OMKAR PATHAK
+# This program will illustrate how to implement bucket sort algorithm
+
+# Wikipedia says: Bucket sort, or bin sort, is a sorting algorithm that works by distributing the
+# elements of an array into a number of buckets. Each bucket is then sorted individually, either using
+# a different sorting algorithm, or by recursively applying the bucket sorting algorithm. It is a
+# distribution sort, and is a cousin of radix sort in the most to least significant digit flavour.
+# Bucket sort is a generalization of pigeonhole sort. Bucket sort can be implemented with comparisons
+# and therefore can also be considered a comparison sort algorithm. The computational complexity estimates
+# involve the number of buckets.
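+# Note: the linear average case below assumes the elements spread fairly
+# evenly across the buckets; badly skewed input pushes the per-bucket
+# insertion sorts towards their quadratic worst case.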
+
+# Time Complexity of Solution:
+# Best Case O(n); Average Case O(n); Worst Case O(n^2)
+
+# insertionSort is assumed to come from the author's companion
+# P26_InsertionSort module; it is not defined in this file.
+from P26_InsertionSort import insertionSort
+import math
+
+DEFAULT_BUCKET_SIZE = 5
+
+def bucketSort(myList, bucketSize=DEFAULT_BUCKET_SIZE):
+    if(len(myList) == 0):
+        print('You don\'t have any elements in array!')
+        return myList
+
+    minValue = myList[0]
+    maxValue = myList[0]
+
+    # For finding minimum and maximum values
+    for i in range(0, len(myList)):
+        if myList[i] < minValue:
+            minValue = myList[i]
+        elif myList[i] > maxValue:
+            maxValue = myList[i]
+
+    # Initialize buckets
+    bucketCount = math.floor((maxValue - minValue) / bucketSize) + 1
+    buckets = []
+    for i in range(0, bucketCount):
+        buckets.append([])
+
+    # For putting values in buckets
+    for i in range(0, len(myList)):
+        buckets[math.floor((myList[i] - minValue) / bucketSize)].append(myList[i])
+
+    # Sort buckets and place back into input array
+    sortedArray = []
+    for i in range(0, len(buckets)):
+        insertionSort(buckets[i])
+        for j in range(0, len(buckets[i])):
+            sortedArray.append(buckets[i][j])
+
+    return sortedArray
+
+if __name__ == '__main__':
+    sortedArray = bucketSort([12, 23, 4, 5, 3, 2, 12, 81, 56, 95])
+    print(sortedArray)

From ef01688b946d5d4ed565a3401aff3c8a526099cd Mon Sep 17 00:00:00 2001
From: Omkar Pathak
Date: Thu, 20 Jul 2017 07:02:49 +0530
Subject: [PATCH 17/22] Added Bucket Sort implementation

---
 sorts/bucket_sort.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sorts/bucket_sort.py b/sorts/bucket_sort.py
index a8fe614cc..e378d65f4 100644
--- a/sorts/bucket_sort.py
+++ b/sorts/bucket_sort.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 # Author: OMKAR PATHAK
 # This program will illustrate how to implement bucket sort algorithm
 

From 9dcd981a5776f5b2d2ff2b5867a17a3d8fcf06d5 Mon Sep 17 00:00:00 2001
From: mounaim
Date: Tue, 25 Jul 2017 16:08:54 +0100
Subject: [PATCH 18/22] Update Breadth_First_Search.py

Fixed the fact that all nodes except source are marked as visited twice

---
 Graphs/Breadth_First_Search.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Graphs/Breadth_First_Search.py b/Graphs/Breadth_First_Search.py
index 5af471027..19b278093 100644
--- a/Graphs/Breadth_First_Search.py
+++ b/Graphs/Breadth_First_Search.py
@@ -15,10 +15,10 @@ class GRAPH:
 
     def bfs(self,s):
         queue=[s]
+        self.visited[s]=1
         while len(queue)!=0:
            x=queue.pop(0)
            print(x)
-           self.visited[x]=1
            for i in range(0,self.nodes):
                if self.graph[x][i]==1 and self.visited[i]==0:
                    queue.append(i)

From 0d01a4a0aaf577b38b44340352ca580a65a72d01 Mon Sep 17 00:00:00 2001
From: mandy8055
Date: Sun, 30 Jul 2017 00:42:32 +0530
Subject: [PATCH 19/22] Added one of the most important machine learning
 algorithms

The k-means clustering is done using TensorFlow, which is Google's vital and growing machine learning library.

---
 .../k_means_clustering_tensorflow.py          | 141 ++++++++++++++++++
 1 file changed, 141 insertions(+)
 create mode 100644 dynamic_programming/k_means_clustering_tensorflow.py

diff --git a/dynamic_programming/k_means_clustering_tensorflow.py b/dynamic_programming/k_means_clustering_tensorflow.py
new file mode 100644
index 000000000..ad495c71a
--- /dev/null
+++ b/dynamic_programming/k_means_clustering_tensorflow.py
@@ -0,0 +1,141 @@
+import tensorflow as tf
+from random import choice, shuffle
+from numpy import array
+
+
+def TFKMeansCluster(vectors, noofclusters):
+    """
+    K-Means Clustering using TensorFlow.
+    'vectors' should be a n*k 2-D NumPy array, where n is the number
+    of vectors of dimensionality k.
+    'noofclusters' should be an integer.
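+    Returns a tuple (centroids, assignments): the final centroid locations
+    and, for every input vector, the index of the cluster it was assigned to.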
+    """
+
+    noofclusters = int(noofclusters)
+    assert noofclusters < len(vectors)
+
+    #Find out the dimensionality
+    dim = len(vectors[0])
+
+    #Will help select random centroids from among the available vectors
+    vector_indices = list(range(len(vectors)))
+    shuffle(vector_indices)
+
+    #GRAPH OF COMPUTATION
+    #We initialize a new graph and set it as the default during each run
+    #of this algorithm. This ensures that as this function is called
+    #multiple times, the default graph doesn't keep getting crowded with
+    #unused ops and Variables from previous function calls.
+
+    graph = tf.Graph()
+
+    with graph.as_default():
+
+        #SESSION OF COMPUTATION
+
+        sess = tf.Session()
+
+        ##CONSTRUCTING THE ELEMENTS OF COMPUTATION
+
+        ##First let's ensure we have a Variable vector for each centroid,
+        ##initialized to one of the vectors from the available data points
+        centroids = [tf.Variable((vectors[vector_indices[i]]))
+                     for i in range(noofclusters)]
+        ##These nodes will assign the centroid Variables the appropriate
+        ##values
+        centroid_value = tf.placeholder("float64", [dim])
+        cent_assigns = []
+        for centroid in centroids:
+            cent_assigns.append(tf.assign(centroid, centroid_value))
+
+        ##Variables for cluster assignments of individual vectors (initialized
+        ##to 0 at first)
+        assignments = [tf.Variable(0) for i in range(len(vectors))]
+        ##These nodes will assign an assignment Variable the appropriate
+        ##value
+        assignment_value = tf.placeholder("int32")
+        cluster_assigns = []
+        for assignment in assignments:
+            cluster_assigns.append(tf.assign(assignment,
+                                             assignment_value))
+
+        ##Now let's construct the node that will compute the mean
+        #The placeholder for the input
+        mean_input = tf.placeholder("float", [None, dim])
+        #The Node/op takes the input and computes a mean along the 0th
+        #dimension, i.e. the list of input vectors
+        mean_op = tf.reduce_mean(mean_input, 0)
+
+        ##Node for computing Euclidean distances
+        #Placeholders for input
+        v1 = tf.placeholder("float", [dim])
+        v2 = tf.placeholder("float", [dim])
+        euclid_dist = tf.sqrt(tf.reduce_sum(tf.pow(tf.subtract(
+            v1, v2), 2)))
+
+        ##This node will figure out which cluster to assign a vector to,
+        ##based on Euclidean distances of the vector from the centroids.
+        #Placeholder for input
+        centroid_distances = tf.placeholder("float", [noofclusters])
+        cluster_assignment = tf.argmin(centroid_distances, 0)
+
+        ##INITIALIZING STATE VARIABLES
+
+        ##This will help initialization of all Variables defined with respect
+        ##to the graph. The Variable-initializer should be defined after
+        ##all the Variables have been constructed, so that each of them
+        ##will be included in the initialization.
+        init_op = tf.initialize_all_variables()
+
+        #Initialize all variables
+        sess.run(init_op)
+
+        ##CLUSTERING ITERATIONS
+
+        #Now perform the Expectation-Maximization steps of K-Means clustering
+        #iterations. To keep things simple, we will only do a set number of
+        #iterations, instead of using a Stopping Criterion.
+        noofiterations = 100
+        for iteration_n in range(noofiterations):
+
+            ##EXPECTATION STEP
+            ##Based on the centroid locations till last iteration, compute
+            ##the _expected_ centroid assignments.
+            #Iterate over each vector
+            for vector_n in range(len(vectors)):
+                vect = vectors[vector_n]
+                #Compute Euclidean distance between this vector and each
+                #centroid. Remember that this list cannot be named
+                #'centroid_distances', since that is the input to the
+                #cluster assignment node.
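+                #(Note: this issues one separate sess.run per centroid and
+                #per vector, which keeps the example simple but is slow for
+                #large datasets.)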
+                distances = [sess.run(euclid_dist, feed_dict={
+                    v1: vect, v2: sess.run(centroid)})
+                             for centroid in centroids]
+                #Now use the cluster assignment node, with the distances
+                #as the input
+                assignment = sess.run(cluster_assignment, feed_dict = {
+                    centroid_distances: distances})
+                #Now assign the value to the appropriate state variable
+                sess.run(cluster_assigns[vector_n], feed_dict={
+                    assignment_value: assignment})
+
+            ##MAXIMIZATION STEP
+            #Based on the expected state computed from the Expectation Step,
+            #compute the locations of the centroids so as to maximize the
+            #overall objective of minimizing within-cluster Sum-of-Squares
+            for cluster_n in range(noofclusters):
+                #Collect all the vectors assigned to this cluster
+                assigned_vects = [vectors[i] for i in range(len(vectors))
+                                  if sess.run(assignments[i]) == cluster_n]
+                #Compute new centroid location
+                new_location = sess.run(mean_op, feed_dict={
+                    mean_input: array(assigned_vects)})
+                #Assign value to appropriate variable
+                sess.run(cent_assigns[cluster_n], feed_dict={
+                    centroid_value: new_location})
+
+        #Return centroids and assignments
+        centroids = sess.run(centroids)
+        assignments = sess.run(assignments)
+        return centroids, assignments

From 5e3f111c8315e3388d42e8503e6452b99d9f2aff Mon Sep 17 00:00:00 2001
From: Anshuman
Date: Tue, 8 Aug 2017 13:58:09 +0530
Subject: [PATCH 20/22] Corrected Markdown

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 1d8d3386e..ab0738bee 100644
--- a/README.md
+++ b/README.md
@@ -83,7 +83,7 @@ __Properties__
 
 ###### View the algorithm in [action][shell-toptal]
 
-###Time-Compexity Graphs
+### Time-Complexity Graphs
 
 Comparing the complexity of sorting algorithms (Bubble Sort, Insertion Sort, Selection Sort)

From cc6814bd1cdb6fe136400caaf838ba6af3210d0e Mon Sep 17 00:00:00 2001
From: Chris Jim
Date: Thu, 10 Aug 2017 22:20:28 +0800
Subject: [PATCH 21/22] Correct markdown (ShellSort)

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 1d8d3386e..4628e2de7 100644
--- a/README.md
+++ b/README.md
@@ -74,7 +74,7 @@ __Properties__
 ### Shell
 ![alt text][shell-image]
 
-From [Wikipedia][shell-wiki]: Shellsort is a generalization of insertion sort that allows the exchange of items that are far apart. The idea is to arrange the list of elements so that, starting anywherem considereing every nth element gives a sorted list. Such a list is said to be h-sorted. Equivanelty, it can be thought of as h intterleaved lists, each individually sorted.
+From [Wikipedia][shell-wiki]: Shellsort is a generalization of insertion sort that allows the exchange of items that are far apart. The idea is to arrange the list of elements so that, starting anywhere, considering every nth element gives a sorted list. Such a list is said to be h-sorted. Equivalently, it can be thought of as h interleaved lists, each individually sorted.
 
 __Properties__
 * Worst case performance O(nlog2 2n)

From 75ccf5b77edcdc4c695b2d818324b68a34a804c2 Mon Sep 17 00:00:00 2001
From: Anup Kumar Panwar <1anuppanwar@gmail.com>
Date: Tue, 22 Aug 2017 18:08:11 +0530
Subject: [PATCH 22/22] Empty File

---
 other/anagrams.txt | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 other/anagrams.txt

diff --git a/other/anagrams.txt b/other/anagrams.txt
deleted file mode 100644
index e69de29bb..000000000