diff --git a/DIRECTORY.md b/DIRECTORY.md
index c781b17bf..34967082b 100644
--- a/DIRECTORY.md
+++ b/DIRECTORY.md
@@ -309,6 +309,7 @@
   * [Floyd Warshall](dynamic_programming/floyd_warshall.py)
   * [Integer Partition](dynamic_programming/integer_partition.py)
   * [Iterating Through Submasks](dynamic_programming/iterating_through_submasks.py)
+  * [K Means Clustering Tensorflow](dynamic_programming/k_means_clustering_tensorflow.py)
   * [Knapsack](dynamic_programming/knapsack.py)
   * [Longest Common Subsequence](dynamic_programming/longest_common_subsequence.py)
   * [Longest Common Substring](dynamic_programming/longest_common_substring.py)
@@ -685,6 +686,7 @@
   * [2 Hidden Layers Neural Network](neural_network/2_hidden_layers_neural_network.py)
   * [Back Propagation Neural Network](neural_network/back_propagation_neural_network.py)
   * [Convolution Neural Network](neural_network/convolution_neural_network.py)
+  * [Input Data](neural_network/input_data.py)
   * [Perceptron](neural_network/perceptron.py)
   * [Simple Neural Network](neural_network/simple_neural_network.py)
diff --git a/dynamic_programming/k_means_clustering_tensorflow.py_tf b/dynamic_programming/k_means_clustering_tensorflow.py
similarity index 98%
rename from dynamic_programming/k_means_clustering_tensorflow.py_tf
rename to dynamic_programming/k_means_clustering_tensorflow.py
index 4fbcedeaa..8d3f6f0df 100644
--- a/dynamic_programming/k_means_clustering_tensorflow.py_tf
+++ b/dynamic_programming/k_means_clustering_tensorflow.py
@@ -1,9 +1,10 @@
-import tensorflow as tf
 from random import shuffle
+
+import tensorflow as tf
 from numpy import array
 
 
-def TFKMeansCluster(vectors, noofclusters):
+def tf_k_means_cluster(vectors, noofclusters):
     """
     K-Means Clustering using TensorFlow.
     'vectors' should be a n*k 2-D NumPy array, where n is the number
@@ -30,7 +31,6 @@ def TFKMeansCluster(vectors, noofclusters):
     graph = tf.Graph()
 
     with graph.as_default():
-
         # SESSION OF COMPUTATION
         sess = tf.Session()
 
@@ -95,8 +95,7 @@ def TFKMeansCluster(vectors, noofclusters):
         # iterations. To keep things simple, we will only do a set number of
         # iterations, instead of using a Stopping Criterion.
         noofiterations = 100
-        for iteration_n in range(noofiterations):
-
+        for _ in range(noofiterations):
             ##EXPECTATION STEP
             ##Based on the centroid locations till last iteration, compute
             ##the _expected_ centroid assignments.
diff --git a/neural_network/input_data.py_tf b/neural_network/input_data.py
similarity index 83%
rename from neural_network/input_data.py_tf
rename to neural_network/input_data.py
index 0e22ac0bc..2a32f0b82 100644
--- a/neural_network/input_data.py_tf
+++ b/neural_network/input_data.py
@@ -21,13 +21,10 @@ This module and all its submodules are deprecated.
 import collections
 import gzip
 import os
+import urllib
 
 import numpy
-from six.moves import urllib
-from six.moves import xrange  # pylint: disable=redefined-builtin
-
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import random_seed
+from tensorflow.python.framework import dtypes, random_seed
 from tensorflow.python.platform import gfile
 from tensorflow.python.util.deprecation import deprecated
 
@@ -46,16 +43,16 @@ def _read32(bytestream):
 def _extract_images(f):
     """Extract the images into a 4D uint8 numpy array [index, y, x, depth].
 
-  Args:
-    f: A file object that can be passed into a gzip reader.
+    Args:
+      f: A file object that can be passed into a gzip reader.
 
-  Returns:
-    data: A 4D uint8 numpy array [index, y, x, depth].
+    Returns:
+      data: A 4D uint8 numpy array [index, y, x, depth].
 
-  Raises:
-    ValueError: If the bytestream does not start with 2051.
+    Raises:
+      ValueError: If the bytestream does not start with 2051.
 
-  """
+    """
     print("Extracting", f.name)
     with gzip.GzipFile(fileobj=f) as bytestream:
         magic = _read32(bytestream)
@@ -86,17 +83,17 @@ def _dense_to_one_hot(labels_dense, num_classes):
 def _extract_labels(f, one_hot=False, num_classes=10):
     """Extract the labels into a 1D uint8 numpy array [index].
 
-  Args:
-    f: A file object that can be passed into a gzip reader.
-    one_hot: Does one hot encoding for the result.
-    num_classes: Number of classes for the one hot encoding.
+    Args:
+      f: A file object that can be passed into a gzip reader.
+      one_hot: Does one hot encoding for the result.
+      num_classes: Number of classes for the one hot encoding.
 
-  Returns:
-    labels: a 1D uint8 numpy array.
+    Returns:
+      labels: a 1D uint8 numpy array.
 
-  Raises:
-    ValueError: If the bystream doesn't start with 2049.
-  """
+    Raises:
+      ValueError: If the bystream doesn't start with 2049.
+    """
     print("Extracting", f.name)
     with gzip.GzipFile(fileobj=f) as bytestream:
         magic = _read32(bytestream)
@@ -115,8 +112,8 @@ def _extract_labels(f, one_hot=False, num_classes=10):
 class _DataSet:
     """Container class for a _DataSet (deprecated).
 
-  THIS CLASS IS DEPRECATED.
-  """
+    THIS CLASS IS DEPRECATED.
+    """
 
     @deprecated(
         None,
@@ -135,21 +132,21 @@ class _DataSet:
     ):
         """Construct a _DataSet.
 
-    one_hot arg is used only if fake_data is true. `dtype` can be either
-    `uint8` to leave the input as `[0, 255]`, or `float32` to rescale into
-    `[0, 1]`. Seed arg provides for convenient deterministic testing.
+        one_hot arg is used only if fake_data is true. `dtype` can be either
+        `uint8` to leave the input as `[0, 255]`, or `float32` to rescale into
+        `[0, 1]`. Seed arg provides for convenient deterministic testing.
 
-    Args:
-      images: The images
-      labels: The labels
-      fake_data: Ignore inages and labels, use fake data.
-      one_hot: Bool, return the labels as one hot vectors (if True) or ints (if
-        False).
-      dtype: Output image dtype. One of [uint8, float32]. `uint8` output has
-        range [0,255]. float32 output has range [0,1].
-      reshape: Bool. If True returned images are returned flattened to vectors.
-      seed: The random seed to use.
-    """
+        Args:
+          images: The images
+          labels: The labels
+          fake_data: Ignore inages and labels, use fake data.
+          one_hot: Bool, return the labels as one hot vectors (if True) or ints (if
+            False).
+          dtype: Output image dtype. One of [uint8, float32]. `uint8` output has
+            range [0,255]. float32 output has range [0,1].
+          reshape: Bool. If True returned images are returned flattened to vectors.
+          seed: The random seed to use.
+        """
         seed1, seed2 = random_seed.get_seed(seed)
         # If op level seed is not set, use whatever graph level seed is returned
         numpy.random.seed(seed1 if seed is None else seed2)
@@ -206,8 +203,8 @@ class _DataSet:
             else:
                 fake_label = 0
             return (
-                [fake_image for _ in xrange(batch_size)],
-                [fake_label for _ in xrange(batch_size)],
+                [fake_image for _ in range(batch_size)],
+                [fake_label for _ in range(batch_size)],
             )
         start = self._index_in_epoch
         # Shuffle for the first epoch
@@ -250,19 +247,19 @@ class _DataSet:
 def _maybe_download(filename, work_directory, source_url):
     """Download the data from source url, unless it's already here.
 
-  Args:
-      filename: string, name of the file in the directory.
-      work_directory: string, path to working directory.
-      source_url: url to download from if file doesn't exist.
+    Args:
+        filename: string, name of the file in the directory.
+        work_directory: string, path to working directory.
+        source_url: url to download from if file doesn't exist.
 
-  Returns:
-      Path to resulting file.
-  """
+    Returns:
+        Path to resulting file.
+    """
     if not gfile.Exists(work_directory):
         gfile.MakeDirs(work_directory)
     filepath = os.path.join(work_directory, filename)
     if not gfile.Exists(filepath):
-        urllib.request.urlretrieve(source_url, filepath)
+        urllib.request.urlretrieve(source_url, filepath)  # noqa: S310
     with gfile.GFile(filepath) as f:
         size = f.size()
         print("Successfully downloaded", filename, size, "bytes.")
@@ -328,7 +325,8 @@ def read_data_sets(
 
     if not 0 <= validation_size <= len(train_images):
         raise ValueError(
-            f"Validation size should be between 0 and {len(train_images)}. Received: {validation_size}."
+            f"Validation size should be between 0 and {len(train_images)}. "
+            f"Received: {validation_size}."
         )
 
     validation_images = train_images[:validation_size]
@@ -336,7 +334,7 @@ def read_data_sets(
     train_images = train_images[validation_size:]
     train_labels = train_labels[validation_size:]
 
-    options = dict(dtype=dtype, reshape=reshape, seed=seed)
+    options = {"dtype": dtype, "reshape": reshape, "seed": seed}
 
     train = _DataSet(train_images, train_labels, **options)
     validation = _DataSet(validation_images, validation_labels, **options)
diff --git a/requirements.txt b/requirements.txt
index a1d607df0..acfbc823e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -15,7 +15,7 @@ scikit-fuzzy
 scikit-learn
 statsmodels
 sympy
-tensorflow; python_version < "3.11"
+tensorflow
 texttable
 tweepy
 xgboost
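For reference, here is a minimal NumPy-only sketch of the expectation/maximization loop that the renamed `dynamic_programming/k_means_clustering_tensorflow.py` expresses as TensorFlow ops. It is an illustration under stated assumptions, not code from this patch: the helper name `k_means_reference`, the seeding, and the fixed iteration count are made up here, and the patched module itself still calls the TensorFlow 1.x `tf.Session()` API.

```python
# Reviewer sketch (not part of the patch): plain-NumPy version of the
# expectation/maximization loop that tf_k_means_cluster runs as graph ops.
import numpy as np


def k_means_reference(vectors: np.ndarray, noofclusters: int, noofiterations: int = 100):
    rng = np.random.default_rng(0)
    # Start from randomly chosen input vectors, mirroring the shuffle-based init.
    centroids = vectors[rng.choice(len(vectors), size=noofclusters, replace=False)]
    assignments = np.zeros(len(vectors), dtype=int)
    for _ in range(noofiterations):
        # Expectation step: assign each vector to its nearest centroid.
        distances = np.linalg.norm(vectors[:, None, :] - centroids[None, :, :], axis=2)
        assignments = distances.argmin(axis=1)
        # Maximization step: move each centroid to the mean of its assigned vectors.
        for cluster in range(noofclusters):
            members = vectors[assignments == cluster]
            if len(members):
                centroids[cluster] = members.mean(axis=0)
    return centroids, assignments


if __name__ == "__main__":
    points = np.random.default_rng(1).random((100, 2))
    centers, labels = k_means_reference(points, noofclusters=3)
    print(centers)
    print(labels[:10])
```

The TensorFlow version in the diff performs the same two steps per iteration (its `##EXPECTATION STEP` comment marks the assignment pass), but builds the distance and mean computations as graph ops and evaluates them inside a session.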