diff --git a/howtos_as_py_files/README.md b/howtos_as_py_files/README.md new file mode 100644 index 0000000..840e737 --- /dev/null +++ b/howtos_as_py_files/README.md @@ -0,0 +1,10 @@ +Sebastian Raschka +last updated: 09/26/2014 + +# A collection of useful Python patterns + + +new_msg("Hello, World") +# prints: "My message: Hello, World" + +# print(dir(create_message.__closure__)) \ No newline at end of file diff --git a/howtos_as_py_files/closures.py b/howtos_as_py_files/closures.py deleted file mode 100755 index dc8dfea..0000000 --- a/howtos_as_py_files/closures.py +++ /dev/null @@ -1,17 +0,0 @@ -# Python 3.x -# sr 11/04/2013 -# closures -# - -def create_message(msg_txt): - def _priv_msg(message): # private, no access from outside - print("{}: {}".format(msg_txt, message)) - return _priv_msg # returns a function - -new_msg = create_message("My message") -# note, new_msg is a function - -new_msg("Hello, World") -# prints: "My message: Hello, World" - -# print(dir(create_message.__closure__)) diff --git a/howtos_as_py_files/cmd_line_args_1_sysarg.py b/howtos_as_py_files/cmd_line_args_1_sysarg.py deleted file mode 100644 index b8f8cbf..0000000 --- a/howtos_as_py_files/cmd_line_args_1_sysarg.py +++ /dev/null @@ -1,24 +0,0 @@ -# Getting command line arguments via sys.arg -# sr 11/30/2013 - -import sys - -def error(msg): - """Prints error message, sends it to stderr, and quites the program.""" - sys.exit(msg) - - -args = sys.argv[1:] # sys.argv[0] is the name of the python script itself - -try: - arg1 = int(args[0]) - arg2 = args[1] - arg3 = args[2] - print("Everything okay!") - -except ValueError: - error("First argument must be integer type!") - -except IndexError: - error("Requires 3 arguments!") - diff --git a/howtos_as_py_files/cpu_time.py b/howtos_as_py_files/cpu_time.py deleted file mode 100755 index 472cae7..0000000 --- a/howtos_as_py_files/cpu_time.py +++ /dev/null @@ -1,18 +0,0 @@ -# sr 10/29/13 -# Calculates elapsed CPU time in seconds as float. 
- -import time - -start_time = time.clock() - -i = 0 -while i < 10000000: - i += 1 - -elapsed_time = time.clock() - start_time -print "Time elapsed: {} seconds".format(elapsed_time) - -# prints "Time elapsed: 1.06 seconds" -# on 4 x 2.80 Ghz Intel Xeon, 6 Gb RAM - - diff --git a/howtos_as_py_files/date_time.py b/howtos_as_py_files/date_time.py deleted file mode 100644 index 28e7bcc..0000000 --- a/howtos_as_py_files/date_time.py +++ /dev/null @@ -1,13 +0,0 @@ -# Sebastian Raschka, 03/2014 -# Date and Time in Python - -import time - -# print time HOURS:MINUTES:SECONDS -# e.g., '10:50:58' -print(time.strftime("%H:%M:%S")) - - -# print current date DAY:MONTH:YEAR -# e.g., '06/03/2014' -print(time.strftime("%d/%m/%Y")) diff --git a/howtos_as_py_files/diff_files.py b/howtos_as_py_files/diff_files.py deleted file mode 100644 index 9399695..0000000 --- a/howtos_as_py_files/diff_files.py +++ /dev/null @@ -1,21 +0,0 @@ -# Sebastian Raschka, 2014 -# -# Print lines that are different between 2 files. Insensitive -# to the order of the file contents. - -id_set1 = set() -id_set2 = set() - -with open('id_file1.txt', 'r') as id_file: - for line in id_file: - id_set1.add(line.strip()) - -with open('id_file2.txt', 'r') as id_file: - for line in id_file: - id_set2.add(line.strip()) - -diffs = id_set2.difference(id_set1) - -for d in diffs: - print(d) -print("Total differences:",len(diffs)) diff --git a/howtos_as_py_files/doctest_example.py b/howtos_as_py_files/doctest_example.py deleted file mode 100644 index 246ecf8..0000000 --- a/howtos_as_py_files/doctest_example.py +++ /dev/null @@ -1,47 +0,0 @@ -# doctest example -# Sebastian Raschka 11/19/2013 - -def subtract(a, b): - """ - Subtracts second from first number and returns result. 
- >>> subtract(10, 5) - 5 - >>> subtract(11, 0.7) - 10.3 - """ - return a-b - -def hello_world(): - """ - Returns 'Hello, World' - >>> hello_world() - "Hello, World" - >>> hello_world() - 'Hello, World' - """ - return "Hello, World" - - -if __name__ == "__main__": # is 'false' if imported - import doctest - doctest.testmod() - - -""" RESULTS - -sebastian ~/Desktop> python3 doctest_example.py -********************************************************************** -File "doctest_example.py", line 17, in __main__.hello_world -Failed example: - hello_world() -Expected: - "Hello, World" -Got: - 'Hello, World' -********************************************************************** -1 items had failures: - 1 of 2 in __main__.hello_world -***Test Failed*** 1 failures. -sebastian ~/Desktop> - -""" diff --git a/howtos_as_py_files/file_browsing.py b/howtos_as_py_files/file_browsing.py deleted file mode 100644 index ce1cb5f..0000000 --- a/howtos_as_py_files/file_browsing.py +++ /dev/null @@ -1,80 +0,0 @@ -# File system operations using Python -# sr 11/30/2013 - - -import os -import shutil -import glob - -# working directory -c_dir = os.getcwd() # show current working directory -os.listdir(c_dir) # shows all files in the working directory -os.chdir('~/Data') # change working directory - - -# get all files in a directory -glob.glob('/Users/sebastian/Desktop/*') - -# e.g., ['/Users/sebastian/Desktop/untitled folder', '/Users/sebastian/Desktop/Untitled.txt'] - - - -# walk -tree = os.walk(c_dir) -# moves through sub directories and creates a 'generator' object of tuples -# ('dir', [file1, file2, ...] [subdirectory1, subdirectory2, ...]), -# (...), ... - - - -#check files: returns either True or False -os.exists('../rel_path') -os.exists('/home/abs_path') -os.isfile('./file.txt') -os.isdir('./subdir') - - - -# file permission (True or False -os.access('./some_file', os.F_OK) # File exists? Python 2.7 -os.access('./some_file', os.R_OK) # Ok to read? 
Python 2.7 -os.access('./some_file', os.W_OK) # Ok to write? Python 2.7 -os.access('./some_file', os.X_OK) # Ok to execute? Python 2.7 -os.access('./some_file', os.X_OK | os.W_OK) # Ok to execute or write? Python 2.7 - - - -# join (creates operating system dependent paths) -os.path.join('a', 'b', 'c') -# 'a/b/c' on Unix/Linux -# 'a\\b\\c' on Windows -os.path.normpath('a/b/c') # converts file separators - - - -# os.path: direcory and file names -os.path.samefile('./some_file', '/home/some_file') # True if those are the same -os.path.dirname('./some_file') # returns '.' (everythin but last component) -os.path.basename('./some_file') # returns 'some_file' (only last component -os.path.split('./some_file') # returns (dirname, basename) or ('.', 'some_file) -os.path.splitext('./some_file.txt') # returns ('./some_file', '.txt') -os.path.splitdrive('./some_file.txt') # returns ('', './some_file.txt') -os.path.isabs('./some_file.txt') # returns False (not an absolute path) -os.path.abspath('./some_file.txt') - - - - -# create and delete files and directories -os.mkdir('./test') # create a new direcotory -os.rmdir('./test') # removes an empty direcotory -os.removedirs('./test') # removes nested empty directories -os.remove('file.txt') # removes an individual file -shutil.rmtree('./test') # removes directory (empty or not empty) - -os.rename('./dir_before', './renamed') # renames directory if destination doesn't exist -shutil.move('./dir_before', './renamed') # renames directory always - -shutil.copytree('./orig', './copy') # copies a directory recursively -shutil.copyfile('file', 'copy') # copies a file - diff --git a/howtos_as_py_files/get_filename.py b/howtos_as_py_files/get_filename.py deleted file mode 100755 index a05f92a..0000000 --- a/howtos_as_py_files/get_filename.py +++ /dev/null @@ -1,63 +0,0 @@ -# Python 2.7 -# prompt user for file of specific type(s). 
-# 11/01/13 sebastian raschka - -import os.path - -def get_filename(file_type): - '''repeatedly prompts user for a file of specific type. - arguments: - file_type: list with accepted file types as strings. - returns: - (string): absolute path to the specified input file. - ''' - while True: - print "\n\nplease enter a file name, \nor type --help to get"\ - " a list of the accepted file formats" - file_name = raw_input(": ") - if file_name == "--help": - print "\naccepted file format(s): ", - for f in file_type: - print f, - continue - if not os.path.isfile(file_name): - print "\n\nsorry, this file doesn't exist. please try again.\n" - continue - if not (file_name.split(".")[-1] in file_type): - print "\nplease provide a file in correct format." - continue - break - return os.path.abspath(file_name) - -#get_filename(["txt", "doc"]) - - -# =========================== -# EXAMPLE -# =========================== - -''' -[bash]~/Desktop >python get_filename.py - - -please enter a file name, -or type --help to get a list of the accepted file formats -: --help - -accepted file format(s): txt doc - -please enter a file name, -or type --help to get a list of the accepted file formats -: test.tx - - -sorry, this file doesn't exist. please try again. 
- - - -please enter a file name, -or type --help to get a list of the accepted file formats -: test.txt -[bash]~/Desktop > -''' - diff --git a/howtos_as_py_files/get_minmax_indeces.py b/howtos_as_py_files/get_minmax_indeces.py deleted file mode 100644 index 1fe5b2a..0000000 --- a/howtos_as_py_files/get_minmax_indeces.py +++ /dev/null @@ -1,12 +0,0 @@ -# Sebastian Raschka, 03/2014 -# Getting the positions of min and max values in a list - -import operator - -values = [1, 2, 3, 4, 5] - -min_index, min_value = min(enumerate(values), key=operator.itemgetter(1)) -max_index, max_value = max(enumerate(values), key=operator.itemgetter(1)) - -print('min_index:', min_index, 'min_value:', min_value) -print('max_index:', max_index, 'max_value:', max_value) diff --git a/howtos_as_py_files/id_file1.txt b/howtos_as_py_files/id_file1.txt new file mode 100644 index 0000000..a600893 --- /dev/null +++ b/howtos_as_py_files/id_file1.txt @@ -0,0 +1,3 @@ +1234 +2342 +2341 \ No newline at end of file diff --git a/howtos_as_py_files/id_file2.txt b/howtos_as_py_files/id_file2.txt new file mode 100644 index 0000000..d05914a --- /dev/null +++ b/howtos_as_py_files/id_file2.txt @@ -0,0 +1,3 @@ +5234 +3344 +2341 \ No newline at end of file diff --git a/howtos_as_py_files/lambda_function.py b/howtos_as_py_files/lambda_function.py deleted file mode 100644 index 9da0c9c..0000000 --- a/howtos_as_py_files/lambda_function.py +++ /dev/null @@ -1,11 +0,0 @@ -# Sebastian Raschka 08/2014 - -# Lambda functions are just a short-hand way or writing -# short function definitions - -def square_root1(x): - return x**0.5 - -square_root2 = lambda x: x**0.5 - -assert(square_root1(9) == square_root2(9)) \ No newline at end of file diff --git a/howtos_as_py_files/make_bitstring.py b/howtos_as_py_files/make_bitstring.py deleted file mode 100644 index fb27089..0000000 --- a/howtos_as_py_files/make_bitstring.py +++ /dev/null @@ -1,17 +0,0 @@ -# Generating a bitstring from a Python list or numpy array -# where all 
postive values -> 1 -# all negative values -> 0 - -def make_bitstring(ary) - return np.where(ary > 0, 1, 0) - - -def faster_bitstring(ary) - return np.where(ary > 0).astype('i1') - -### Example: - -ary1 = np.array([1, 2, 0.3, -1, -2]) -make_bitstring(ary1) - -# returns array([1, 1, 1, 0, 0]) diff --git a/howtos_as_py_files/my_file.pkl b/howtos_as_py_files/my_file.pkl new file mode 100644 index 0000000..f24f898 Binary files /dev/null and b/howtos_as_py_files/my_file.pkl differ diff --git a/howtos_as_py_files/namedtuple_example.py b/howtos_as_py_files/namedtuple_example.py deleted file mode 100644 index 0546d85..0000000 --- a/howtos_as_py_files/namedtuple_example.py +++ /dev/null @@ -1,5 +0,0 @@ -from collections import namedtuple - -my_namedtuple = namedtuple('field_name', ['x', 'y', 'z', 'bla', 'blub']) -p = my_namedtuple(1, 2, 3, 4, 5) -print(p.x, p.y, p.z) diff --git a/howtos_as_py_files/normalize_data.py b/howtos_as_py_files/normalize_data.py deleted file mode 100644 index 117d2fb..0000000 --- a/howtos_as_py_files/normalize_data.py +++ /dev/null @@ -1,15 +0,0 @@ -# Sebastian Raschka, 03/2014 - -def normalize_val(x, data_list): - """ - Normalizes a value to a data list returning a float - between 0.0 and 1.0. - Returns the original object if value is not a integer or float. 
- - """ - if isinstance(x, float) or isinstance(x, int): - numerator = x - min(data_list) - denominator = max(data_list) - min(data_list) - return numerator/denominator - else: - return x diff --git a/howtos_as_py_files/numpy_matrix.py b/howtos_as_py_files/numpy_matrix.py deleted file mode 100644 index 06d5eb2..0000000 --- a/howtos_as_py_files/numpy_matrix.py +++ /dev/null @@ -1,36 +0,0 @@ -# numpy matrix operations -# sr 12/01/2013 - -import numpy - -ary1 = numpy.array([1,2,3,4,5]) # must be same type -ary2 = numpy.zeros((3,4)) # 3x4 matrix consisiting of 0s -ary3 = numpy.ones((3,4)) # 3x4 matrix consisiting of 1s -ary4 = numpy.identity(3) # 3x3 identity matrix -ary5 = ary1.copy() # make a copy of ary1 - -item1 = ary3[0, 0] # item in row1, column1 - -ary2.shape # tuple of dimensions. Here: (3,4) -ary2.size # number of elements. Here: 12 - - -ary2_t = ary2.transpose() # transposes matrix - -ary2.ravel() # makes an array linear (1-dimensional) - # by concatenating rows -ary2.reshape(2,6) # reshapes array (must have same dimensions) - -ary3[0:2, 0:3] # submatrix of first 2 rows and first 3 columns - -ary3 = ary3[[2,0,1]] # re-arrange rows - - -# element-wise operations - -ary1 + ary1 -ary1 * ary1 -numpy.dot(ary1, ary1) # matrix/vector (dot) product - -numpy.sum(ary1) # sums up all elements in the array -numpy.mean(ary1) # average of all elements in the array diff --git a/howtos_as_py_files/os_shutil_fileops.py b/howtos_as_py_files/os_shutil_fileops.py deleted file mode 100644 index d517f22..0000000 --- a/howtos_as_py_files/os_shutil_fileops.py +++ /dev/null @@ -1,22 +0,0 @@ -# sr 11/19/2013 -# common file operations in os and shutil modules - -import shutil -import os - -# Getting files of particular type from directory -files = [f for f in os.listdir(s_pdb_dir) if f.endswith(".txt")] - -# Copy and move -shutil.copyfile("/path/to/file", "/path/to/new/file") -shutil.copy("/path/to/file", "/path/to/directory") -shutil.move("/path/to/file","/path/to/directory") - -# 
Check if file or directory exists -os.path.exists("file or directory") -os.path.isfile("file") -os.path.isdir("directory") - -# Working directory and absolute path to files -os.getcwd() -os.path.abspath("file") diff --git a/howtos_as_py_files/patterns.ipynb b/howtos_as_py_files/patterns.ipynb new file mode 100644 index 0000000..42fb3aa --- /dev/null +++ b/howtos_as_py_files/patterns.ipynb @@ -0,0 +1,1486 @@ +{ + "metadata": { + "name": "", + "signature": "sha256:0d6c2b08bbaff6330460e1379004dabcfdd16f3712b1fb820c0315a3f70294f5" + }, + "nbformat": 3, + "nbformat_minor": 0, + "worksheets": [ + { + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[Go back](https://github.com/rasbt/python_reference) to the `python_reference` repository." + ] + }, + { + "cell_type": "heading", + "level": 1, + "metadata": {}, + "source": [ + "A random collection of useful Python patterns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "I just cleaned my hard drive and found a couple of useful Python patterns that I had some use for in the past. I thought it would be worthwhile to collect them in an IPython notebook for personal reference and share it with people who might find them useful too. \n", + "Most of those snippets are hopefully self-explanatory, but I am planning to add more comments and descriptions in the future." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "
" + ] + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Table of Contents" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- [Bitstrings from positive and negative elements in a list](#Bitstrings-from-positive-and-negative-elements-in-a-list)\n", + "- [Command line arguments 1 - sys.argv](#Command-line-arguments-1---sys.argv)\n", + "- [Data and time basics](#Data-and-time-basics)\n", + "- [Differences between 2 files](#Differences-between-2-files)\n", + "- [Differences between successive elements in a list](#Differences-between-successive-elements-in-a-list)\n", + "- [Doctest example](#Doctest-example)\n", + "- [File browsing basics](#File-browsing-basics)\n", + "- [File reading basics](#File-reading-basics)\n", + "- [Indices of min and max elements from a list](#Indices-of-min-and-max-elements-from-a-list)\n", + "- [Lambda functions](#Lambda-functions)\n", + "- [Private functions](#Private-functions)\n", + "- [Namedtuples](#Namedtuples)\n", + "- [Normalizing data](#Normalizing-data)\n", + "- [NumPy essentials](#NumPy-essentials)\n", + "- [Pickling Python objects to bitstreams](#Pickling-Python-objects-to-bitstreams)\n", + "- [Python version check](#Python-version-check)\n", + "- [Runtime within a script](#Runtime-within-a-script)\n", + "- [Sorting lists of tuples by elements](#Sorting-lists-of-tuples-by-elements)\n", + "- [Sorting multiple lists relative to each other](#Sorting-multiple-lists-relative-to-each-other)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "
" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "%load_ext watermark" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 1 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "%watermark -d -a \"Sebastian Raschka\" -v" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "Sebastian Raschka 26/09/2014 \n", + "\n", + "CPython 3.4.1\n", + "IPython 2.0.0\n" + ] + } + ], + "prompt_number": 2 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[More information](https://github.com/rasbt/watermark) about the `watermark` magic command extension." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "
" + ] + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Bitstrings from positive and negative elements in a list" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[back to top](#Table-of-Contents)" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# Generating a bitstring from a Python list or numpy array\n", + "# where all positive values -> 1\n", + "# all non-positive values -> 0\n", + "\n", + "import numpy as np\n", + "\n", + "def make_bitstring(ary):\n", + " return np.where(ary > 0, 1, 0)\n", + "\n", + "\n", + "def faster_bitstring(ary):\n", + " return np.where(ary > 0).astype('i1')\n", + "\n", + "### Example:\n", + "\n", + "ary1 = np.array([1, 2, 0.3, -1, -2])\n", + "print('input values %s' %ary1)\n", + "print('bitstring %s' %make_bitstring(ary1))" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "input values [ 1. 2. 0.3 -1. -2. ]\n", + "bitstring [1 1 1 0 0]\n" + ] + } + ], + "prompt_number": 3 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "
" + ] + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Command line arguments 1 - sys.argv" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[back to top](#Table-of-Contents)" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "%%file cmd_line_args_1_sysarg.py\n", + "import sys\n", + "\n", + "def error(msg):\n", + " \"\"\"Prints error message, sends it to stderr, and quits the program.\"\"\"\n", + " sys.exit(msg)\n", + "\n", + "args = sys.argv[1:] # sys.argv[0] is the name of the python script itself\n", + "\n", + "try:\n", + " arg1 = int(args[0])\n", + " arg2 = args[1]\n", + " arg3 = args[2]\n", + " print(\"Everything okay!\")\n", + "\n", + "except ValueError:\n", + " error(\"First argument must be integer type!\")\n", + "\n", + "except IndexError:\n", + " error(\"Requires 3 arguments!\")" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "Overwriting cmd_line_args_1_sysarg.py\n" + ] + } + ], + "prompt_number": 5 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "% run cmd_line_args_1_sysarg.py 1 2 3" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "Everything okay!\n" + ] + } + ], + "prompt_number": 6 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "% run cmd_line_args_1_sysarg.py a 2 3" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "ename": "SystemExit", + "evalue": "First argument must be integer type!", + "output_type": "pyerr", + "traceback": [ + "An exception has occurred, use %tb to see the full traceback.\n", + "\u001b[0;31mSystemExit\u001b[0m\u001b[0;31m:\u001b[0m First argument must be integer type!\n" + ] + } + ], + "prompt_number": 7 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "
" + ] + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Data and time basics" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[back to top](#Table-of-Contents)" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import time\n", + "\n", + "# print time HOURS:MINUTES:SECONDS\n", + "# e.g., '10:50:58'\n", + "print(time.strftime(\"%H:%M:%S\"))\n", + "\n", + "# print current date DAY:MONTH:YEAR\n", + "# e.g., '06/03/2014'\n", + "print(time.strftime(\"%d/%m/%Y\"))" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "13:28:05\n", + "26/09/2014\n" + ] + } + ], + "prompt_number": 7 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "
" + ] + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Differences between 2 files" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[back to top](#Table-of-Contents)" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "%%file id_file1.txt\n", + "1234\n", + "2342\n", + "2341" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "Writing id_file1.txt\n" + ] + } + ], + "prompt_number": 9 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "%%file id_file2.txt\n", + "5234\n", + "3344\n", + "2341" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "Writing id_file2.txt\n" + ] + } + ], + "prompt_number": 10 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# Print lines that are different between 2 files. Insensitive\n", + "# to the order of the file contents.\n", + "\n", + "id_set1 = set()\n", + "id_set2 = set()\n", + "\n", + "with open('id_file1.txt', 'r') as id_file:\n", + " for line in id_file:\n", + " id_set1.add(line.strip())\n", + "\n", + "with open('id_file2.txt', 'r') as id_file:\n", + " for line in id_file:\n", + " id_set2.add(line.strip()) \n", + "\n", + "diffs = id_set2.difference(id_set1)\n", + "\n", + "for d in diffs:\n", + " print(d)\n", + "print(\"Total differences:\",len(diffs))" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "5234\n", + "3344\n", + "Total differences: 2\n" + ] + } + ], + "prompt_number": 11 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "
" + ] + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Differences between successive elements in a list" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[back to top](#Table-of-Contents)" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "from itertools import islice\n", + "\n", + "lst = [1,2,3,5,8]\n", + "diff = [j - i for i, j in zip(lst, islice(lst, 1, None))]\n", + "print(diff)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "[1, 1, 2, 3]\n" + ] + } + ], + "prompt_number": 12 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "
" + ] + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Doctest example" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[back to top](#Table-of-Contents)" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "def subtract(a, b):\n", + " \"\"\"\n", + " Subtracts second from first number and returns result.\n", + " >>> subtract(10, 5)\n", + " 5\n", + " >>> subtract(11, 0.7)\n", + " 10.3\n", + " \"\"\"\n", + " return a-b\n", + "\n", + "if __name__ == \"__main__\": # is 'false' if imported\n", + " import doctest\n", + " doctest.testmod()\n", + " print('ok')" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "ok\n" + ] + } + ], + "prompt_number": 17 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "def hello_world():\n", + " \"\"\"\n", + " Returns 'Hello, World'\n", + " >>> hello_world()\n", + " 'Hello, World'\n", + " \"\"\"\n", + " return 'hello world'\n", + "\n", + "if __name__ == \"__main__\": # is 'false' if imported\n", + " import doctest\n", + " doctest.testmod()" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "**********************************************************************\n", + "File \"__main__\", line 4, in __main__.hello_world\n", + "Failed example:\n", + " hello_world()\n", + "Expected:\n", + " 'Hello, World'\n", + "Got:\n", + " 'hello world'\n", + "**********************************************************************\n", + "1 items had failures:\n", + " 1 of 1 in __main__.hello_world\n", + "***Test Failed*** 1 failures.\n" + ] + } + ], + "prompt_number": 18 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "
" + ] + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "File browsing basics" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[back to top](#Table-of-Contents)" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import os\n", + "import shutil\n", + "import glob\n", + "\n", + "# working directory\n", + "c_dir = os.getcwd() # show current working directory\n", + "os.listdir(c_dir) # shows all files in the working directory\n", + "os.chdir('~/Data') # change working directory\n", + "\n", + "\n", + "# get all files in a directory\n", + "glob.glob('/Users/sebastian/Desktop/*')\n", + "\n", + "# e.g., ['/Users/sebastian/Desktop/untitled folder', '/Users/sebastian/Desktop/Untitled.txt']\n", + "\n", + "# walk\n", + "tree = os.walk(c_dir) \n", + "# moves through sub directories and creates a 'generator' object of tuples\n", + "# ('dir', [subdirectory1, subdirectory2, ...], [file1, file2, ...]), \n", + "# (...), ...\n", + "\n", + "#check files: returns either True or False\n", + "os.exists('../rel_path')\n", + "os.exists('/home/abs_path')\n", + "os.isfile('./file.txt')\n", + "os.isdir('./subdir')\n", + "\n", + "\n", + "# file permission (True or False)\n", + "os.access('./some_file', os.F_OK) # File exists? Python 2.7\n", + "os.access('./some_file', os.R_OK) # Ok to read? Python 2.7\n", + "os.access('./some_file', os.W_OK) # Ok to write? Python 2.7\n", + "os.access('./some_file', os.X_OK) # Ok to execute? Python 2.7\n", + "os.access('./some_file', os.X_OK | os.W_OK) # Ok to execute and write?
Python 2.7\n", + "\n", + "# join (creates operating system dependent paths)\n", + "os.path.join('a', 'b', 'c')\n", + "# 'a/b/c' on Unix/Linux\n", + "# 'a\\\\b\\\\c' on Windows\n", + "os.path.normpath('a/b/c') # converts file separators\n", + "\n", + "\n", + "# os.path: directory and file names\n", + "os.path.samefile('./some_file', '/home/some_file') # True if those are the same\n", + "os.path.dirname('./some_file') # returns '.' (everything but last component)\n", + "os.path.basename('./some_file') # returns 'some_file' (only last component)\n", + "os.path.split('./some_file') # returns (dirname, basename) or ('.', 'some_file')\n", + "os.path.splitext('./some_file.txt') # returns ('./some_file', '.txt')\n", + "os.path.splitdrive('./some_file.txt') # returns ('', './some_file.txt')\n", + "os.path.isabs('./some_file.txt') # returns False (not an absolute path)\n", + "os.path.abspath('./some_file.txt')\n", + "\n", + "\n", + "# create and delete files and directories\n", + "os.mkdir('./test') # create a new directory\n", + "os.rmdir('./test') # removes an empty directory\n", + "os.removedirs('./test') # removes nested empty directories\n", + "os.remove('file.txt') # removes an individual file\n", + "shutil.rmtree('./test') # removes directory (empty or not empty)\n", + "\n", + "os.rename('./dir_before', './renamed') # renames directory if destination doesn't exist\n", + "shutil.move('./dir_before', './renamed') # renames directory always\n", + "\n", + "shutil.copytree('./orig', './copy') # copies a directory recursively\n", + "shutil.copyfile('file', 'copy') # copies a file\n", + "\n", + " \n", + "# Getting files of particular type from directory\n", + "files = [f for f in os.listdir(s_pdb_dir) if f.endswith(\".txt\")]\n", + " \n", + "# Copy and move\n", + "shutil.copyfile(\"/path/to/file\", \"/path/to/new/file\") \n", + "shutil.copy(\"/path/to/file\", \"/path/to/directory\")\n", + "shutil.move(\"/path/to/file\",\"/path/to/directory\")\n", + " \n", + "# Check if file or
directory exists\n", + "os.path.exists(\"file or directory\")\n", + "os.path.isfile(\"file\")\n", + "os.path.isdir(\"directory\")\n", + " \n", + "# Working directory and absolute path to files\n", + "os.getcwd()\n", + "os.path.abspath(\"file\")" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "
" + ] + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "File reading basics" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[back to top](#Table-of-Contents)" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# Note: rb opens file in binary mode to avoid issues with Windows systems\n", + "# where '\\r\\n' is used instead of '\\n' as newline character(s).\n", + "\n", + "\n", + "# A) Reading in Byte chunks\n", + "reader_a = open(\"file.txt\", \"rb\")\n", + "chunks = []\n", + "data = reader_a.read(64) # reads first 64 bytes\n", + "while data != \"\":\n", + " chunks.append(data)\n", + " data = reader_a.read(64)\n", + "if data:\n", + " chunks.append(data)\n", + "print(len(chunks))\n", + "reader_a.close()\n", + "\n", + "\n", + "# B) Reading whole file at once into a list of lines\n", + "with open(\"file.txt\", \"rb\") as reader_b: # recommended syntax, auto closes\n", + " data = reader_b.readlines() # data is assigned a list of lines\n", + "print(len(data))\n", + "\n", + "\n", + "# C) Reading whole file at once into a string\n", + "with open(\"file.txt\", \"rb\") as reader_c:\n", + " data = reader_c.read() # data is assigned the entire file content (not a list of lines)\n", + "print(len(data))\n", + "\n", + "\n", + "# D) Reading line by line into a list\n", + "data = []\n", + "with open(\"file.txt\", \"rb\") as reader_d:\n", + " for line in reader_d:\n", + " data.append(line)\n", + "print(len(data))\n" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "
" + ] + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Indices of min and max elements from a list" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[back to top](#Table-of-Contents)" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import operator\n", + "\n", + "values = [1, 2, 3, 4, 5]\n", + "\n", + "min_index, min_value = min(enumerate(values), key=operator.itemgetter(1))\n", + "max_index, max_value = max(enumerate(values), key=operator.itemgetter(1))\n", + "\n", + "print('min_index:', min_index, 'min_value:', min_value)\n", + "print('max_index:', max_index, 'max_value:', max_value)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "min_index: 0 min_value: 1\n", + "max_index: 4 max_value: 5\n" + ] + } + ], + "prompt_number": 19 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "
" + ] + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Lambda functions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[back to top](#Table-of-Contents)" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# Lambda functions are just a short-hand way or writing\n", + "# short function definitions\n", + "\n", + "def square_root1(x):\n", + " return x**0.5\n", + " \n", + "square_root2 = lambda x: x**0.5\n", + "\n", + "assert(square_root1(9) == square_root2(9))" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 20 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "
" + ] + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Private functions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[back to top](#Table-of-Contents)" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "def create_message(msg_txt):\n", + " def _priv_msg(message): # private, no access from outside\n", + " print(\"{}: {}\".format(msg_txt, message))\n", + " return _priv_msg # returns a function\n", + "\n", + "new_msg = create_message(\"My message\")\n", + "# note, new_msg is a function\n", + "\n", + "new_msg(\"Hello, World\")" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "My message: Hello, World\n" + ] + } + ], + "prompt_number": 2 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "
" + ] + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Namedtuples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[back to top](#Table-of-Contents)" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "from collections import namedtuple\n", + "\n", + "my_namedtuple = namedtuple('field_name', ['x', 'y', 'z', 'bla', 'blub'])\n", + "p = my_namedtuple(1, 2, 3, 4, 5)\n", + "print(p.x, p.y, p.z)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "1 2 3\n" + ] + } + ], + "prompt_number": 25 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "
" + ] + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Normalizing data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[back to top](#Table-of-Contents)" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "def normalize(data, min_val=0, max_val=1):\n", + " \"\"\"\n", + " Normalizes values in a list of data points to a range, e.g.,\n", + " between 0.0 and 1.0. \n", + " Returns the original object if value is not a integer or float.\n", + " \n", + " \"\"\"\n", + " norm_data = []\n", + " data_min = min(data)\n", + " data_max = max(data)\n", + " for x in data:\n", + " numerator = x - data_min\n", + " denominator = data_max - data_min\n", + " x_norm = (max_val-min_val) * numerator/denominator + min_val\n", + " norm_data.append(x_norm)\n", + " return norm_data" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 28 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "normalize([1,2,3,4,5])" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 31, + "text": [ + "[0.0, 0.25, 0.5, 0.75, 1.0]" + ] + } + ], + "prompt_number": 31 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "normalize([1,2,3,4,5], min_val=-10, max_val=10)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 30, + "text": [ + "[-10.0, -5.0, 0.0, 5.0, 10.0]" + ] + } + ], + "prompt_number": 30 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "
" + ] + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "NumPy essentials" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[back to top](#Table-of-Contents)" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import numpy as np\n", + "\n", + "ary1 = np.array([1,2,3,4,5]) # must be same type\n", + "ary2 = np.zeros((3,4)) # 3x4 matrix consisiting of 0s \n", + "ary3 = np.ones((3,4)) # 3x4 matrix consisiting of 1s \n", + "ary4 = np.identity(3) # 3x3 identity matrix\n", + "ary5 = ary1.copy() # make a copy of ary1\n", + "\n", + "item1 = ary3[0, 0] # item in row1, column1\n", + "\n", + "ary2.shape # tuple of dimensions. Here: (3,4)\n", + "ary2.size # number of elements. Here: 12\n", + "\n", + "\n", + "ary2_t = ary2.transpose() # transposes matrix\n", + "\n", + "ary2.ravel() # makes an array linear (1-dimensional)\n", + " # by concatenating rows\n", + "ary2.reshape(2,6) # reshapes array (must have same dimensions)\n", + "\n", + "ary3[0:2, 0:3] # submatrix of first 2 rows and first 3 columns \n", + "\n", + "ary3 = ary3[[2,0,1]] # re-arrange rows\n", + "\n", + "\n", + "# element-wise operations\n", + "\n", + "ary1 + ary1\n", + "ary1 * ary1\n", + "numpy.dot(ary1, ary1) # matrix/vector (dot) product\n", + "\n", + "numpy.sum(ary1, axis=1) # sum of a 1D array, column sums of a 2D array\n", + "numpy.mean(ary1, axis=1) # mean of a 1D array, column means of a 2D array" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "
" + ] + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Pickling Python objects to bitstreams" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[back to top](#Table-of-Contents)" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import pickle\n", + "\n", + "#### Generate some object\n", + "my_dict = dict()\n", + "for i in range(1,10):\n", + " my_dict[i] = \"some text\"\n", + "\n", + "#### Save object to file\n", + "pickle_out = open('my_file.pkl', 'wb')\n", + "pickle.dump(my_dict, pickle_out)\n", + "pickle_out.close()\n", + "\n", + "#### Load object from file\n", + "my_object_file = open('my_file.pkl', 'rb')\n", + "my_dict = pickle.load(my_object_file)\n", + "my_object_file.close()\n", + "\n", + "print(my_dict)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "{1: 'some text', 2: 'some text', 3: 'some text', 4: 'some text', 5: 'some text', 6: 'some text', 7: 'some text', 8: 'some text', 9: 'some text'}\n" + ] + } + ], + "prompt_number": 35 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "
" + ] + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Python version check" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[back to top](#Table-of-Contents)" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import sys\n", + "\n", + "def give_letter(word):\n", + " for letter in word:\n", + " yield letter\n", + "\n", + "if sys.version_info[0] == 3:\n", + " print('executed in Python 3.x')\n", + " test = give_letter('Hello')\n", + " print(next(test))\n", + " print('in for-loop:')\n", + " for l in test:\n", + " print(l)\n", + "\n", + "# if Python 2.x\n", + "if sys.version_info[0] == 2:\n", + " print('executed in Python 2.x')\n", + " test = give_letter('Hello')\n", + " print(test.next())\n", + " print('in for-loop:') \n", + " for l in test:\n", + " print(l)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "executed in Python 3.x\n", + "H\n", + "in for-loop:\n", + "e\n", + "l\n", + "l\n", + "o\n" + ] + } + ], + "prompt_number": 36 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "
" + ] + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Runtime within a script" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[back to top](#Table-of-Contents)" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import time\n", + "\n", + "start_time = time.clock()\n", + "\n", + "for i in range(10000000):\n", + " pass\n", + "\n", + "elapsed_time = time.clock() - start_time\n", + "print(\"Time elapsed: {} seconds\".format(elapsed_time))" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "Time elapsed: 0.49176900000000057 seconds\n" + ] + } + ], + "prompt_number": 4 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import timeit\n", + "elapsed_time = timeit.timeit('for i in range(10000000): pass', number=1)\n", + "print(\"Time elapsed: {} seconds\".format(elapsed_time))" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "Time elapsed: 0.3550995970144868 seconds\n" + ] + } + ], + "prompt_number": 6 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "
" + ] + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Sorting lists of tuples by elements" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[back to top](#Table-of-Contents)" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# Here, we make use of the \"key\" parameter of the in-built \"sorted()\" function \n", + "# (also available for the \".sort()\" method), which let's us define a function \n", + "# that is called on every element that is to be sorted. In this case, our \n", + "# \"key\"-function is a simple lambda function that returns the last item \n", + "# from every tuple.\n", + "\n", + "a_list = [(1,3,'c'), (2,3,'a'), (3,2,'b'), (2,2,'b')]\n", + "\n", + "sorted_list = sorted(a_list, key=lambda e: e[::-1])\n", + "\n", + "print(sorted_list)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "[(2, 3, 'a'), (2, 2, 'b'), (3, 2, 'b'), (1, 3, 'c')]\n" + ] + } + ], + "prompt_number": 37 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# prints [(2, 3, 'a'), (2, 2, 'b'), (3, 2, 'b'), (1, 3, 'c')]\n", + "\n", + "# If we are only interesting in sorting the list by the last element\n", + "# of the tuple and don't care about a \"tie\" situation, we can also use\n", + "# the index of the tuple item directly instead of reversing the tuple \n", + "# for efficiency.\n", + "\n", + "a_list = [(1,3,'c'), (2,3,'a'), (3,2,'b'), (2,2,'b')]\n", + "\n", + "sorted_list = sorted(a_list, key=lambda e: e[-1])\n", + "\n", + "print(sorted_list)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "[(2, 3, 'a'), (3, 2, 'b'), (2, 2, 'b'), (1, 3, 'c')]\n" + ] + } + ], + "prompt_number": 38 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "
" + ] + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Sorting multiple lists relative to each other" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[back to top](#Table-of-Contents)" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "\"\"\"\n", + "You have 3 lists that you want to sort \"relative\" to each other,\n", + "for example, picturing each list as a row in a 3x3 matrix: sort it by columns\n", + "\n", + "########################\n", + "If the input lists are\n", + "########################\n", + "\n", + " list1 = ['c','b','a']\n", + " list2 = [6,5,4]\n", + " list3 = ['some-val-associated-with-c','another_val-b','z_another_third_val-a']\n", + "\n", + "########################\n", + "the desired outcome is:\n", + "########################\n", + "\n", + " ['a', 'b', 'c'] \n", + " [4, 5, 6] \n", + " ['z_another_third_val-a', 'another_val-b', 'some-val-associated-with-c']\n", + "\n", + "########################\n", + "and NOT:\n", + "########################\n", + "\n", + " ['a', 'b', 'c'] \n", + " [4, 5, 6] \n", + " ['another_val-b', 'some-val-associated-with-c', 'z_another_third_val-a']\n", + "\n", + "\n", + "\"\"\"\n", + "\n", + "list1 = ['c','b','a']\n", + "list2 = [6,5,4]\n", + "list3 = ['some-val-associated-with-c','another_val-b','z_another_third_val-a']\n", + "\n", + "print('input values:\\n', list1, list2, list3)\n", + "\n", + "list1, list2, list3 = [list(t) for t in zip(*sorted(zip(list1, list2, list3)))]\n", + "\n", + "print('\\n\\nsorted output:\\n', list1, list2, list3 )" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "input values:\n", + " ['c', 'b', 'a'] [6, 5, 4] ['some-val-associated-with-c', 'another_val-b', 'z_another_third_val-a']\n", + "\n", + "\n", + "sorted output:\n", + " ['a', 'b', 'c'] [4, 5, 6] ['z_another_third_val-a', 'another_val-b', 'some-val-associated-with-c']\n" + 
] + } + ], + "prompt_number": 49 + } + ], + "metadata": {} + } + ] +} \ No newline at end of file diff --git a/howtos_as_py_files/pickle_module.py b/howtos_as_py_files/pickle_module.py deleted file mode 100755 index 81afd92..0000000 --- a/howtos_as_py_files/pickle_module.py +++ /dev/null @@ -1,23 +0,0 @@ -# sr 10/29/13 -# The pickle module converts Python objects into byte streams -# to save them as a file on your drive for re-use. -# -# module documentation http://docs.python.org/2/library/pickle.html - -import pickle - -#### Generate some object -my_dict = dict() -for i in range(1,1000): - my_dict[i] = "some text" - -#### Save object to file -pickle_out = open('my_file.pkl', 'wb') -pickle.dump(my_dict, pickle_out) -pickle_out.close() - -#### Load object from file -my_object_file = open('my_file.pkl', 'rb') -my_dict = pickle.load(my_object_file) -my_object_file.close() - diff --git a/howtos_as_py_files/pil_image_processing.py b/howtos_as_py_files/pil_image_processing.py deleted file mode 100644 index e69de29..0000000 diff --git a/howtos_as_py_files/python2_vs_3_version_info.py b/howtos_as_py_files/python2_vs_3_version_info.py deleted file mode 100644 index 19e7bb2..0000000 --- a/howtos_as_py_files/python2_vs_3_version_info.py +++ /dev/null @@ -1,24 +0,0 @@ -# Sebastian Raschka 04/10/2014 - -import sys - -def give_letter(word): - for letter in word: - yield letter - -if sys.version_info[0] == 3: - print('executed in Python 3.x') - test = give_letter('Hello') - print(next(test)) - print('in for-loop:') - for l in test: - print(l) - -# if Python 2.x -if sys.version_info[0] == 2: - print('executed in Python 2.x') - test = give_letter('Hello') - print(test.next()) - print('in for-loop:') - for l in test: - print(l) diff --git a/howtos_as_py_files/read_file.py b/howtos_as_py_files/read_file.py deleted file mode 100755 index 567ae0c..0000000 --- a/howtos_as_py_files/read_file.py +++ /dev/null @@ -1,44 +0,0 @@ -# Different methods to read from text files -# sr 11/18/2013 
-# Python 3.x - -# Note: rb opens file in binary mode to avoid issues with Windows systems -# where '\r\n' is used instead of '\n' as newline character(s). - - -# A) Reading in Byte chunks -reader_a = open("file.txt", "rb") -chunks = [] -data = reader_a.read(64) # reads first 64 bytes -while data != "": - chunks.append(data) - data = reader_a.read(64) -if data: - chunks.append(data) -print (len(chunks)) -reader_a.close() - - -# B) Reading whole file at once into a list of lines -with open("file.txt", "rb") as reader_b: # recommended syntax, auto closes - data = reader_b.readlines() # data is assigned a list of lines -print (len(data)) - - -# C) Reading whole file at once into a string -with open("file.txt", "rb") as reader_c: - data = reader_c.read() # data is assigned a list of lines -print (len(data)) - - -# D) Reading line by line into a list -data = [] -with open("file.txt", "rb") as reader_d: - for line in reader_d: - data.append(line) -print (len(data)) - - - - - diff --git a/howtos_as_py_files/reg_expr_1_basics.py b/howtos_as_py_files/reg_expr_1_basics.py deleted file mode 100644 index 5fafab8..0000000 --- a/howtos_as_py_files/reg_expr_1_basics.py +++ /dev/null @@ -1,101 +0,0 @@ -# Examples for using Python's Regular expression module "re" -# sr 11/30/2013 - -import re - -'''OVERVIEW - '|' means 'or' - '.' 
matches any single character - '()' groups into substrings -''' - - - - - -# read in data -fileobj = '''abc mno -def pqr -ghi stu -jkl vwx''' - -data = fileobj.strip().split('\n') - - -# A >> if 's' in line -print (50*'-' + '\nA\n' + 50*'-') -for line in data: - if re.search('abc', line): - print(">>", line) - else: - print(" ", line) - -''' --------------------------------------------------- -A --------------------------------------------------- ->> abc mno - def pqr - ghi stu - jkl vwx''' - - - -# B >> if 's' in line or 'r' in line -print (50*'-' + '\nB\n' + 50*'-') -for line in data: - if re.search('abc|efg', line): - print(">>", line) - else: - print(" ", line) - -''' --------------------------------------------------- -B --------------------------------------------------- ->> abc mno - def pqr - ghi stu - jkl vwx ----------------''' - - -# C >> -# use () to remember which object was found and return a match object -print (50*'-' + '\nC\n' + 50*'-') -for line in data: - match = re.search('(abc|efg)', line) # note the parantheses - if match: - print(match.group(1)) # prints 'abc' if found, else None - # match.group(0) is the whole pattern that matched - -''' --------------------------------------------------- -C --------------------------------------------------- -abc''' - - - -# read in data -fileobj = '''2013-01-01 -2012-02-02 -ghi stu -2012-03-03''' - -data = fileobj.strip().split('\n') - - -# D >> use '.' 
to match 'any character' -print (50*'-' + '\nD\n' + 50*'-') -for line in data: - match = re.search('(2012)-(..)-(..)', line) # note the parantheses - if match: - print(match.group(1), match.group(2), match.group(3)) - -''' --------------------------------------------------- -D --------------------------------------------------- -2012 02 02 -2012 03 03''' diff --git a/howtos_as_py_files/reg_expr_2_operators.py b/howtos_as_py_files/reg_expr_2_operators.py deleted file mode 100644 index 4994159..0000000 --- a/howtos_as_py_files/reg_expr_2_operators.py +++ /dev/null @@ -1,127 +0,0 @@ -# Examples for using Python's Regular expression module "re" -# sr 11/30/2013 - -import re - -'''OVERVIEW - '*' matches all characters that follow (0 or more) - '+' matches all characters that follow (1 or more) - '?' makes the previous character optional - '{4}' previous character must match exactly 4 times - '{2-4}' previous character must match exactly 2-4 times - '[0-9]' matches all characters in the set of numbers 0 to 9 - '[A-Z]' matches all characters in the set of A to Z - '\d' matches all digits, e.g., '4', '9' ... 
- '\D' matches all NON-digit characters - '\s' matches all space characters: '', '\t', '\r', '\n' - '\S' matches all NON-space characters - '\w' matches all non-punctuation characters (i.e., letters and digits) - '\W' matches all NON-letter and NON-digit characters - '^bla' NOT-matches 'bla' - 'let$' matches 'let' but not 'letter' - '\b' matches transition between non-word characters and word characters - -''' - -data = '''2013-01-01 -2012-02-02 -aaaa-02-02 -aa-02-02 --04-04 -2000 02-02 -ghi stu -2012-03-03'''.strip().split('\n') - - -# A >> '*' matches all characters that follow (0 or more) -print (50*'-' + '\nA\n' + 50*'-') - -for line in data: - match = re.search('(.*)-(..)-(..)', line) # note the parantheses - if match: - print(match.group(1), match.group(2), match.group(3)) - -''' --------------------------------------------------- -A --------------------------------------------------- -2013 01 01 -2012 02 02 -aaaa 02 02 -aa 02 02 - 04 04 -2012 03 03 -''' - - -# B >> '+' matches all characters that follow (1 or more) -print (50*'-' + '\nB\n' + 50*'-') - -for line in data: - match = re.search('(.+)-(..)-(..)', line) # note the parantheses - if match: - print(match.group(1), match.group(2), match.group(3)) - -''' --------------------------------------------------- -B --------------------------------------------------- -2013 01 01 -2012 02 02 -aaaa 02 02 -aa 02 02 -2012 03 03 -''' - - -# C >> '?' 
makes the previous character optional -print (50*'-' + '\nC\n' + 50*'-') - -for line in data: - match = re.search('(.+)-?(..)-(..)', line) # note the parantheses - if match: - print(match.group(1), match.group(2), match.group(3)) - -''' --------------------------------------------------- -C --------------------------------------------------- -2013- 01 01 -2012- 02 02 -aaaa- 02 02 -aa- 02 02 -- 04 04 -2000 02 02 -2012- 03 03 -''' - -# D >> '{4}' previous character must match exactly 4 times -print (50*'-' + '\nD\n' + 50*'-') - -for line in data: - match = re.search('(a{4})-(..)-(..)', line) # note the parantheses - if match: - print(match.group(1), match.group(2), match.group(3)) - -''' --------------------------------------------------- -D --------------------------------------------------- -aaaa 02 02 -''' - -# E >>'{2-4}' previous character must match exactly 2-4 times -print (50*'-' + '\nE\n' + 50*'-') - -for line in data: - match = re.search('(a{2,4})-(..)-(..)', line) # note the parantheses - if match: - print(match.group(1), match.group(2), match.group(3)) - -''' --------------------------------------------------- -E --------------------------------------------------- -aaaa 02 02 -aa 02 02 -''' diff --git a/howtos_as_py_files/sort_list_of_tuples_by_ele.py b/howtos_as_py_files/sort_list_of_tuples_by_ele.py deleted file mode 100644 index 4a94d4b..0000000 --- a/howtos_as_py_files/sort_list_of_tuples_by_ele.py +++ /dev/null @@ -1,34 +0,0 @@ -# Sebastian Raschka 09/02/2014 -# Sorting a list of tuples by starting with the last element of the tuple (=reversed tuple) - -# Here, we make use of the "key" parameter of the in-built "sorted()" function -# (also available for the ".sort()" method), which let's us define a function -# that is called on every element that is to be sorted. In this case, our -# "key"-function is a simple lambda function that returns the last item -# from every tuple. 
- - -a_list = [(1,3,'c'), (2,3,'a'), (3,2,'b'), (2,2,'b')] - -sorted_list = sorted(a_list, key=lambda e: e[::-1]) - -print(sorted_list) - -# prints [(2, 3, 'a'), (2, 2, 'b'), (3, 2, 'b'), (1, 3, 'c')] - - - -# If we are only interesting in sorting the list by the last element -# of the tuple and don't care about a "tie" situation, we can also use -# the index of the tuple item directly instead of reversing the tuple -# for efficiency. - - - -a_list = [(1,3,'c'), (2,3,'a'), (3,2,'b'), (2,2,'b')] - -sorted_list = sorted(a_list, key=lambda e: e[-1]) - -print(sorted_list) - -# prints [(2, 3, 'a'), (3, 2, 'b'), (2, 2, 'b'), (1, 3, 'c')] \ No newline at end of file diff --git a/howtos_as_py_files/sorting_multiple_lists_by_col.py b/howtos_as_py_files/sorting_multiple_lists_by_col.py deleted file mode 100644 index 7c534cb..0000000 --- a/howtos_as_py_files/sorting_multiple_lists_by_col.py +++ /dev/null @@ -1,39 +0,0 @@ -# Sebastian Raschka 2014 - -""" -You have 3 lists that you want to sort "relative" to each other, -for example, picturing each list as a row in a 3x3 matrix: sort it by columns - -######################## -If the input lists are -######################## - - list1 = ['c','b','a'] - list2 = [6,5,4] - list3 = ['some-val-associated-with-c','another_val-b','z_another_third_val-a'] - -######################## -the desired outcome is: -######################## - - ['a', 'b', 'c'] - [4, 5, 6] - ['z_another_third_val-a', 'another_val-b', 'some-val-associated-with-c'] - -######################## -and NOT: -######################## - - ['a', 'b', 'c'] - [4, 5, 6] - ['another_val-b', 'some-val-associated-with-c', 'z_another_third_val-a'] - - -""" - -list1 = ['c','b','a'] -list2 = [6,5,4] -list3 = ['some-val-associated-with-c','another_val-b','z_another_third_val-a'] - - -list1, list2, list3 = zip(*sorted(zip(list1, list2, list3))) diff --git a/howtos_as_py_files/timeit_test.py b/howtos_as_py_files/timeit_test.py deleted file mode 100644 index 31bb93e..0000000 --- 
a/howtos_as_py_files/timeit_test.py +++ /dev/null @@ -1,24 +0,0 @@ -# Sebastian Raschka, 03/2014 -# comparing string formating: %s and .format() - -import timeit - -format_res = timeit.timeit("['{}'.format(i) for i in range(10000)]", number=1000) - -binaryop_res = timeit.timeit("['%s' %i for i in range(10000)]", number=1000) - -print('{}: {}\n{}: {}'.format('format()', format_res, '%s', binaryop_res)) - -################################ -# On my machine -################################ -# -# Python 3.4.0 -# MacOS X 10.9.2 -# 2.5 GHz Intel Core i5 -# 4 GB 1600 Mhz DDR3 -# -################################ -# format(): 2.815331667999999 -# %s: 1.630353775999538 -################################ diff --git a/howtos_as_py_files/zen_of_python.py b/howtos_as_py_files/zen_of_python.py deleted file mode 100644 index d82cacd..0000000 --- a/howtos_as_py_files/zen_of_python.py +++ /dev/null @@ -1,24 +0,0 @@ ->>> import this -""" -The Zen of Python, by Tim Peters - -Beautiful is better than ugly. -Explicit is better than implicit. -Simple is better than complex. -Complex is better than complicated. -Flat is better than nested. -Sparse is better than dense. -Readability counts. -Special cases aren't special enough to break the rules. -Although practicality beats purity. -Errors should never pass silently. -Unless explicitly silenced. -In the face of ambiguity, refuse the temptation to guess. -There should be one-- and preferably only one --obvious way to do it. -Although that way may not be obvious at first unless you're Dutch. -Now is better than never. -Although never is often better than *right* now. -If the implementation is hard to explain, it's a bad idea. -If the implementation is easy to explain, it may be a good idea. -Namespaces are one honking great idea -- let's do more of those! -"""