python patterns 1

This commit is contained in:
rasbt 2014-09-26 14:17:54 -04:00
parent 21e4d5e813
commit 89bf317556
30 changed files with 1502 additions and 841 deletions

View File

@ -0,0 +1,10 @@
Sebastian Raschka
last updated: 09/26/2014
# A collection of useful Python patterns
new_msg("Hello, World")
# prints: "My message: Hello, World"
# print(dir(create_message.__closure__))

View File

@ -1,17 +0,0 @@
# Python 3.x
# sr 11/04/2013
# closures
#
def create_message(msg_txt):
    """Build a printer function that prefixes every message with msg_txt.

    The returned function keeps msg_txt available through its closure,
    so the prefix can still be used after create_message has returned.
    """
    def _emit(message):
        # Inner function: only reachable through the returned reference.
        line = "{}: {}".format(msg_txt, message)
        print(line)

    return _emit
# Bind the prefix once; create_message returns its inner function.
new_msg = create_message("My message")
# note, new_msg is a function
new_msg("Hello, World")
# prints: "My message: Hello, World"
# The captured msg_txt lives in the closure of the *returned* function
# (create_message itself has no closure, its __closure__ is None):
# print(dir(new_msg.__closure__))

View File

@ -1,24 +0,0 @@
# Getting command line arguments via sys.argv
# sr 11/30/2013
import sys
def error(msg):
    """Quit the program with msg as the exit message.

    Raises SystemExit(msg); when msg is a string the interpreter writes
    it to stderr and the process ends with exit status 1.
    """
    raise SystemExit(msg)
# Drop sys.argv[0], which is the name of the python script itself.
args = sys.argv[1:]
try:
    # The first argument must parse as an integer, the other two stay strings.
    arg1 = int(args[0])
    arg2, arg3 = args[1], args[2]
    print("Everything okay!")
except IndexError:
    # Fewer than three arguments were supplied.
    error("Requires 3 arguments!")
except ValueError:
    # int() rejected the first argument.
    error("First argument must be integer type!")

View File

@ -1,18 +0,0 @@
# sr 10/29/13
# Calculates elapsed CPU time in seconds as float.
import time
# time.clock() was deprecated in Python 3.3 and removed in Python 3.8.
# Prefer time.process_time() (CPU time of the current process) and fall
# back to time.clock() only on interpreters that lack it (Python 2).
cpu_timer = getattr(time, "process_time", None) or time.clock

start_time = cpu_timer()
i = 0
while i < 10000000:
    i += 1
elapsed_time = cpu_timer() - start_time

# A parenthesised single-argument print works on Python 2 and Python 3.
print("Time elapsed: {} seconds".format(elapsed_time))
# prints "Time elapsed: 1.06 seconds"
# on 4 x 2.80 Ghz Intel Xeon, 6 Gb RAM

View File

@ -1,13 +0,0 @@
# Sebastian Raschka, 03/2014
# Date and Time in Python
import time
# print time HOURS:MINUTES:SECONDS
# e.g., '10:50:58'
clock_text = time.strftime("%H:%M:%S")
print(clock_text)

# print current date DAY/MONTH/YEAR
# e.g., '06/03/2014'
date_text = time.strftime("%d/%m/%Y")
print(date_text)

View File

@ -1,21 +0,0 @@
# Sebastian Raschka, 2014
#
# Print lines that are different between 2 files. Insensitive
# to the order of the file contents.
# Collect the whitespace-stripped lines of each file as a set, which makes
# the comparison independent of line order and of duplicates.
with open('id_file1.txt', 'r') as id_file:
    id_set1 = {line.strip() for line in id_file}
with open('id_file2.txt', 'r') as id_file:
    id_set2 = {line.strip() for line in id_file}

# Only one direction is reported: lines of file 2 that do not occur in file 1.
diffs = id_set2 - id_set1
for d in diffs:
    print(d)
print("Total differences:",len(diffs))

View File

@ -1,47 +0,0 @@
# doctest example
# Sebastian Raschka 11/19/2013
def subtract(a, b):
    """
    Return the difference a - b.

    >>> subtract(10, 5)
    5
    >>> subtract(11, 0.7)
    10.3
    """
    difference = a - b
    return difference
def hello_world():
    """
    Returns 'Hello, World'

    The first example below is expected to FAIL: doctest compares the
    expected text with the repr of the result, which uses single quotes.
    The second example shows the passing form.

    >>> hello_world()
    "Hello, World"
    >>> hello_world()
    'Hello, World'
    """
    return "Hello, World"
# Run all doctests found in this module's docstrings, but only when the file
# is executed as a script.
if __name__ == "__main__": # condition is False when the module is imported
    import doctest
    doctest.testmod()
""" RESULTS
sebastian ~/Desktop> python3 doctest_example.py
**********************************************************************
File "doctest_example.py", line 17, in __main__.hello_world
Failed example:
hello_world()
Expected:
"Hello, World"
Got:
'Hello, World'
**********************************************************************
1 items had failures:
1 of 2 in __main__.hello_world
***Test Failed*** 1 failures.
sebastian ~/Desktop>
"""

View File

@ -1,80 +0,0 @@
# File system operations using Python
# sr 11/30/2013
import os
import shutil
import glob
# NOTE: this is a cheat sheet of independent one-liners with placeholder
# paths; several calls delete or move things and are not meant to be run
# top to bottom as one script.

# working directory
c_dir = os.getcwd() # show current working directory
os.listdir(c_dir) # shows all files in the working directory
# os.chdir() does not expand '~' by itself, so expand it explicitly
os.chdir(os.path.expanduser('~/Data')) # change working directory

# get all files in a directory
glob.glob('/Users/sebastian/Desktop/*')
# e.g., ['/Users/sebastian/Desktop/untitled folder', '/Users/sebastian/Desktop/Untitled.txt']

# walk
tree = os.walk(c_dir)
# moves through sub directories and creates a 'generator' object of tuples
# ('dir', [subdirectory1, subdirectory2, ...], [file1, file2, ...]),
# (...), ...

# check files: returns either True or False
# (these helpers live in os.path; os.exists / os.isfile / os.isdir do not exist)
os.path.exists('../rel_path')
os.path.exists('/home/abs_path')
os.path.isfile('./file.txt')
os.path.isdir('./subdir')

# file permission (True or False)
os.access('./some_file', os.F_OK) # File exists? Python 2.7
os.access('./some_file', os.R_OK) # Ok to read? Python 2.7
os.access('./some_file', os.W_OK) # Ok to write? Python 2.7
os.access('./some_file', os.X_OK) # Ok to execute? Python 2.7
# OR-ing the flags asks for ALL of the listed permissions at once
os.access('./some_file', os.X_OK | os.W_OK) # Ok to execute and write? Python 2.7

# join (creates operating system dependent paths)
os.path.join('a', 'b', 'c')
# 'a/b/c' on Unix/Linux
# 'a\\b\\c' on Windows
os.path.normpath('a/b/c') # converts file separators

# os.path: directory and file names
os.path.samefile('./some_file', '/home/some_file') # True if those are the same
os.path.dirname('./some_file') # returns '.' (everything but last component)
os.path.basename('./some_file') # returns 'some_file' (only last component)
os.path.split('./some_file') # returns (dirname, basename) or ('.', 'some_file')
os.path.splitext('./some_file.txt') # returns ('./some_file', '.txt')
os.path.splitdrive('./some_file.txt') # returns ('', './some_file.txt')
os.path.isabs('./some_file.txt') # returns False (not an absolute path)
os.path.abspath('./some_file.txt')

# create and delete files and directories
os.mkdir('./test') # create a new directory
os.rmdir('./test') # removes an empty directory
os.removedirs('./test') # removes nested empty directories
os.remove('file.txt') # removes an individual file
shutil.rmtree('./test') # removes directory (empty or not empty)
os.rename('./dir_before', './renamed') # renames directory if destination doesn't exist
shutil.move('./dir_before', './renamed') # renames directory; moves it INTO the destination if that is an existing directory
shutil.copytree('./orig', './copy') # copies a directory recursively
shutil.copyfile('file', 'copy') # copies a file

View File

@ -1,63 +0,0 @@
# Python 2.7
# prompt user for file of specific type(s).
# 11/01/13 sebastian raschka
import os.path
def get_filename(file_type):
    '''repeatedly prompts user for a file of specific type.

    Python 2 only as written (print statements and raw_input).
    Loops until the entered name refers to an existing file whose text
    after the last "." is contained in file_type.

    arguments:
        file_type: list with accepted file types as strings
            (without the leading dot, e.g. ["txt", "doc"]).
    returns:
        (string): absolute path to the specified input file.
    '''
    while True:
        print "\n\nplease enter a file name, \nor type --help to get"\
              " a list of the accepted file formats"
        file_name = raw_input(": ")
        if file_name == "--help":
            # trailing commas keep the listed formats on a single line
            print "\naccepted file format(s): ",
            for f in file_type:
                print f,
            continue
        if not os.path.isfile(file_name):
            print "\n\nsorry, this file doesn't exist. please try again.\n"
            continue
        # NOTE(review): for a name without any ".", the whole name is compared
        # against file_type, so a file literally named "txt" would be accepted.
        if not (file_name.split(".")[-1] in file_type):
            print "\nplease provide a file in correct format."
            continue
        break
    return os.path.abspath(file_name)
#get_filename(["txt", "doc"])
# ===========================
# EXAMPLE
# ===========================
'''
[bash]~/Desktop >python get_filename.py
please enter a file name,
or type --help to get a list of the accepted file formats
: --help
accepted file format(s): txt doc
please enter a file name,
or type --help to get a list of the accepted file formats
: test.tx
sorry, this file doesn't exist. please try again.
please enter a file name,
or type --help to get a list of the accepted file formats
: test.txt
[bash]~/Desktop >
'''

View File

@ -1,12 +0,0 @@
# Sebastian Raschka, 03/2014
# Getting the positions of min and max values in a list
import operator
values = [1, 2, 3, 4, 5]

# Pair every value with its position and compare the pairs by value only.
by_value = operator.itemgetter(1)
indexed = list(enumerate(values))
min_index, min_value = min(indexed, key=by_value)
max_index, max_value = max(indexed, key=by_value)

print('min_index:', min_index, 'min_value:', min_value)
print('max_index:', max_index, 'max_value:', max_value)

View File

@ -0,0 +1,3 @@
1234
2342
2341

View File

@ -0,0 +1,3 @@
5234
3344
2341

View File

@ -1,11 +0,0 @@
# Sebastian Raschka 08/2014
# Lambda functions are just a short-hand way of writing
# short function definitions
def square_root1(x):
    """Return the square root of x (regular function definition)."""
    root = x ** 0.5
    return root


# The same computation as square_root1, written as a lambda expression.
square_root2 = lambda x: x ** 0.5

assert square_root1(9) == square_root2(9)

View File

@ -1,17 +0,0 @@
# Generating a bitstring from a Python list or numpy array
# where all positive values -> 1
# all other values (zero and negative) -> 0
import numpy as np  # both functions below rely on numpy


def make_bitstring(ary):
    """Map every positive entry of ary to 1 and every other entry to 0.

    ary may be a numpy array or a plain Python list (it is converted with
    np.asarray). Returns an integer numpy array of the same shape.
    """
    return np.where(np.asarray(ary) > 0, 1, 0)


def faster_bitstring(ary):
    """Same mapping as make_bitstring, returned as an int8 ('i1') array.

    The boolean mask is cast directly. The one-argument form
    np.where(mask) returns a tuple of index arrays, which has no astype
    method, so it cannot be used here.
    """
    return (np.asarray(ary) > 0).astype('i1')


### Example:
ary1 = np.array([1, 2, 0.3, -1, -2])
make_bitstring(ary1)
# returns array([1, 1, 1, 0, 0])

Binary file not shown.

View File

@ -1,5 +0,0 @@
from collections import namedtuple
# Tuple subclass with five named fields; its class name is 'field_name'.
my_namedtuple = namedtuple('field_name', 'x y z bla blub')
p = my_namedtuple(x=1, y=2, z=3, bla=4, blub=5)
print(p.x, p.y, p.z)

View File

@ -1,15 +0,0 @@
# Sebastian Raschka, 03/2014
def normalize_val(x, data_list):
    """
    Scale a number relative to the range of data_list.

    The minimum of data_list maps to 0.0 and the maximum to 1.0.
    Anything that is not an int or float is handed back unchanged.
    """
    if not isinstance(x, (float, int)):
        return x
    lowest = min(data_list)
    span = max(data_list) - lowest
    return (x - lowest) / span

View File

@ -1,36 +0,0 @@
# numpy matrix operations
# sr 12/01/2013
import numpy
# creating arrays
ary1 = numpy.array([1,2,3,4,5]) # 1-dimensional array; all elements share one type
ary2 = numpy.zeros((3,4)) # 3x4 matrix consisting of 0s
ary3 = numpy.ones((3,4)) # 3x4 matrix consisting of 1s
ary4 = numpy.identity(3) # 3x3 identity matrix
ary5 = ary1.copy() # make a copy of ary1
# indexing, shape and reshaping
item1 = ary3[0, 0] # item in row1, column1
ary2.shape # tuple of dimensions. Here: (3,4)
ary2.size # number of elements. Here: 12
ary2_t = ary2.transpose() # transposes matrix
ary2.ravel() # makes an array linear (1-dimensional)
# by concatenating rows
ary2.reshape(2,6) # reshapes array (total number of elements must stay the same)
ary3[0:2, 0:3] # submatrix of first 2 rows and first 3 columns
ary3 = ary3[[2,0,1]] # re-arrange rows
# element-wise operations
ary1 + ary1
ary1 * ary1
# products and reductions (these are not element-wise)
numpy.dot(ary1, ary1) # matrix/vector (dot) product
numpy.sum(ary1) # sums up all elements in the array
numpy.mean(ary1) # average of all elements in the array

View File

@ -1,22 +0,0 @@
# sr 11/19/2013
# common file operations in os and shutil modules
import shutil
import os
# Getting files of particular type from directory
# NOTE(review): s_pdb_dir is not defined in this snippet; presumably it holds
# the path of the directory to scan -- define it before running.
files = [f for f in os.listdir(s_pdb_dir) if f.endswith(".txt")]
# Copy and move (all paths below are placeholders)
shutil.copyfile("/path/to/file", "/path/to/new/file")
shutil.copy("/path/to/file", "/path/to/directory")
shutil.move("/path/to/file","/path/to/directory")
# Check if file or directory exists (each call returns True or False)
os.path.exists("file or directory")
os.path.isfile("file")
os.path.isdir("directory")
# Working directory and absolute path to files
os.getcwd()
os.path.abspath("file")

File diff suppressed because it is too large Load Diff

View File

@ -1,23 +0,0 @@
# sr 10/29/13
# The pickle module converts Python objects into byte streams
# to save them as a file on your drive for re-use.
#
# module documentation http://docs.python.org/2/library/pickle.html
import pickle
#### Generate some object
my_dict = dict()
for i in range(1,1000):
    my_dict[i] = "some text"

#### Save object to file
# The with-statement closes the file even if pickle.dump() raises.
with open('my_file.pkl', 'wb') as pickle_out:
    pickle.dump(my_dict, pickle_out)

#### Load object from file
# NOTE: only unpickle files from trusted sources; loading a pickle can
# execute arbitrary code.
with open('my_file.pkl', 'rb') as my_object_file:
    my_dict = pickle.load(my_object_file)

View File

@ -1,24 +0,0 @@
# Sebastian Raschka 04/10/2014
import sys
def give_letter(word):
    """Generator that yields the items of word one at a time, in order."""
    for char in word:
        yield char
# Python 3.x: advance a generator with the built-in next()
if sys.version_info[0] == 3:
    print('executed in Python 3.x')
    test = give_letter('Hello')
    print(next(test))
    print('in for-loop:')
    # the for-loop continues after the item already consumed above
    for letter in test:
        print(letter)
# Python 2.x: generators additionally offer a .next() method
elif sys.version_info[0] == 2:
    print('executed in Python 2.x')
    test = give_letter('Hello')
    print(test.next())
    print('in for-loop:')
    for letter in test:
        print(letter)

View File

@ -1,44 +0,0 @@
# Different methods to read from text files
# sr 11/18/2013
# Python 3.x
# Note: rb opens file in binary mode to avoid issues with Windows systems
# where '\r\n' is used instead of '\n' as newline character(s).
# A) Reading in Byte chunks
chunks = []
with open("file.txt", "rb") as reader_a: # closed automatically, also on errors
    data = reader_a.read(64) # reads first 64 bytes
    # In binary mode read() returns bytes and b"" at end of file. A bytes
    # object never compares equal to the str "", so test for emptiness.
    while data:
        chunks.append(data)
        data = reader_a.read(64)
print (len(chunks))

# B) Reading whole file at once into a list of lines
with open("file.txt", "rb") as reader_b: # recommended syntax, auto closes
    data = reader_b.readlines() # data is assigned a list of lines
print (len(data))

# C) Reading whole file at once into a string
with open("file.txt", "rb") as reader_c:
    data = reader_c.read() # data is assigned the whole content as one bytes object
print (len(data))

# D) Reading line by line into a list
data = []
with open("file.txt", "rb") as reader_d:
    for line in reader_d:
        data.append(line)
print (len(data))

View File

@ -1,101 +0,0 @@
# Examples for using Python's Regular expression module "re"
# sr 11/30/2013
import re
'''OVERVIEW
'|' means 'or'
'.' matches any single character
'()' groups into substrings
'''
# read in data
fileobj = '''abc mno
def pqr
ghi stu
jkl vwx'''
data = fileobj.strip().split('\n')

# A >> flag every line that contains 'abc'
print('-' * 50 + '\nA\n' + '-' * 50)
for line in data:
    marker = ">>" if re.search('abc', line) else " "
    print(marker, line)
'''
--------------------------------------------------
A
--------------------------------------------------
>> abc mno
def pqr
ghi stu
jkl vwx'''
# B >> flag every line that contains 'abc' or 'efg' ('|' means 'or')
print('-' * 50 + '\nB\n' + '-' * 50)
for line in data:
    found = re.search('abc|efg', line) is not None
    print(">>" if found else " ", line)
'''
--------------------------------------------------
B
--------------------------------------------------
>> abc mno
def pqr
ghi stu
jkl vwx
---------------'''
# C >>
# use () to remember which alternative was found; re.search returns a
# match object, or None when the pattern does not occur in the line
print('-' * 50 + '\nC\n' + '-' * 50)
for line in data:
    match = re.search('(abc|efg)', line) # note the parentheses
    if match is None:
        continue # nothing is printed for lines without a match
    # group(1) is the text captured by the first pair of parentheses;
    # match.group(0) is the whole text that matched
    print(match.group(1))
'''
--------------------------------------------------
C
--------------------------------------------------
abc'''
# read in data
fileobj = '''2013-01-01
2012-02-02
ghi stu
2012-03-03'''
data = fileobj.strip().split('\n')

# D >> use '.' to match 'any character'
print('-' * 50 + '\nD\n' + '-' * 50)
for line in data:
    match = re.search('(2012)-(..)-(..)', line) # note the parentheses
    if match is not None:
        # the three captured groups, in pattern order
        print(*match.group(1, 2, 3))
'''
--------------------------------------------------
D
--------------------------------------------------
2012 02 02
2012 03 03'''

View File

@ -1,127 +0,0 @@
# Examples for using Python's Regular expression module "re"
# sr 11/30/2013
import re
'''OVERVIEW
'*' matches the previous element 0 or more times
'+' matches the previous element 1 or more times
'?' makes the previous element optional (0 or 1 times)
'{4}' previous element must match exactly 4 times
'{2,4}' previous element must match 2 to 4 times
'[0-9]' matches any single character in the set of digits 0 to 9
'[A-Z]' matches any single character in the set of A to Z
'\d' matches any digit, e.g., '4', '9' ...
'\D' matches any NON-digit character
'\s' matches any whitespace character: ' ', '\t', '\r', '\n'
'\S' matches any NON-whitespace character
'\w' matches any word character (letters, digits and the underscore)
'\W' matches any NON-word character
'^bla' matches 'bla' only at the beginning of the string
'[^bla]' matches any single character except 'b', 'l' and 'a'
'let$' matches 'let' only at the end of the string (so not in 'letter')
'\b' matches transition between non-word characters and word characters
'''
data = '''2013-01-01
2012-02-02
aaaa-02-02
aa-02-02
-04-04
2000 02-02
ghi stu
2012-03-03'''.strip().split('\n')

# A >> '.*' matches any run of characters, including an empty one (0 or more)
print('-' * 50 + '\nA\n' + '-' * 50)
for line in data:
    match = re.search('(.*)-(..)-(..)', line) # note the parentheses
    if match is not None:
        print(*match.groups())
'''
--------------------------------------------------
A
--------------------------------------------------
2013 01 01
2012 02 02
aaaa 02 02
aa 02 02
04 04
2012 03 03
'''
# B >> '.+' matches any run of at least one character (1 or more)
print('-' * 50 + '\nB\n' + '-' * 50)
for line in data:
    match = re.search('(.+)-(..)-(..)', line) # note the parentheses
    if match is None:
        continue
    print(*match.group(1, 2, 3))
'''
--------------------------------------------------
B
--------------------------------------------------
2013 01 01
2012 02 02
aaaa 02 02
aa 02 02
2012 03 03
'''
# C >> '?' makes the previous character optional
# (the greedy '.+' in front of it swallows the first '-', as the output shows)
print('-' * 50 + '\nC\n' + '-' * 50)
for line in data:
    match = re.search('(.+)-?(..)-(..)', line) # note the parentheses
    if match is not None:
        print(*match.groups())
'''
--------------------------------------------------
C
--------------------------------------------------
2013- 01 01
2012- 02 02
aaaa- 02 02
aa- 02 02
- 04 04
2000 02 02
2012- 03 03
'''
# D >> '{4}' previous character must match exactly 4 times
print('-' * 50 + '\nD\n' + '-' * 50)
for line in data:
    match = re.search('(a{4})-(..)-(..)', line) # note the parentheses
    if match is None:
        continue
    print(*match.group(1, 2, 3))
'''
--------------------------------------------------
D
--------------------------------------------------
aaaa 02 02
'''
# E >> '{2,4}' previous character must match 2 to 4 times
print('-' * 50 + '\nE\n' + '-' * 50)
for line in data:
    match = re.search('(a{2,4})-(..)-(..)', line) # note the parentheses
    if match is not None:
        print(*match.groups())
'''
--------------------------------------------------
E
--------------------------------------------------
aaaa 02 02
aa 02 02
'''

View File

@ -1,34 +0,0 @@
# Sebastian Raschka 09/02/2014
# Sorting a list of tuples by starting with the last element of the tuple (=reversed tuple)
# Here, we make use of the "key" parameter of the built-in "sorted()" function
# (also available for the ".sort()" method), which lets us define a function
# that is called on every element that is to be sorted. In this case, our
# "key"-function is a simple lambda function that returns the reversed
# tuple, so that the last item of every tuple is compared first.
a_list = [(1,3,'c'), (2,3,'a'), (3,2,'b'), (2,2,'b')]

# Compare the tuples back to front: the last item decides first, the
# earlier items only break ties.
sorted_list = list(a_list)
sorted_list.sort(key=lambda item: tuple(reversed(item)))
print(sorted_list)
# prints [(2, 3, 'a'), (2, 2, 'b'), (3, 2, 'b'), (1, 3, 'c')]

# If we are only interested in sorting the list by the last element
# of the tuple and don't care about a "tie" situation, we can also use
# the index of the tuple item directly instead of reversing the tuple
# for efficiency. Ties then keep their original order (the sort is stable).
a_list = [(1,3,'c'), (2,3,'a'), (3,2,'b'), (2,2,'b')]
sorted_list = list(a_list)
sorted_list.sort(key=lambda item: item[-1])
print(sorted_list)
# prints [(2, 3, 'a'), (3, 2, 'b'), (2, 2, 'b'), (1, 3, 'c')]

View File

@ -1,39 +0,0 @@
# Sebastian Raschka 2014
"""
You have 3 lists that you want to sort "relative" to each other,
for example, picturing each list as a row in a 3x3 matrix: sort it by columns
########################
If the input lists are
########################
list1 = ['c','b','a']
list2 = [6,5,4]
list3 = ['some-val-associated-with-c','another_val-b','z_another_third_val-a']
########################
the desired outcome is:
########################
['a', 'b', 'c']
[4, 5, 6]
['z_another_third_val-a', 'another_val-b', 'some-val-associated-with-c']
########################
and NOT:
########################
['a', 'b', 'c']
[4, 5, 6]
['another_val-b', 'some-val-associated-with-c', 'z_another_third_val-a']
"""
list1 = ['c','b','a']
list2 = [6,5,4]
list3 = ['some-val-associated-with-c','another_val-b','z_another_third_val-a']

# zip() bundles the i-th items of the three lists into one row, sorted()
# orders the rows (by the list1 item first; later items only break ties),
# and zip(*...) splits the sorted rows back into three columns.
# zip() yields tuples, so each column is converted back into a list to
# match the outcome documented above.
list1, list2, list3 = (
    list(column) for column in zip(*sorted(zip(list1, list2, list3)))
)

View File

@ -1,24 +0,0 @@
# Sebastian Raschka, 03/2014
# comparing string formatting: %s and .format()
import timeit
# Each statement builds a list of 10000 formatted strings; timeit runs it
# number=1000 times and returns the total elapsed time in seconds.
format_res = timeit.timeit("['{}'.format(i) for i in range(10000)]", number=1000)
binaryop_res = timeit.timeit("['%s' %i for i in range(10000)]", number=1000)
# One "label: seconds" line per variant.
print('{}: {}\n{}: {}'.format('format()', format_res, '%s', binaryop_res))
################################
# On my machine
################################
#
# Python 3.4.0
# MacOS X 10.9.2
# 2.5 GHz Intel Core i5
# 4 GB 1600 Mhz DDR3
#
################################
# format(): 2.815331667999999
# %s: 1.630353775999538
################################

View File

@ -1,24 +0,0 @@
>>> import this
"""
The Zen of Python, by Tim Peters
Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!
"""