regular expressions

This commit is contained in:
rasbt 2013-11-30 20:32:42 -05:00
parent 1fade9f841
commit e007b92563
4 changed files with 258 additions and 0 deletions

15
.gitignore vendored Executable file
View File

@ -0,0 +1,15 @@
*.pyc
*.o
*.so
*.gcno
*.swp
*.egg-info
*.egg
*~
build
dist
lib/test
doc/_build
*.log
*env
*ENV

24
cmd_line_args_1_sysarg.py Normal file
View File

@ -0,0 +1,24 @@
# Getting command line arguments via sys.argv
# sr 11/30/2013
import sys
def error(msg):
    """Abort the program, reporting msg.

    Raises SystemExit carrying msg. When msg is a string and the exception
    is not caught, the interpreter writes it to stderr and exits with a
    non-zero status.
    """
    raise SystemExit(msg)
# sys.argv[0] is the name of the python script itself, so the
# user-supplied arguments start at index 1.
args = sys.argv[1:]

# Only the argument parsing sits inside the try block, so the handlers
# below can only be triggered by a missing or malformed argument.
try:
    arg1 = int(args[0])  # ValueError if the first argument is not an integer
    arg2 = args[1]       # IndexError if fewer than 2 arguments were given
    arg3 = args[2]       # IndexError if fewer than 3 arguments were given
except ValueError:
    error("First argument must be integer type!")
except IndexError:
    error("Requires 3 arguments!")
else:
    # Success path: reached only when all three arguments were read.
    print("Everything okay!")

101
reg_expr_1_basics.py Normal file
View File

@ -0,0 +1,101 @@
# Examples for using Python's Regular expression module "re"
# sr 11/30/2013
import re
'''OVERVIEW
'|' means 'or'
'.' matches any single character
'()' groups into substrings
'''
# Sample text to search, one record per line.
fileobj = '''abc mno
def pqr
ghi stu
jkl vwx'''
# Trim surrounding whitespace, then break the text into a list of lines.
trimmed = fileobj.strip()
data = trimmed.split('\n')
# A >> if 's' in line
print (50*'-' + '\nA\n' + 50*'-')
for line in data:
if re.search('abc', line):
print(">>", line)
else:
print(" ", line)
'''
--------------------------------------------------
A
--------------------------------------------------
>> abc mno
def pqr
ghi stu
jkl vwx'''
# B >> '|' means "or": mark lines that contain 'abc' or 'efg'
banner = 50 * '-'
print(banner + '\nB\n' + banner)
for line in data:
    found = re.search(r'abc|efg', line) is not None
    print(">>" if found else " ", line)
'''
--------------------------------------------------
B
--------------------------------------------------
>> abc mno
def pqr
ghi stu
jkl vwx
'''
# C >>
# wrap the alternatives in () so the match object remembers which one was found
print('\n'.join((50 * '-', 'C', 50 * '-')))
for line in data:
    match = re.search(r'(abc|efg)', line)  # note the parentheses: they form group 1
    if match is None:
        continue  # nothing is printed for a line without a match
    # group(1) is the text captured by the first (); group(0) would be
    # the entire substring that matched
    print(match.group(1))
'''
--------------------------------------------------
C
--------------------------------------------------
abc'''
# New sample text: date-like lines mixed with one non-date line.
fileobj = '''2013-01-01
2012-02-02
ghi stu
2012-03-03'''
# Trim surrounding whitespace, then break the text into a list of lines.
trimmed = fileobj.strip()
data = trimmed.split('\n')
# D >> '.' stands for any single character; here two of them each for the
# second and third field of lines that start their match with '2012'
print(50 * '-' + '\nD\n' + 50 * '-')
date_2012 = re.compile(r'(2012)-(..)-(..)')  # three groups, one per field
for line in data:
    match = date_2012.search(line)
    if match:
        # groups() returns the three captured fields in order
        print(*match.groups())
'''
--------------------------------------------------
D
--------------------------------------------------
2012 02 02
2012 03 03'''

118
reg_expr_2_operators.py Normal file
View File

@ -0,0 +1,118 @@
# Examples for using Python's Regular expression module "re"
# sr 11/30/2013
import re
'''OVERVIEW
'*' matches the preceding element 0 or more times
'+' matches the preceding element 1 or more times
'?' makes the preceding element optional (0 or 1 times)
'{4}' preceding element must match exactly 4 times
'{2,4}' preceding element must match between 2 and 4 times
'[0-9]' matches any single character in the range 0 to 9
'[A-Z]' matches any single character in the range A to Z
'''
# Sample lines to search; split into a list with one entry per line.
sample_text = '''2013-01-01
2012-02-02
aaaa-02-02
aa-02-02
-04-04
2000 02-02
ghi stu
2012-03-03'''
data = sample_text.strip().split('\n')
# A >> '*' lets the preceding '.' repeat 0 or more times,
# so the first group is allowed to be empty
print('-' * 50, 'A', '-' * 50, sep='\n')
for line in data:
    match = re.search(r'(.*)-(..)-(..)', line)  # note the parentheses
    if match is not None:
        first, second, third = match.groups()
        print(first, second, third)
'''
--------------------------------------------------
A
--------------------------------------------------
2013 01 01
2012 02 02
aaaa 02 02
aa 02 02
04 04
2012 03 03
'''
# B >> '+' lets the preceding '.' repeat 1 or more times,
# so the first group needs at least one character
banner = 50 * '-'
print(banner + '\nB\n' + banner)
plus_pattern = re.compile(r'(.+)-(..)-(..)')  # note the parentheses
for line in data:
    match = plus_pattern.search(line)
    if not match:
        continue  # lines without a match print nothing
    print(*match.group(1, 2, 3))
'''
--------------------------------------------------
B
--------------------------------------------------
2013 01 01
2012 02 02
aaaa 02 02
aa 02 02
2012 03 03
'''
# C >> '?' makes the preceding '-' optional, so a line whose first
# separator is not a hyphen can match as well
print('\n'.join((50 * '-', 'C', 50 * '-')))
for line in data:
    # Because '(.+)' is greedy it also takes a leading hyphen when one is
    # present, which is why group 1 ends in '-' in the output below.
    match = re.search(r'(.+)-?(..)-(..)', line)  # note the parentheses
    if match:
        print(*match.groups())
'''
--------------------------------------------------
C
--------------------------------------------------
2013- 01 01
2012- 02 02
aaaa- 02 02
aa- 02 02
- 04 04
2000 02 02
2012- 03 03
'''
# D >> '{4}' requires the preceding 'a' to repeat exactly 4 times
print(50 * '-' + '\nD\n' + 50 * '-')
four_a = re.compile(r'(a{4})-(..)-(..)')  # note the parentheses
for line in data:
    match = four_a.search(line)
    if match is None:
        continue  # lines without a match print nothing
    print(match.group(1), match.group(2), match.group(3))
'''
--------------------------------------------------
D
--------------------------------------------------
aaaa 02 02
'''
# E >> '{2,4}' requires the preceding 'a' to repeat between 2 and 4 times
print('-' * 50, 'E', '-' * 50, sep='\n')
for line in data:
    match = re.search(r'(a{2,4})-(..)-(..)', line)  # note the parentheses
    if match is not None:
        first, second, third = match.group(1, 2, 3)
        print(first, second, third)
'''
--------------------------------------------------
E
--------------------------------------------------
aaaa 02 02
aa 02 02
'''