mirror of
https://github.com/rasbt/python_reference.git
synced 2024-11-23 20:11:13 +00:00
regular expressions
This commit is contained in:
parent
1fade9f841
commit
e007b92563
15
.gitignore
vendored
Executable file
15
.gitignore
vendored
Executable file
|
@ -0,0 +1,15 @@
|
|||
*.pyc
|
||||
*.o
|
||||
*.so
|
||||
*.gcno
|
||||
*.swp
|
||||
*.egg-info
|
||||
*.egg
|
||||
*~
|
||||
build
|
||||
dist
|
||||
lib/test
|
||||
doc/_build
|
||||
*.log
|
||||
*env
|
||||
*ENV
|
24
cmd_line_args_1_sysarg.py
Normal file
24
cmd_line_args_1_sysarg.py
Normal file
|
@ -0,0 +1,24 @@
|
|||
# Getting command line arguments via sys.argv
|
||||
# sr 11/30/2013
|
||||
|
||||
import sys
|
||||
|
||||
def error(msg):
|
||||
"""Prints error message, sends it to stderr, and quites the program."""
|
||||
sys.exit(msg)
|
||||
|
||||
|
||||
args = sys.argv[1:] # sys.argv[0] is the name of the python script itself
|
||||
|
||||
try:
|
||||
arg1 = int(args[0])
|
||||
arg2 = args[1]
|
||||
arg3 = args[2]
|
||||
print("Everything okay!")
|
||||
|
||||
except ValueError:
|
||||
error("First argument must be integer type!")
|
||||
|
||||
except IndexError:
|
||||
error("Requires 3 arguments!")
|
||||
|
101
reg_expr_1_basics.py
Normal file
101
reg_expr_1_basics.py
Normal file
|
@ -0,0 +1,101 @@
|
|||
# Examples for using Python's Regular expression module "re"
|
||||
# sr 11/30/2013
|
||||
|
||||
import re
|
||||
|
||||
'''OVERVIEW
|
||||
'|' means 'or'
|
||||
'.' matches any single character
|
||||
'()' groups into substrings
|
||||
'''
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# read in data
|
||||
fileobj = '''abc mno
|
||||
def pqr
|
||||
ghi stu
|
||||
jkl vwx'''
|
||||
|
||||
data = fileobj.strip().split('\n')
|
||||
|
||||
|
||||
# A >> equivalent to: if 'abc' in line
|
||||
print (50*'-' + '\nA\n' + 50*'-')
|
||||
for line in data:
|
||||
if re.search('abc', line):
|
||||
print(">>", line)
|
||||
else:
|
||||
print(" ", line)
|
||||
|
||||
'''
|
||||
--------------------------------------------------
|
||||
A
|
||||
--------------------------------------------------
|
||||
>> abc mno
|
||||
def pqr
|
||||
ghi stu
|
||||
jkl vwx'''
|
||||
|
||||
|
||||
|
||||
# B >> equivalent to: if 'abc' in line or 'efg' in line
|
||||
print (50*'-' + '\nB\n' + 50*'-')
|
||||
for line in data:
|
||||
if re.search('abc|efg', line):
|
||||
print(">>", line)
|
||||
else:
|
||||
print(" ", line)
|
||||
|
||||
'''
|
||||
--------------------------------------------------
|
||||
B
|
||||
--------------------------------------------------
|
||||
>> abc mno
|
||||
def pqr
|
||||
ghi stu
|
||||
jkl vwx
|
||||
---------------'''
|
||||
|
||||
|
||||
# C >>
|
||||
# use () to remember which object was found and return a match object
|
||||
print (50*'-' + '\nC\n' + 50*'-')
|
||||
for line in data:
|
||||
match = re.search('(abc|efg)', line) # note the parantheses
|
||||
if match:
|
||||
print(match.group(1)) # prints 'abc' if found, else None
|
||||
# match.group(0) is the whole pattern that matched
|
||||
|
||||
'''
|
||||
--------------------------------------------------
|
||||
C
|
||||
--------------------------------------------------
|
||||
abc'''
|
||||
|
||||
|
||||
|
||||
# read in data
|
||||
fileobj = '''2013-01-01
|
||||
2012-02-02
|
||||
ghi stu
|
||||
2012-03-03'''
|
||||
|
||||
data = fileobj.strip().split('\n')
|
||||
|
||||
|
||||
# D >> use '.' to match 'any character'
|
||||
print (50*'-' + '\nD\n' + 50*'-')
|
||||
for line in data:
|
||||
match = re.search('(2012)-(..)-(..)', line) # note the parantheses
|
||||
if match:
|
||||
print(match.group(1), match.group(2), match.group(3))
|
||||
|
||||
'''
|
||||
--------------------------------------------------
|
||||
D
|
||||
--------------------------------------------------
|
||||
2012 02 02
|
||||
2012 03 03'''
|
118
reg_expr_2_operators.py
Normal file
118
reg_expr_2_operators.py
Normal file
|
@ -0,0 +1,118 @@
|
|||
# Examples for using Python's Regular expression module "re"
|
||||
# sr 11/30/2013
|
||||
|
||||
import re
|
||||
|
||||
'''OVERVIEW
|
||||
'*' matches the previous character 0 or more times
|
||||
'+' matches the previous character 1 or more times
|
||||
'?' makes the previous character optional
|
||||
'{4}' previous character must match exactly 4 times
|
||||
'{2,4}' previous character must match 2 to 4 times
|
||||
'[0-9]' matches all characters in the set of numbers 0 to 9
|
||||
'[A-Z]' matches all characters in the set of A to Z
|
||||
|
||||
'''
|
||||
|
||||
data = '''2013-01-01
|
||||
2012-02-02
|
||||
aaaa-02-02
|
||||
aa-02-02
|
||||
-04-04
|
||||
2000 02-02
|
||||
ghi stu
|
||||
2012-03-03'''.strip().split('\n')
|
||||
|
||||
|
||||
# A >> '*' matches the previous character 0 or more times ('.*' = any run of characters, possibly empty)
|
||||
print (50*'-' + '\nA\n' + 50*'-')
|
||||
|
||||
for line in data:
|
||||
match = re.search('(.*)-(..)-(..)', line) # note the parantheses
|
||||
if match:
|
||||
print(match.group(1), match.group(2), match.group(3))
|
||||
|
||||
'''
|
||||
--------------------------------------------------
|
||||
A
|
||||
--------------------------------------------------
|
||||
2013 01 01
|
||||
2012 02 02
|
||||
aaaa 02 02
|
||||
aa 02 02
|
||||
04 04
|
||||
2012 03 03
|
||||
'''
|
||||
|
||||
|
||||
# B >> '+' matches the previous character 1 or more times ('.+' = any non-empty run of characters)
|
||||
print (50*'-' + '\nB\n' + 50*'-')
|
||||
|
||||
for line in data:
|
||||
match = re.search('(.+)-(..)-(..)', line) # note the parantheses
|
||||
if match:
|
||||
print(match.group(1), match.group(2), match.group(3))
|
||||
|
||||
'''
|
||||
--------------------------------------------------
|
||||
B
|
||||
--------------------------------------------------
|
||||
2013 01 01
|
||||
2012 02 02
|
||||
aaaa 02 02
|
||||
aa 02 02
|
||||
2012 03 03
|
||||
'''
|
||||
|
||||
|
||||
# C >> '?' makes the previous character optional
|
||||
print (50*'-' + '\nC\n' + 50*'-')
|
||||
|
||||
for line in data:
|
||||
match = re.search('(.+)-?(..)-(..)', line) # note the parantheses
|
||||
if match:
|
||||
print(match.group(1), match.group(2), match.group(3))
|
||||
|
||||
'''
|
||||
--------------------------------------------------
|
||||
C
|
||||
--------------------------------------------------
|
||||
2013- 01 01
|
||||
2012- 02 02
|
||||
aaaa- 02 02
|
||||
aa- 02 02
|
||||
- 04 04
|
||||
2000 02 02
|
||||
2012- 03 03
|
||||
'''
|
||||
|
||||
# D >> '{4}' previous character must match exactly 4 times
|
||||
print (50*'-' + '\nD\n' + 50*'-')
|
||||
|
||||
for line in data:
|
||||
match = re.search('(a{4})-(..)-(..)', line) # note the parantheses
|
||||
if match:
|
||||
print(match.group(1), match.group(2), match.group(3))
|
||||
|
||||
'''
|
||||
--------------------------------------------------
|
||||
D
|
||||
--------------------------------------------------
|
||||
aaaa 02 02
|
||||
'''
|
||||
|
||||
# E >> '{2,4}' previous character must match 2 to 4 times
|
||||
print (50*'-' + '\nE\n' + 50*'-')
|
||||
|
||||
for line in data:
|
||||
match = re.search('(a{2,4})-(..)-(..)', line) # note the parantheses
|
||||
if match:
|
||||
print(match.group(1), match.group(2), match.group(3))
|
||||
|
||||
'''
|
||||
--------------------------------------------------
|
||||
E
|
||||
--------------------------------------------------
|
||||
aaaa 02 02
|
||||
aa 02 02
|
||||
'''
|
Loading…
Reference in New Issue
Block a user