From 27283e2422d4773456419bf7cb8ee6845a54aff6 Mon Sep 17 00:00:00 2001
From: rasbt
Date: Sat, 30 Nov 2013 20:32:42 -0500
Subject: [PATCH] regular expressions

---
 .gitignore                |  15 +++++
 cmd_line_args_1_sysarg.py |  24 ++++++++
 reg_expr_1_basics.py      | 101 +++++++++++++++++++++++++++++++++
 reg_expr_2_operators.py   | 115 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 255 insertions(+)
 create mode 100755 .gitignore
 create mode 100644 cmd_line_args_1_sysarg.py
 create mode 100644 reg_expr_1_basics.py
 create mode 100644 reg_expr_2_operators.py

diff --git a/.gitignore b/.gitignore
new file mode 100755
index 0000000..d6aa04d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,15 @@
+*.pyc
+*.o
+*.so
+*.gcno
+*.swp
+*.egg-info
+*.egg
+*~
+build
+dist
+lib/test
+doc/_build
+*.log
+*env
+*ENV
diff --git a/cmd_line_args_1_sysarg.py b/cmd_line_args_1_sysarg.py
new file mode 100644
index 0000000..b8f8cbf
--- /dev/null
+++ b/cmd_line_args_1_sysarg.py
@@ -0,0 +1,24 @@
+# Getting command line arguments via sys.argv
+# sr 11/30/2013
+
+import sys
+
+def error(msg):
+    """Write msg to stderr and quit the program (sys.exit with a string exits with status 1)."""
+    sys.exit(msg)
+
+
+args = sys.argv[1:] # sys.argv[0] is the name of the python script itself
+
+try:
+    arg1 = int(args[0])  # ValueError if not an integer, IndexError if no argument was given
+    arg2 = args[1]
+    arg3 = args[2]  # IndexError here means fewer than 3 arguments were given
+    print("Everything okay!")
+
+except ValueError:
+    error("First argument must be integer type!")
+
+except IndexError:
+    error("Requires 3 arguments!")
+
diff --git a/reg_expr_1_basics.py b/reg_expr_1_basics.py
new file mode 100644
index 0000000..5fafab8
--- /dev/null
+++ b/reg_expr_1_basics.py
@@ -0,0 +1,101 @@
+# Examples for using Python's Regular expression module "re"
+# sr 11/30/2013
+
+import re
+
+'''OVERVIEW
+ '|' means 'or'
+ '.' matches any single character
+ '()' groups into substrings
+'''
+
+
+
+
+
+# sample data (a string standing in for the contents of a file)
+fileobj = '''abc mno
+def pqr
+ghi stu
+jkl vwx'''
+
+data = fileobj.strip().split('\n')
+
+
+# A >> plain substring search, equivalent to: if 'abc' in line
+print (50*'-' + '\nA\n' + 50*'-')
+for line in data:
+    if re.search('abc', line):
+        print(">>", line)
+    else:
+        print(" ", line)
+
+'''
+--------------------------------------------------
+A
+--------------------------------------------------
+>> abc mno
+ def pqr
+ ghi stu
+ jkl vwx'''
+
+
+
+# B >> alternation with '|', equivalent to: if 'abc' in line or 'efg' in line
+print (50*'-' + '\nB\n' + 50*'-')
+for line in data:
+    if re.search('abc|efg', line):
+        print(">>", line)
+    else:
+        print(" ", line)
+
+'''
+--------------------------------------------------
+B
+--------------------------------------------------
+>> abc mno
+ def pqr
+ ghi stu
+ jkl vwx
+---------------'''  # NOTE(review): this last dash line is not printed by the section B code above
+
+
+# C >>
+# use () to capture which alternative was found; re.search returns a match object (or None if nothing matched)
+print (50*'-' + '\nC\n' + 50*'-')
+for line in data:
+    match = re.search('(abc|efg)', line) # note the parentheses
+    if match:
+        print(match.group(1)) # group(1) is the text captured by the first (...) group, here 'abc'
+        # match.group(0) is the whole substring that matched the pattern
+
+'''
+--------------------------------------------------
+C
+--------------------------------------------------
+abc'''
+
+
+
+# sample data (a string standing in for the contents of a file)
+fileobj = '''2013-01-01
+2012-02-02
+ghi stu
+2012-03-03'''
+
+data = fileobj.strip().split('\n')
+
+
+# D >> use '.' to match 'any character' (each '..' below captures exactly two characters)
+print (50*'-' + '\nD\n' + 50*'-')
+for line in data:
+    match = re.search('(2012)-(..)-(..)', line) # note the parentheses
+    if match:
+        print(match.group(1), match.group(2), match.group(3))
+
+'''
+--------------------------------------------------
+D
+--------------------------------------------------
+2012 02 02
+2012 03 03'''
diff --git a/reg_expr_2_operators.py b/reg_expr_2_operators.py
new file mode 100644
index 0000000..40eaf2c
--- /dev/null
+++ b/reg_expr_2_operators.py
@@ -0,0 +1,115 @@
+# Examples for using Python's Regular expression module "re"
+# sr 11/30/2013
+
+import re
+
+'''OVERVIEW
+ '*' matches all characters that follow (0 or more)
+ '+' matches all characters that follow (1 or more)
+ '?' makes the previous character optional
+ '{4}' previous character must match exactly 4 times
+ '{2-4}' previous character must match exactly 2-4 times
+'''  # NOTE: quantifiers repeat the element *before* them; the range form is spelled '{2,4}' (comma), as used in example E
+
+data = '''2013-01-01
+2012-02-02
+aaaa-02-02
+aa-02-02
+-04-04
+2000 02-02
+ghi stu
+2012-03-03'''.strip().split('\n')
+
+
+# A >> '*' repeats the preceding element 0 or more times ('.*' = any run of characters, possibly empty)
+print (50*'-' + '\nA\n' + 50*'-')
+
+for line in data:
+    match = re.search('(.*)-(..)-(..)', line) # note the parentheses
+    if match:
+        print(match.group(1), match.group(2), match.group(3))
+
+'''
+--------------------------------------------------
+A
+--------------------------------------------------
+2013 01 01
+2012 02 02
+aaaa 02 02
+aa 02 02
+ 04 04
+2012 03 03
+'''
+
+
+# B >> '+' repeats the preceding element 1 or more times ('.+' = at least one character, so '-04-04' no longer matches)
+print (50*'-' + '\nB\n' + 50*'-')
+
+for line in data:
+    match = re.search('(.+)-(..)-(..)', line) # note the parentheses
+    if match:
+        print(match.group(1), match.group(2), match.group(3))
+
+'''
+--------------------------------------------------
+B
+--------------------------------------------------
+2013 01 01
+2012 02 02
+aaaa 02 02
+aa 02 02
+2012 03 03
+'''
+
+
+# C >> '?' makes the preceding element optional (0 or 1 occurrence)
+print (50*'-' + '\nC\n' + 50*'-')
+
+for line in data:
+    match = re.search('(.+)-?(..)-(..)', line) # note the parentheses; the greedy '.+' also swallows the now-optional '-'
+    if match:
+        print(match.group(1), match.group(2), match.group(3))
+
+'''
+--------------------------------------------------
+C
+--------------------------------------------------
+2013- 01 01
+2012- 02 02
+aaaa- 02 02
+aa- 02 02
+- 04 04
+2000 02 02
+2012- 03 03
+'''
+
+# D >> '{4}' the preceding element must repeat exactly 4 times
+print (50*'-' + '\nD\n' + 50*'-')
+
+for line in data:
+    match = re.search('(a{4})-(..)-(..)', line) # note the parentheses
+    if match:
+        print(match.group(1), match.group(2), match.group(3))
+
+'''
+--------------------------------------------------
+D
+--------------------------------------------------
+aaaa 02 02
+'''
+
+# E >> '{2,4}' the preceding element must repeat between 2 and 4 times
+print (50*'-' + '\nE\n' + 50*'-')
+
+for line in data:
+    match = re.search('(a{2,4})-(..)-(..)', line) # note the parentheses
+    if match:
+        print(match.group(1), match.group(2), match.group(3))
+
+'''
+--------------------------------------------------
+E
+--------------------------------------------------
+aaaa 02 02
+aa 02 02
+'''