diff --git a/tutorials/useful_regex.ipynb b/tutorials/useful_regex.ipynb
index 1f4c880..91e7dc7 100644
--- a/tutorials/useful_regex.ipynb
+++ b/tutorials/useful_regex.ipynb
@@ -1,7 +1,7 @@
{
"metadata": {
"name": "",
- "signature": "sha256:9fd7d5201ce5b97fadad65f2c30cfec993fc83907e04418b032bd1bbdac05ff4"
+ "signature": "sha256:8f1ee7a7bfaeca0ee3e12b2387445faa10632d57277d59af6dbfdca9732e5910"
},
"nbformat": 3,
"nbformat_minor": 0,
@@ -41,7 +41,7 @@
"output_type": "stream",
"stream": "stdout",
"text": [
- "Last updated: 06/07/2014 10:07:02 EDT\n",
+ "Last updated: 06/07/2014 12:24:58 EDT\n",
"\n",
"CPython 3.4.1\n",
"IPython 2.1.0\n"
@@ -264,7 +264,9 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "A regular expression to check for file extensions."
+ "A regular expression to check for file extensions. \n",
+ "\n",
+ "Note: This approach is not recommended for strictly restricting file types (parse the file header instead). However, this regex is still a useful alternative to, e.g., Python's `str.endswith` for quickly pre-filtering files of interest."
]
},
{
@@ -746,14 +748,6 @@
"
"
]
},
- {
- "cell_type": "heading",
- "level": 2,
- "metadata": {},
- "source": [
- "Time"
- ]
- },
{
"cell_type": "markdown",
"metadata": {},
@@ -810,6 +804,49 @@
"\n",
"for t in str_true:\n",
" assert(bool(re.match(pattern, t)) == True), '%s is not True' %t\n",
+ "for f in str_false:\n",
+ " assert(bool(re.match(pattern, f)) == False), '%s is not False' %f"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 18
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "<br>\n",
+ "<br>"
+ ]
+ },
+ {
+ "cell_type": "heading",
+ "level": 2,
+ "metadata": {},
+ "source": [
+ "Checking for HTML tags"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[[back to top](#Sections)]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "pattern = r\"\"\"</?\\w+((\\s+\\w+(\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)+\\s*|\\s*)/?>\"\"\"\n",
+ "\n",
+ "str_true = ('<a>', '<a href=\"something\">', '</a>', '<br>')\n",
+ " \n",
+ "str_false = ('a>', '<a ', '< a >')\n",
+ "\n",
+ "for t in str_true:\n",
+ " assert(bool(re.match(pattern, t)) == True), '%s is not True' %t\n",
"\n",
"for f in str_false:\n",
" assert(bool(re.match(pattern, f)) == False), '%s is not False' %f"
@@ -817,7 +854,14 @@
"language": "python",
"metadata": {},
"outputs": [],
- "prompt_number": 33
+ "prompt_number": 16
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "source: [http://haacked.com/archive/2004/10/25/usingregularexpressionstomatchhtml.aspx/](http://haacked.com/archive/2004/10/25/usingregularexpressionstomatchhtml.aspx/)"
+ ]
}
],
"metadata": {}