diff --git a/tutorials/useful_regex.ipynb b/tutorials/useful_regex.ipynb index 1f4c880..91e7dc7 100644 --- a/tutorials/useful_regex.ipynb +++ b/tutorials/useful_regex.ipynb @@ -1,7 +1,7 @@ { "metadata": { "name": "", - "signature": "sha256:9fd7d5201ce5b97fadad65f2c30cfec993fc83907e04418b032bd1bbdac05ff4" + "signature": "sha256:8f1ee7a7bfaeca0ee3e12b2387445faa10632d57277d59af6dbfdca9732e5910" }, "nbformat": 3, "nbformat_minor": 0, @@ -41,7 +41,7 @@ "output_type": "stream", "stream": "stdout", "text": [ - "Last updated: 06/07/2014 10:07:02 EDT\n", + "Last updated: 06/07/2014 12:24:58 EDT\n", "\n", "CPython 3.4.1\n", "IPython 2.1.0\n" @@ -264,7 +264,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "A regular expression to check for file extensions." + "A regular expression to check for file extensions. \n", + "\n", + "Note: This approach is not recommended for strictly validating file types (parse the file header instead). However, this regex is still a useful alternative to, e.g., Python's `endswith` approach for quickly pre-filtering files of interest." ] }, { @@ -746,14 +748,6 @@ "
" ] }, - { - "cell_type": "heading", - "level": 2, - "metadata": {}, - "source": [ - "Time" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -810,6 +804,49 @@ "\n", "for t in str_true:\n", " assert(bool(re.match(pattern, t)) == True), '%s is not True' %t\n", + "for f in str_false:\n", + " assert(bool(re.match(pattern, f)) == False), '%s is not False' %f" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 18 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", "
" ] }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Checking for HTML tags" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[[back to top](#Sections)]" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "pattern = r\"\"\"</?\\w+((\\s+\\w+(\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)+\\s*|\\s*)/?>\"\"\"\n", + "\n", + "str_true = ('<a>', '<a href=\"something\">', '</a>', '<img src>')\n", + " \n", + "str_false = ('a>', '<a ', '< a >')\n", + "\n", + "for t in str_true:\n", + " assert(bool(re.match(pattern, t)) == True), '%s is not True' %t\n", "\n", "for f in str_false:\n", " assert(bool(re.match(pattern, f)) == False), '%s is not False' %f" @@ -817,7 +854,14 @@ "language": "python", "metadata": {}, "outputs": [], - "prompt_number": 33 + "prompt_number": 16 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "source: [http://haacked.com/archive/2004/10/25/usingregularexpressionstomatchhtml.aspx/](http://haacked.com/archive/2004/10/25/usingregularexpressionstomatchhtml.aspx/)" + ] } ], "metadata": {}