mirror of
https://github.com/rasbt/python_reference.git
synced 2025-04-15 00:57:38 +00:00
html tag regex
This commit is contained in:
parent
6e5414c44d
commit
76da7ee6e0
@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"name": "",
|
"name": "",
|
||||||
"signature": "sha256:9fd7d5201ce5b97fadad65f2c30cfec993fc83907e04418b032bd1bbdac05ff4"
|
"signature": "sha256:8f1ee7a7bfaeca0ee3e12b2387445faa10632d57277d59af6dbfdca9732e5910"
|
||||||
},
|
},
|
||||||
"nbformat": 3,
|
"nbformat": 3,
|
||||||
"nbformat_minor": 0,
|
"nbformat_minor": 0,
|
||||||
@ -41,7 +41,7 @@
|
|||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"stream": "stdout",
|
"stream": "stdout",
|
||||||
"text": [
|
"text": [
|
||||||
"Last updated: 06/07/2014 10:07:02 EDT\n",
|
"Last updated: 06/07/2014 12:24:58 EDT\n",
|
||||||
"\n",
|
"\n",
|
||||||
"CPython 3.4.1\n",
|
"CPython 3.4.1\n",
|
||||||
"IPython 2.1.0\n"
|
"IPython 2.1.0\n"
|
||||||
@ -264,7 +264,9 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"A regular expression to check for file extensions."
|
"A regular expression to check for file extensions. \n",
|
||||||
|
"\n",
|
||||||
|
"Note: This approach is not recommended for strictly restricting file types (parse the file header instead). However, this regex is still a useful alternative to, e.g., Python's `endswith` approach for quickly pre-filtering files of interest."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -746,14 +748,6 @@
|
|||||||
"<br>"
|
"<br>"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"cell_type": "heading",
|
|
||||||
"level": 2,
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Time"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@ -810,6 +804,49 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"for t in str_true:\n",
|
"for t in str_true:\n",
|
||||||
" assert(bool(re.match(pattern, t)) == True), '%s is not True' %t\n",
|
" assert(bool(re.match(pattern, t)) == True), '%s is not True' %t\n",
|
||||||
|
"for f in str_false:\n",
|
||||||
|
" assert(bool(re.match(pattern, f)) == False), '%s is not False' %f"
|
||||||
|
],
|
||||||
|
"language": "python",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"prompt_number": 18
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"<br>\n",
|
||||||
|
"<br>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "heading",
|
||||||
|
"level": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Checking for HTML tags"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"[[back to top](#Sections)]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"collapsed": false,
|
||||||
|
"input": [
|
||||||
|
"pattern = r\"\"\"</?\\w+((\\s+\\w+(\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)+\\s*|\\s*)/?>\"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
"str_true = ('<a>', '<a href=\"something\">', '</a>', '<img src>')\n",
|
||||||
|
" \n",
|
||||||
|
"str_false = ('a>', '<a ', '< a >')\n",
|
||||||
|
"\n",
|
||||||
|
"for t in str_true:\n",
|
||||||
|
" assert(bool(re.match(pattern, t)) == True), '%s is not True' %t\n",
|
||||||
"\n",
|
"\n",
|
||||||
"for f in str_false:\n",
|
"for f in str_false:\n",
|
||||||
" assert(bool(re.match(pattern, f)) == False), '%s is not False' %f"
|
" assert(bool(re.match(pattern, f)) == False), '%s is not False' %f"
|
||||||
@ -817,7 +854,14 @@
|
|||||||
"language": "python",
|
"language": "python",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"prompt_number": 33
|
"prompt_number": 16
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"<font size=\"1px\">source: [http://haacked.com/archive/2004/10/25/usingregularexpressionstomatchhtml.aspx/](http://haacked.com/archive/2004/10/25/usingregularexpressionstomatchhtml.aspx/)</font>"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {}
|
"metadata": {}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user