diff --git a/.ipynb_checkpoints/timeit_tests-checkpoint.ipynb b/.ipynb_checkpoints/timeit_tests-checkpoint.ipynb
index f9b9ce7..0e7bd18 100644
--- a/.ipynb_checkpoints/timeit_tests-checkpoint.ipynb
+++ b/.ipynb_checkpoints/timeit_tests-checkpoint.ipynb
@@ -1,7 +1,7 @@
{
"metadata": {
"name": "",
- "signature": "sha256:faa7cde23cd1ec6598ad386b44eabe2d06ad86d6bdd84c7d999dd174c7df48a6"
+ "signature": "sha256:c5925e90d7a8c3e6fb36f4e3ca876248f4ba69529367d87b73ef3da2b5415766"
},
"nbformat": 3,
"nbformat_minor": 0,
@@ -64,7 +64,8 @@
" - [Creating lists using conditional statements](#create_cond_list)\n",
"- [Dictionary operations](#dict_ops) \n",
" - [Adding elements to a dictionary](#adding_dict_elements)\n",
- "- [Comprehensions vs. for-loops](#comprehensions)"
+ "- [Comprehensions vs. for-loops](#comprehensions)\n",
+ "- [Copying files by searching directory trees](#find_copy)"
]
},
{
@@ -1023,6 +1024,14 @@
"# Comprehesions vs. for-loops"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Comprehensions are not only shorter and prettier than ye goode olde for-loop, \n",
+ "but they are also up to ~1.2x faster."
+ ]
+ },
{
"cell_type": "code",
"collapsed": false,
@@ -1126,7 +1135,7 @@
"language": "python",
"metadata": {},
"outputs": [],
- "prompt_number": 11
+ "prompt_number": 23
},
{
"cell_type": "code",
@@ -1138,7 +1147,7 @@
"language": "python",
"metadata": {},
"outputs": [],
- "prompt_number": 12
+ "prompt_number": 24
},
{
"cell_type": "code",
@@ -1154,8 +1163,8 @@
"output_type": "stream",
"stream": "stdout",
"text": [
- "10000 loops, best of 3: 130 \u00b5s per loop\n",
- "10000 loops, best of 3: 114 \u00b5s per loop"
+ "10000 loops, best of 3: 129 \u00b5s per loop\n",
+ "10000 loops, best of 3: 111 \u00b5s per loop"
]
},
{
@@ -1166,7 +1175,7 @@
]
}
],
- "prompt_number": 14
+ "prompt_number": 25
},
{
"cell_type": "markdown",
@@ -1189,7 +1198,7 @@
"language": "python",
"metadata": {},
"outputs": [],
- "prompt_number": 15
+ "prompt_number": 26
},
{
"cell_type": "code",
@@ -1201,7 +1210,7 @@
"language": "python",
"metadata": {},
"outputs": [],
- "prompt_number": 17
+ "prompt_number": 27
},
{
"cell_type": "code",
@@ -1217,8 +1226,8 @@
"output_type": "stream",
"stream": "stdout",
"text": [
- "10000 loops, best of 3: 120 \u00b5s per loop\n",
- "10000 loops, best of 3: 118 \u00b5s per loop"
+ "10000 loops, best of 3: 121 \u00b5s per loop\n",
+ "10000 loops, best of 3: 127 \u00b5s per loop"
]
},
{
@@ -1229,15 +1238,145 @@
]
}
],
- "prompt_number": 18
+ "prompt_number": 28
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "<a name='find_copy'></a>\n",
+ "<br>\n",
+ "<br>"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Copying files by searching directory trees"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Executing `Unix`/`Linux` shell commands:"
+ ]
},
{
"cell_type": "code",
"collapsed": false,
- "input": [],
+ "input": [
+ "import subprocess\n",
+ "\n",
+ "def subprocess_findcopy(path, search_str, dest):\n",
+ "    \"\"\"Copy every file below `path` whose name matches the glob `search_str` into `dest`.\n",
+ "\n",
+ "    Delegates the search and the copy to the external `find` command\n",
+ "    (`find path -name search_str -exec cp {} dest ;`). Returns None; the\n",
+ "    exit status of `find` is not checked.\n",
+ "    \"\"\"\n",
+ "    # An argument list (no shell) keeps paths with spaces intact and stops a\n",
+ "    # shell from mis-parsing quotes or expanding the glob before `find` sees it.\n",
+ "    subprocess.call(['find', path, '-name', search_str, '-exec', 'cp', '{}', dest, ';'])"
+ ],
"language": "python",
"metadata": {},
- "outputs": []
+ "outputs": [],
+ "prompt_number": 2
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Using Python's `os.walk()` to search the directory tree recursively and matching patterns via `fnmatch.filter()`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "import shutil\n",
+ "import os\n",
+ "import fnmatch\n",
+ "\n",
+ "def walk_findcopy(path, search_str, dest):\n",
+ "    \"\"\"Copy every file below `path` whose name matches the glob `search_str` into `dest`.\"\"\"\n",
+ "    for dirpath, _dirnames, filenames in os.walk(path):\n",
+ "        for match in fnmatch.filter(filenames, search_str):\n",
+ "            shutil.copy(os.path.join(dirpath, match), dest)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 3
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "import timeit\n",
+ "\n",
+ "outpath = '/Users/sebastian/Desktop/testdir_out'\n",
+ "\n",
+ "# (label, directory to search, filename pattern) for each benchmark case\n",
+ "benchmarks = (\n",
+ "    ('small tree', '/Users/sebastian/Desktop/testdir_in', '*.png'),\n",
+ "    ('\\nlarger tree', '/Users/sebastian/Dropbox', '*.csv'),\n",
+ ")\n",
+ "\n",
+ "# time both implementations on each tree, printing the tree label first\n",
+ "for label, inpath, searchstr in benchmarks:\n",
+ "    print(label)\n",
+ "    %timeit subprocess_findcopy(inpath, searchstr, outpath)\n",
+ "    %timeit walk_findcopy(inpath, searchstr, outpath)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "small tree\n",
+ "100 loops, best of 3: 8.48 ms per loop"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "\n",
+ "100 loops, best of 3: 22.3 ms per loop"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "\n",
+ "\n",
+ "larger tree\n",
+ "10 loops, best of 3: 7.13 ms per loop"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "\n",
+ "1 loops, best of 3: 413 ms per loop"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "\n"
+ ]
+ }
+ ],
+ "prompt_number": 4
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
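+ "I have to say that I am really positively surprised. The shell's `find` scales even better than expected! (Caveat: the two `find` timings above are almost identical for the small and the larger tree, 8.48 ms vs. 7.13 ms, so it is worth double-checking that the `find` command actually ran to completion and copied the files.)"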
+ ]
}
],
"metadata": {}
diff --git a/benchmarks/timeit_tests.ipynb b/benchmarks/timeit_tests.ipynb
index 06ab26d..0e7bd18 100644
--- a/benchmarks/timeit_tests.ipynb
+++ b/benchmarks/timeit_tests.ipynb
@@ -1,7 +1,7 @@
{
"metadata": {
"name": "",
- "signature": "sha256:4f74947620f3ebd04a28a448392a201107339760170f1eb74e815a3e8b8267e8"
+ "signature": "sha256:c5925e90d7a8c3e6fb36f4e3ca876248f4ba69529367d87b73ef3da2b5415766"
},
"nbformat": 3,
"nbformat_minor": 0,
@@ -64,7 +64,8 @@
" - [Creating lists using conditional statements](#create_cond_list)\n",
"- [Dictionary operations](#dict_ops) \n",
" - [Adding elements to a dictionary](#adding_dict_elements)\n",
- "- [Comprehensions vs. for-loops](#comprehensions)"
+ "- [Comprehensions vs. for-loops](#comprehensions)\n",
+ "- [Copying files by searching directory trees](#find_copy)"
]
},
{
@@ -1239,13 +1240,143 @@
],
"prompt_number": 28
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "<a name='find_copy'></a>\n",
+ "<br>\n",
+ "<br>"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Copying files by searching directory trees"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Executing `Unix`/`Linux` shell commands:"
+ ]
+ },
{
"cell_type": "code",
"collapsed": false,
- "input": [],
+ "input": [
+ "import subprocess\n",
+ "\n",
+ "def subprocess_findcopy(path, search_str, dest):\n",
+ "    \"\"\"Copy every file below `path` whose name matches the glob `search_str` into `dest`.\n",
+ "\n",
+ "    Delegates the search and the copy to the external `find` command\n",
+ "    (`find path -name search_str -exec cp {} dest ;`). Returns None; the\n",
+ "    exit status of `find` is not checked.\n",
+ "    \"\"\"\n",
+ "    # An argument list (no shell) keeps paths with spaces intact and stops a\n",
+ "    # shell from mis-parsing quotes or expanding the glob before `find` sees it.\n",
+ "    subprocess.call(['find', path, '-name', search_str, '-exec', 'cp', '{}', dest, ';'])"
+ ],
"language": "python",
"metadata": {},
- "outputs": []
+ "outputs": [],
+ "prompt_number": 2
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Using Python's `os.walk()` to search the directory tree recursively and matching patterns via `fnmatch.filter()`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "import shutil\n",
+ "import os\n",
+ "import fnmatch\n",
+ "\n",
+ "def walk_findcopy(path, search_str, dest):\n",
+ "    \"\"\"Copy every file below `path` whose name matches the glob `search_str` into `dest`.\"\"\"\n",
+ "    for dirpath, _dirnames, filenames in os.walk(path):\n",
+ "        for match in fnmatch.filter(filenames, search_str):\n",
+ "            shutil.copy(os.path.join(dirpath, match), dest)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 3
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "import timeit\n",
+ "\n",
+ "outpath = '/Users/sebastian/Desktop/testdir_out'\n",
+ "\n",
+ "# (label, directory to search, filename pattern) for each benchmark case\n",
+ "benchmarks = (\n",
+ "    ('small tree', '/Users/sebastian/Desktop/testdir_in', '*.png'),\n",
+ "    ('\\nlarger tree', '/Users/sebastian/Dropbox', '*.csv'),\n",
+ ")\n",
+ "\n",
+ "# time both implementations on each tree, printing the tree label first\n",
+ "for label, inpath, searchstr in benchmarks:\n",
+ "    print(label)\n",
+ "    %timeit subprocess_findcopy(inpath, searchstr, outpath)\n",
+ "    %timeit walk_findcopy(inpath, searchstr, outpath)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "small tree\n",
+ "100 loops, best of 3: 8.48 ms per loop"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "\n",
+ "100 loops, best of 3: 22.3 ms per loop"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "\n",
+ "\n",
+ "larger tree\n",
+ "10 loops, best of 3: 7.13 ms per loop"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "\n",
+ "1 loops, best of 3: 413 ms per loop"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "\n"
+ ]
+ }
+ ],
+ "prompt_number": 4
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
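+ "I have to say that I am really positively surprised. The shell's `find` scales even better than expected! (Caveat: the two `find` timings above are almost identical for the small and the larger tree, 8.48 ms vs. 7.13 ms, so it is worth double-checking that the `find` command actually ran to completion and copied the files.)"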
+ ]
}
],
"metadata": {}