diff --git a/.ipynb_checkpoints/timeit_tests-checkpoint.ipynb b/.ipynb_checkpoints/timeit_tests-checkpoint.ipynb index f9b9ce7..0e7bd18 100644 --- a/.ipynb_checkpoints/timeit_tests-checkpoint.ipynb +++ b/.ipynb_checkpoints/timeit_tests-checkpoint.ipynb @@ -1,7 +1,7 @@ { "metadata": { "name": "", - "signature": "sha256:faa7cde23cd1ec6598ad386b44eabe2d06ad86d6bdd84c7d999dd174c7df48a6" + "signature": "sha256:c5925e90d7a8c3e6fb36f4e3ca876248f4ba69529367d87b73ef3da2b5415766" }, "nbformat": 3, "nbformat_minor": 0, @@ -64,7 +64,8 @@ " - [Creating lists using conditional statements](#create_cond_list)\n", "- [Dictionary operations](#dict_ops) \n", " - [Adding elements to a dictionary](#adding_dict_elements)\n", - "- [Comprehensions vs. for-loops](#comprehensions)" + "- [Comprehensions vs. for-loops](#comprehensions)\n", + "- [Copying files by searching directory trees](#find_copy)" ] }, { @@ -1023,6 +1024,14 @@ "# Comprehesions vs. for-loops" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Comprehensions are not only shorter and prettier than ye goode olde for-loop, \n", + "but they are also up to ~1.2x faster." 
+ ] + }, { "cell_type": "code", "collapsed": false, @@ -1126,7 +1135,7 @@ "language": "python", "metadata": {}, "outputs": [], - "prompt_number": 11 + "prompt_number": 23 }, { "cell_type": "code", @@ -1138,7 +1147,7 @@ "language": "python", "metadata": {}, "outputs": [], - "prompt_number": 12 + "prompt_number": 24 }, { "cell_type": "code", @@ -1154,8 +1163,8 @@ "output_type": "stream", "stream": "stdout", "text": [ - "10000 loops, best of 3: 130 \u00b5s per loop\n", - "10000 loops, best of 3: 114 \u00b5s per loop" + "10000 loops, best of 3: 129 \u00b5s per loop\n", + "10000 loops, best of 3: 111 \u00b5s per loop" ] }, { @@ -1166,7 +1175,7 @@ ] } ], - "prompt_number": 14 + "prompt_number": 25 }, { "cell_type": "markdown", @@ -1189,7 +1198,7 @@ "language": "python", "metadata": {}, "outputs": [], - "prompt_number": 15 + "prompt_number": 26 }, { "cell_type": "code", @@ -1201,7 +1210,7 @@ "language": "python", "metadata": {}, "outputs": [], - "prompt_number": 17 + "prompt_number": 27 }, { "cell_type": "code", @@ -1217,8 +1226,8 @@ "output_type": "stream", "stream": "stdout", "text": [ - "10000 loops, best of 3: 120 \u00b5s per loop\n", - "10000 loops, best of 3: 118 \u00b5s per loop" + "10000 loops, best of 3: 121 \u00b5s per loop\n", + "10000 loops, best of 3: 127 \u00b5s per loop" ] }, { @@ -1229,15 +1238,145 @@ ] } ], - "prompt_number": 18 + "prompt_number": 28 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "
\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Copying files by searching directory trees" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Executing `Unix`/`Linux` shell commands:" + ] }, { "cell_type": "code", "collapsed": false, - "input": [], + "input": [ + "import subprocess\n", + "\n", + "def subprocess_findcopy(path, search_str, dest): \n", + "    query = 'find %s -name \"%s\" -exec cp \"{}\" %s \\;' %(path, search_str, dest)  # {} quoted on both sides; a lone closing quote makes sh reject the command\n", + "    subprocess.call(query, shell=True)  # NOTE: exit status is discarded, so a failing command goes unnoticed\n", + "    return " + ], "language": "python", "metadata": {}, - "outputs": [] + "outputs": [], + "prompt_number": 2 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using Python's `os.walk()` to search the directory tree recursively and matching patterns via `fnmatch.filter()`" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import shutil\n", + "import os\n", + "import fnmatch\n", + "\n", + "def walk_findcopy(path, search_str, dest):\n", + "    for dirpath, subdirs, files in os.walk(path):  # dirpath, not path: do not rebind the argument\n", + "        for name in fnmatch.filter(files, search_str):\n", + "            shutil.copy(os.path.join(dirpath, name), dest)" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 3 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import timeit\n", + "\n", + "print('small tree')\n", + "inpath = '/Users/sebastian/Desktop/testdir_in'\n", + "outpath = '/Users/sebastian/Desktop/testdir_out'\n", + "searchstr = '*.png'\n", + "%timeit subprocess_findcopy(inpath, searchstr, outpath)\n", + "%timeit walk_findcopy(inpath, searchstr, outpath)\n", + "\n", + "print('\\nlarger tree')\n", + "inpath = '/Users/sebastian/Dropbox'\n", + "outpath = '/Users/sebastian/Desktop/testdir_out'\n", + "searchstr = '*.csv'\n", + "%timeit subprocess_findcopy(inpath, searchstr, outpath)\n", + "%timeit walk_findcopy(inpath, searchstr, outpath)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + 
"output_type": "stream", + "stream": "stdout", + "text": [ + "small tree\n", + "100 loops, best of 3: 8.48 ms per loop" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "100 loops, best of 3: 22.3 ms per loop" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "\n", + "larger tree\n", + "10 loops, best of 3: 7.13 ms per loop" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "1 loops, best of 3: 413 ms per loop" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n" + ] + } + ], + "prompt_number": 4 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "I have to say that I am really positively surprised. The shell's `find` scales even better than expected! (Caveat: the `find` timings above appear to have been recorded while the `-exec cp` command still contained an unbalanced quote, in which case the shell rejects the command before `find` ever runs and only the shell start-up is timed; re-run this cell with the corrected command before trusting the comparison.)" + ] } ], "metadata": {} diff --git a/benchmarks/timeit_tests.ipynb b/benchmarks/timeit_tests.ipynb index 06ab26d..0e7bd18 100644 --- a/benchmarks/timeit_tests.ipynb +++ b/benchmarks/timeit_tests.ipynb @@ -1,7 +1,7 @@ { "metadata": { "name": "", - "signature": "sha256:4f74947620f3ebd04a28a448392a201107339760170f1eb74e815a3e8b8267e8" + "signature": "sha256:c5925e90d7a8c3e6fb36f4e3ca876248f4ba69529367d87b73ef3da2b5415766" }, "nbformat": 3, "nbformat_minor": 0, @@ -64,7 +64,8 @@ " - [Creating lists using conditional statements](#create_cond_list)\n", "- [Dictionary operations](#dict_ops) \n", " - [Adding elements to a dictionary](#adding_dict_elements)\n", - "- [Comprehensions vs. for-loops](#comprehensions)" + "- [Comprehensions vs. for-loops](#comprehensions)\n", + "- [Copying files by searching directory trees](#find_copy)" ] }, { @@ -1239,13 +1240,143 @@ ], "prompt_number": 28 }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "
\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Copying files by searching directory trees" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Executing `Unix`/`Linux` shell commands:" + ] + }, { "cell_type": "code", "collapsed": false, - "input": [], + "input": [ + "import subprocess\n", + "\n", + "def subprocess_findcopy(path, search_str, dest): \n", + "    query = 'find %s -name \"%s\" -exec cp \"{}\" %s \\;' %(path, search_str, dest)  # {} quoted on both sides; a lone closing quote makes sh reject the command\n", + "    subprocess.call(query, shell=True)  # NOTE: exit status is discarded, so a failing command goes unnoticed\n", + "    return " + ], "language": "python", "metadata": {}, - "outputs": [] + "outputs": [], + "prompt_number": 2 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using Python's `os.walk()` to search the directory tree recursively and matching patterns via `fnmatch.filter()`" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import shutil\n", + "import os\n", + "import fnmatch\n", + "\n", + "def walk_findcopy(path, search_str, dest):\n", + "    for dirpath, subdirs, files in os.walk(path):  # dirpath, not path: do not rebind the argument\n", + "        for name in fnmatch.filter(files, search_str):\n", + "            shutil.copy(os.path.join(dirpath, name), dest)" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 3 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import timeit\n", + "\n", + "print('small tree')\n", + "inpath = '/Users/sebastian/Desktop/testdir_in'\n", + "outpath = '/Users/sebastian/Desktop/testdir_out'\n", + "searchstr = '*.png'\n", + "%timeit subprocess_findcopy(inpath, searchstr, outpath)\n", + "%timeit walk_findcopy(inpath, searchstr, outpath)\n", + "\n", + "print('\\nlarger tree')\n", + "inpath = '/Users/sebastian/Dropbox'\n", + "outpath = '/Users/sebastian/Desktop/testdir_out'\n", + "searchstr = '*.csv'\n", + "%timeit subprocess_findcopy(inpath, searchstr, outpath)\n", + "%timeit walk_findcopy(inpath, searchstr, outpath)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + 
"output_type": "stream", + "stream": "stdout", + "text": [ + "small tree\n", + "100 loops, best of 3: 8.48 ms per loop" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "100 loops, best of 3: 22.3 ms per loop" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "\n", + "larger tree\n", + "10 loops, best of 3: 7.13 ms per loop" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "1 loops, best of 3: 413 ms per loop" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n" + ] + } + ], + "prompt_number": 4 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "I have to say that I am really positively surprised. The shell's `find` scales even better than expected! (Caveat: the `find` timings above appear to have been recorded while the `-exec cp` command still contained an unbalanced quote, in which case the shell rejects the command before `find` ever runs and only the shell start-up is timed; re-run this cell with the corrected command before trusting the comparison.)" + ] } ], "metadata": {}