diff --git a/.ipynb_checkpoints/timeit_tests-checkpoint.ipynb b/.ipynb_checkpoints/timeit_tests-checkpoint.ipynb index 12ba838..8371eb3 100644 --- a/.ipynb_checkpoints/timeit_tests-checkpoint.ipynb +++ b/.ipynb_checkpoints/timeit_tests-checkpoint.ipynb @@ -1,7 +1,7 @@ { "metadata": { "name": "", - "signature": "sha256:d5895f75b2ac58db150d7b521682366a447ffb2fb0b7db7e551edd40e6d1ab10" + "signature": "sha256:8dc4f91bc6a88e15ab0d25fac35b9a7645a7149b5ab4e1e15b2b372362e82ae2" }, "nbformat": 3, "nbformat_minor": 0, @@ -855,63 +855,64 @@ "collapsed": false, "input": [ "import random\n", - "import copy\n", "import timeit\n", + "from collections import defaultdict\n", "\n", "\n", - "\n", - "def add_element_check1(my_dict, elements):\n", + "def add_element_check1(elements):\n", + " d = dict()\n", " for e in elements:\n", - " if e not in my_dict:\n", - " my_dict[e] = 1\n", + " if e not in d:\n", + " d[e] = 1\n", " else:\n", - " my_dict[e] += 1\n", + " d[e] += 1\n", + " return d\n", " \n", - "def add_element_check2(my_dict, elements):\n", + "def add_element_check2(elements):\n", + " d = dict()\n", " for e in elements:\n", - " if e not in my_dict:\n", - " my_dict[e] = 0\n", - " my_dict[e] += 1 \n", - "\n", - "def add_element_except(my_dict, elements):\n", + " if e not in d:\n", + " d[e] = 0\n", + " d[e] += 1 \n", + " return d\n", + " \n", + "def add_element_except(elements):\n", + " d = dict()\n", " for e in elements:\n", " try:\n", - " my_dict[e] += 1\n", + " d[e] += 1\n", " except KeyError:\n", - " my_dict[e] = 1\n", + " d[e] = 1\n", + " return d\n", " \n", + "def add_element_defaultdict(elements):\n", + " d = defaultdict(int)\n", + " for e in elements:\n", + " d[e] += 1\n", + " return d\n", "\n", "random.seed(123)\n", - "rand_ints = [random.randrange(1, 10) for i in range(100)]\n", - "empty_dict = {}\n", "\n", "print('Results for 100 integers in range 1-10') \n", - "%timeit add_element_check1(copy.deepcopy(empty_dict), rand_ints)\n", - "%timeit 
add_element_check2(copy.deepcopy(empty_dict), rand_ints)\n", - "%timeit add_element_except(copy.deepcopy(empty_dict), rand_ints)\n", - " \n", - "print('\\nResults for 1000 integers in range 1-10') \n", - "rand_ints = [random.randrange(1, 10) for i in range(1000)]\n", - "empty_dict = {}\n", + "rand_ints = [random.randrange(1, 10) for i in range(100)]\n", + "%timeit add_element_check1(rand_ints)\n", + "%timeit add_element_check2(rand_ints)\n", + "%timeit add_element_except(rand_ints)\n", + "%timeit add_element_defaultdict(rand_ints)\n", "\n", - "%timeit add_element_check1(copy.deepcopy(empty_dict), rand_ints)\n", - "%timeit add_element_check2(copy.deepcopy(empty_dict), rand_ints)\n", - "%timeit add_element_except(copy.deepcopy(empty_dict), rand_ints)\n", + "print('\\nResults for 1000 integers in range 1-5') \n", + "rand_ints = [random.randrange(1, 5) for i in range(1000)]\n", + "%timeit add_element_check1(rand_ints)\n", + "%timeit add_element_check2(rand_ints)\n", + "%timeit add_element_except(rand_ints)\n", + "%timeit add_element_defaultdict(rand_ints)\n", "\n", "print('\\nResults for 1000 integers in range 1-1000') \n", - "rand_ints = [random.randrange(1, 10) for i in range(1000)]\n", - "empty_dict = {}\n", - "\n", - "%timeit add_element_check1(copy.deepcopy(empty_dict), rand_ints)\n", - "%timeit add_element_check2(copy.deepcopy(empty_dict), rand_ints)\n", - "%timeit add_element_except(copy.deepcopy(empty_dict), rand_ints)\n", - "\n", - "#\n", - "# Python 3.4.0\n", - "# MacOS X 10.9.2\n", - "# 2.5 GHz Intel Core i5\n", - "# 4 GB 1600 Mhz DDR3\n", - "#" + "rand_ints = [random.randrange(1, 1000) for i in range(1000)]\n", + "%timeit add_element_check1(rand_ints)\n", + "%timeit add_element_check2(rand_ints)\n", + "%timeit add_element_except(rand_ints)\n", + "%timeit add_element_defaultdict(rand_ints)" ], "language": "python", "metadata": {}, @@ -921,7 +922,7 @@ "stream": "stdout", "text": [ "Results for 100 integers in range 1-10\n", - "100000 loops, best of 3: 16.6 
\u00b5s per loop" + "10000 loops, best of 3: 24.6 \u00b5s per loop" ] }, { @@ -929,7 +930,7 @@ "stream": "stdout", "text": [ "\n", - "100000 loops, best of 3: 17.6 \u00b5s per loop" + "10000 loops, best of 3: 26.2 \u00b5s per loop" ] }, { @@ -937,7 +938,15 @@ "stream": "stdout", "text": [ "\n", - "100000 loops, best of 3: 17.9 \u00b5s per loop" + "10000 loops, best of 3: 25.4 \u00b5s per loop" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "10000 loops, best of 3: 23 \u00b5s per loop" ] }, { @@ -946,8 +955,8 @@ "text": [ "\n", "\n", - "Results for 1000 integers in range 1-10\n", - "10000 loops, best of 3: 135 \u00b5s per loop" + "Results for 1000 integers in range 1-5\n", + "1000 loops, best of 3: 236 \u00b5s per loop" ] }, { @@ -955,7 +964,7 @@ "stream": "stdout", "text": [ "\n", - "10000 loops, best of 3: 125 \u00b5s per loop" + "1000 loops, best of 3: 235 \u00b5s per loop" ] }, { @@ -963,7 +972,15 @@ "stream": "stdout", "text": [ "\n", - "10000 loops, best of 3: 105 \u00b5s per loop" + "1000 loops, best of 3: 207 \u00b5s per loop" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "10000 loops, best of 3: 177 \u00b5s per loop" ] }, { @@ -973,7 +990,7 @@ "\n", "\n", "Results for 1000 integers in range 1-1000\n", - "10000 loops, best of 3: 122 \u00b5s per loop" + "1000 loops, best of 3: 268 \u00b5s per loop" ] }, { @@ -981,7 +998,7 @@ "stream": "stdout", "text": [ "\n", - "10000 loops, best of 3: 123 \u00b5s per loop" + "1000 loops, best of 3: 377 \u00b5s per loop" ] }, { @@ -989,7 +1006,15 @@ "stream": "stdout", "text": [ "\n", - "10000 loops, best of 3: 104 \u00b5s per loop" + "1000 loops, best of 3: 511 \u00b5s per loop" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "1000 loops, best of 3: 410 \u00b5s per loop" ] }, { @@ -1000,14 +1025,21 @@ ] } ], - "prompt_number": 13 + "prompt_number": 16 }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### 
Conclusion\n", - "Interestingly, the `try-except` loop pays off if we have more elements (here: 1000 integers instead of 100) as dictionary keys to check. Also, it doesn't matter much whether the elements exist or do not exist in the dictionary, yet." + "### Conclusion" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We see from the results that the `try-except` variant is faster than the `if element in my_dict` alternative if we have a low number of unique elements (here: 1000 integers in the range 1-5), which makes sense: the `except`-block is skipped if an element is already added as a key to the dictionary. However, in this case the `collections.defaultdict` has an even better performance. \n", + "However, if we have a relatively large number of unique entries (here: 1000 integers in range 1-1000), the `if element in my_dict` approach outperforms the alternative approaches." ] }, { diff --git a/benchmarks/timeit_tests.ipynb b/benchmarks/timeit_tests.ipynb index 12ba838..8371eb3 100644 --- a/benchmarks/timeit_tests.ipynb +++ b/benchmarks/timeit_tests.ipynb @@ -1,7 +1,7 @@ { "metadata": { "name": "", - "signature": "sha256:d5895f75b2ac58db150d7b521682366a447ffb2fb0b7db7e551edd40e6d1ab10" + "signature": "sha256:8dc4f91bc6a88e15ab0d25fac35b9a7645a7149b5ab4e1e15b2b372362e82ae2" }, "nbformat": 3, "nbformat_minor": 0, @@ -855,63 +855,64 @@ "collapsed": false, "input": [ "import random\n", - "import copy\n", "import timeit\n", + "from collections import defaultdict\n", "\n", "\n", - "\n", - "def add_element_check1(my_dict, elements):\n", + "def add_element_check1(elements):\n", + " d = dict()\n", " for e in elements:\n", - " if e not in my_dict:\n", - " my_dict[e] = 1\n", + " if e not in d:\n", + " d[e] = 1\n", " else:\n", - " my_dict[e] += 1\n", + " d[e] += 1\n", + " return d\n", " \n", - "def add_element_check2(my_dict, elements):\n", + "def add_element_check2(elements):\n", + " d = dict()\n", " for e in elements:\n", - " if e not in 
my_dict:\n", - " my_dict[e] = 0\n", - " my_dict[e] += 1 \n", - "\n", - "def add_element_except(my_dict, elements):\n", + " if e not in d:\n", + " d[e] = 0\n", + " d[e] += 1 \n", + " return d\n", + " \n", + "def add_element_except(elements):\n", + " d = dict()\n", " for e in elements:\n", " try:\n", - " my_dict[e] += 1\n", + " d[e] += 1\n", " except KeyError:\n", - " my_dict[e] = 1\n", + " d[e] = 1\n", + " return d\n", " \n", + "def add_element_defaultdict(elements):\n", + " d = defaultdict(int)\n", + " for e in elements:\n", + " d[e] += 1\n", + " return d\n", "\n", "random.seed(123)\n", - "rand_ints = [random.randrange(1, 10) for i in range(100)]\n", - "empty_dict = {}\n", "\n", "print('Results for 100 integers in range 1-10') \n", - "%timeit add_element_check1(copy.deepcopy(empty_dict), rand_ints)\n", - "%timeit add_element_check2(copy.deepcopy(empty_dict), rand_ints)\n", - "%timeit add_element_except(copy.deepcopy(empty_dict), rand_ints)\n", - " \n", - "print('\\nResults for 1000 integers in range 1-10') \n", - "rand_ints = [random.randrange(1, 10) for i in range(1000)]\n", - "empty_dict = {}\n", + "rand_ints = [random.randrange(1, 10) for i in range(100)]\n", + "%timeit add_element_check1(rand_ints)\n", + "%timeit add_element_check2(rand_ints)\n", + "%timeit add_element_except(rand_ints)\n", + "%timeit add_element_defaultdict(rand_ints)\n", "\n", - "%timeit add_element_check1(copy.deepcopy(empty_dict), rand_ints)\n", - "%timeit add_element_check2(copy.deepcopy(empty_dict), rand_ints)\n", - "%timeit add_element_except(copy.deepcopy(empty_dict), rand_ints)\n", + "print('\\nResults for 1000 integers in range 1-5') \n", + "rand_ints = [random.randrange(1, 5) for i in range(1000)]\n", + "%timeit add_element_check1(rand_ints)\n", + "%timeit add_element_check2(rand_ints)\n", + "%timeit add_element_except(rand_ints)\n", + "%timeit add_element_defaultdict(rand_ints)\n", "\n", "print('\\nResults for 1000 integers in range 1-1000') \n", - "rand_ints = [random.randrange(1, 
10) for i in range(1000)]\n", - "empty_dict = {}\n", - "\n", - "%timeit add_element_check1(copy.deepcopy(empty_dict), rand_ints)\n", - "%timeit add_element_check2(copy.deepcopy(empty_dict), rand_ints)\n", - "%timeit add_element_except(copy.deepcopy(empty_dict), rand_ints)\n", - "\n", - "#\n", - "# Python 3.4.0\n", - "# MacOS X 10.9.2\n", - "# 2.5 GHz Intel Core i5\n", - "# 4 GB 1600 Mhz DDR3\n", - "#" + "rand_ints = [random.randrange(1, 1000) for i in range(1000)]\n", + "%timeit add_element_check1(rand_ints)\n", + "%timeit add_element_check2(rand_ints)\n", + "%timeit add_element_except(rand_ints)\n", + "%timeit add_element_defaultdict(rand_ints)" ], "language": "python", "metadata": {}, @@ -921,7 +922,7 @@ "stream": "stdout", "text": [ "Results for 100 integers in range 1-10\n", - "100000 loops, best of 3: 16.6 \u00b5s per loop" + "10000 loops, best of 3: 24.6 \u00b5s per loop" ] }, { @@ -929,7 +930,7 @@ "stream": "stdout", "text": [ "\n", - "100000 loops, best of 3: 17.6 \u00b5s per loop" + "10000 loops, best of 3: 26.2 \u00b5s per loop" ] }, { @@ -937,7 +938,15 @@ "stream": "stdout", "text": [ "\n", - "100000 loops, best of 3: 17.9 \u00b5s per loop" + "10000 loops, best of 3: 25.4 \u00b5s per loop" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "10000 loops, best of 3: 23 \u00b5s per loop" ] }, { @@ -946,8 +955,8 @@ "text": [ "\n", "\n", - "Results for 1000 integers in range 1-10\n", - "10000 loops, best of 3: 135 \u00b5s per loop" + "Results for 1000 integers in range 1-5\n", + "1000 loops, best of 3: 236 \u00b5s per loop" ] }, { @@ -955,7 +964,7 @@ "stream": "stdout", "text": [ "\n", - "10000 loops, best of 3: 125 \u00b5s per loop" + "1000 loops, best of 3: 235 \u00b5s per loop" ] }, { @@ -963,7 +972,15 @@ "stream": "stdout", "text": [ "\n", - "10000 loops, best of 3: 105 \u00b5s per loop" + "1000 loops, best of 3: 207 \u00b5s per loop" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + 
"10000 loops, best of 3: 177 \u00b5s per loop" ] }, { @@ -973,7 +990,7 @@ "\n", "\n", "Results for 1000 integers in range 1-1000\n", - "10000 loops, best of 3: 122 \u00b5s per loop" + "1000 loops, best of 3: 268 \u00b5s per loop" ] }, { @@ -981,7 +998,7 @@ "stream": "stdout", "text": [ "\n", - "10000 loops, best of 3: 123 \u00b5s per loop" + "1000 loops, best of 3: 377 \u00b5s per loop" ] }, { @@ -989,7 +1006,15 @@ "stream": "stdout", "text": [ "\n", - "10000 loops, best of 3: 104 \u00b5s per loop" + "1000 loops, best of 3: 511 \u00b5s per loop" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "1000 loops, best of 3: 410 \u00b5s per loop" ] }, { @@ -1000,14 +1025,21 @@ ] } ], - "prompt_number": 13 + "prompt_number": 16 }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Conclusion\n", - "Interestingly, the `try-except` loop pays off if we have more elements (here: 1000 integers instead of 100) as dictionary keys to check. Also, it doesn't matter much whether the elements exist or do not exist in the dictionary, yet." + "### Conclusion" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We see from the results that the `try-except` variant is faster than then the `if element in my_dict` alternative if we have a low number of unique elements (here: 1000 integers in the range 1-5), which makes sense: the `except`-block is skipped if an element is already added as a key to the dictionary. However, in this case the `collections.defaultdict` has even a better performance. \n", + "However, if we are having a relative large number of unique entries(here: 1000 integers in range 1-1000), the `if element in my_dict` approach outperforms the alternative approaches." ] }, {