defaultdict

This commit is contained in:
rasbt 2014-05-01 21:25:08 -04:00
parent eb35644f7b
commit 409d953a22
2 changed files with 170 additions and 106 deletions

View File

@ -1,7 +1,7 @@
{
"metadata": {
"name": "",
"signature": "sha256:d5895f75b2ac58db150d7b521682366a447ffb2fb0b7db7e551edd40e6d1ab10"
"signature": "sha256:8dc4f91bc6a88e15ab0d25fac35b9a7645a7149b5ab4e1e15b2b372362e82ae2"
},
"nbformat": 3,
"nbformat_minor": 0,
@ -855,63 +855,64 @@
"collapsed": false,
"input": [
"import random\n",
"import copy\n",
"import timeit\n",
"from collections import defaultdict\n",
"\n",
"\n",
"\n",
"def add_element_check1(my_dict, elements):\n",
"def add_element_check1(elements):\n",
" d = dict()\n",
" for e in elements:\n",
" if e not in my_dict:\n",
" my_dict[e] = 1\n",
" if e not in d:\n",
" d[e] = 1\n",
" else:\n",
" my_dict[e] += 1\n",
" d[e] += 1\n",
" return d\n",
" \n",
"def add_element_check2(my_dict, elements):\n",
"def add_element_check2(elements):\n",
" d = dict()\n",
" for e in elements:\n",
" if e not in my_dict:\n",
" my_dict[e] = 0\n",
" my_dict[e] += 1 \n",
"\n",
"def add_element_except(my_dict, elements):\n",
" if e not in d:\n",
" d[e] = 0\n",
" d[e] += 1 \n",
" return d\n",
" \n",
"def add_element_except(elements):\n",
" d = dict()\n",
" for e in elements:\n",
" try:\n",
" my_dict[e] += 1\n",
" d[e] += 1\n",
" except KeyError:\n",
" my_dict[e] = 1\n",
" d[e] = 1\n",
" return d\n",
" \n",
"def add_element_defaultdict(elements):\n",
" d = defaultdict(int)\n",
" for e in elements:\n",
" d[e] += 1\n",
" return d\n",
"\n",
"random.seed(123)\n",
"rand_ints = [random.randrange(1, 10) for i in range(100)]\n",
"empty_dict = {}\n",
"\n",
"print('Results for 100 integers in range 1-10') \n",
"%timeit add_element_check1(copy.deepcopy(empty_dict), rand_ints)\n",
"%timeit add_element_check2(copy.deepcopy(empty_dict), rand_ints)\n",
"%timeit add_element_except(copy.deepcopy(empty_dict), rand_ints)\n",
" \n",
"print('\\nResults for 1000 integers in range 1-10') \n",
"rand_ints = [random.randrange(1, 10) for i in range(1000)]\n",
"empty_dict = {}\n",
"rand_ints = [random.randrange(1, 10) for i in range(100)]\n",
"%timeit add_element_check1(rand_ints)\n",
"%timeit add_element_check2(rand_ints)\n",
"%timeit add_element_except(rand_ints)\n",
"%timeit add_element_defaultdict(rand_ints)\n",
"\n",
"%timeit add_element_check1(copy.deepcopy(empty_dict), rand_ints)\n",
"%timeit add_element_check2(copy.deepcopy(empty_dict), rand_ints)\n",
"%timeit add_element_except(copy.deepcopy(empty_dict), rand_ints)\n",
"print('\\nResults for 1000 integers in range 1-5') \n",
"rand_ints = [random.randrange(1, 5) for i in range(1000)]\n",
"%timeit add_element_check1(rand_ints)\n",
"%timeit add_element_check2(rand_ints)\n",
"%timeit add_element_except(rand_ints)\n",
"%timeit add_element_defaultdict(rand_ints)\n",
"\n",
"print('\\nResults for 1000 integers in range 1-1000') \n",
"rand_ints = [random.randrange(1, 10) for i in range(1000)]\n",
"empty_dict = {}\n",
"\n",
"%timeit add_element_check1(copy.deepcopy(empty_dict), rand_ints)\n",
"%timeit add_element_check2(copy.deepcopy(empty_dict), rand_ints)\n",
"%timeit add_element_except(copy.deepcopy(empty_dict), rand_ints)\n",
"\n",
"#\n",
"# Python 3.4.0\n",
"# MacOS X 10.9.2\n",
"# 2.5 GHz Intel Core i5\n",
"# 4 GB 1600 Mhz DDR3\n",
"#"
"rand_ints = [random.randrange(1, 1000) for i in range(1000)]\n",
"%timeit add_element_check1(rand_ints)\n",
"%timeit add_element_check2(rand_ints)\n",
"%timeit add_element_except(rand_ints)\n",
"%timeit add_element_defaultdict(rand_ints)"
],
"language": "python",
"metadata": {},
@ -921,7 +922,7 @@
"stream": "stdout",
"text": [
"Results for 100 integers in range 1-10\n",
"100000 loops, best of 3: 16.6 \u00b5s per loop"
"10000 loops, best of 3: 24.6 \u00b5s per loop"
]
},
{
@ -929,7 +930,7 @@
"stream": "stdout",
"text": [
"\n",
"100000 loops, best of 3: 17.6 \u00b5s per loop"
"10000 loops, best of 3: 26.2 \u00b5s per loop"
]
},
{
@ -937,7 +938,15 @@
"stream": "stdout",
"text": [
"\n",
"100000 loops, best of 3: 17.9 \u00b5s per loop"
"10000 loops, best of 3: 25.4 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"10000 loops, best of 3: 23 \u00b5s per loop"
]
},
{
@ -946,8 +955,8 @@
"text": [
"\n",
"\n",
"Results for 1000 integers in range 1-10\n",
"10000 loops, best of 3: 135 \u00b5s per loop"
"Results for 1000 integers in range 1-5\n",
"1000 loops, best of 3: 236 \u00b5s per loop"
]
},
{
@ -955,7 +964,7 @@
"stream": "stdout",
"text": [
"\n",
"10000 loops, best of 3: 125 \u00b5s per loop"
"1000 loops, best of 3: 235 \u00b5s per loop"
]
},
{
@ -963,7 +972,15 @@
"stream": "stdout",
"text": [
"\n",
"10000 loops, best of 3: 105 \u00b5s per loop"
"1000 loops, best of 3: 207 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"10000 loops, best of 3: 177 \u00b5s per loop"
]
},
{
@ -973,7 +990,7 @@
"\n",
"\n",
"Results for 1000 integers in range 1-1000\n",
"10000 loops, best of 3: 122 \u00b5s per loop"
"1000 loops, best of 3: 268 \u00b5s per loop"
]
},
{
@ -981,7 +998,7 @@
"stream": "stdout",
"text": [
"\n",
"10000 loops, best of 3: 123 \u00b5s per loop"
"1000 loops, best of 3: 377 \u00b5s per loop"
]
},
{
@ -989,7 +1006,15 @@
"stream": "stdout",
"text": [
"\n",
"10000 loops, best of 3: 104 \u00b5s per loop"
"1000 loops, best of 3: 511 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"1000 loops, best of 3: 410 \u00b5s per loop"
]
},
{
@ -1000,14 +1025,21 @@
]
}
],
"prompt_number": 13
"prompt_number": 16
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Conclusion\n",
"Interestingly, the `try-except` loop pays off if we have more elements (here: 1000 integers instead of 100) as dictionary keys to check. Also, it doesn't matter much whether the elements exist or do not exist in the dictionary, yet."
"### Conclusion"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We see from the results that the `try-except` variant is faster than the `if element in my_dict` alternative if we have a low number of unique elements (here: 1000 integers in the range 1-5), which makes sense: the `except`-block is skipped if an element has already been added as a key to the dictionary. However, in this case `collections.defaultdict` performs even better. \n",
"However, if we have a relatively large number of unique entries (here: 1000 integers in the range 1-1000), the `if element in my_dict` approach outperforms the alternative approaches."
]
},
{

View File

@ -1,7 +1,7 @@
{
"metadata": {
"name": "",
"signature": "sha256:d5895f75b2ac58db150d7b521682366a447ffb2fb0b7db7e551edd40e6d1ab10"
"signature": "sha256:8dc4f91bc6a88e15ab0d25fac35b9a7645a7149b5ab4e1e15b2b372362e82ae2"
},
"nbformat": 3,
"nbformat_minor": 0,
@ -855,63 +855,64 @@
"collapsed": false,
"input": [
"import random\n",
"import copy\n",
"import timeit\n",
"from collections import defaultdict\n",
"\n",
"\n",
"\n",
"def add_element_check1(my_dict, elements):\n",
"def add_element_check1(elements):\n",
" d = dict()\n",
" for e in elements:\n",
" if e not in my_dict:\n",
" my_dict[e] = 1\n",
" if e not in d:\n",
" d[e] = 1\n",
" else:\n",
" my_dict[e] += 1\n",
" d[e] += 1\n",
" return d\n",
" \n",
"def add_element_check2(my_dict, elements):\n",
"def add_element_check2(elements):\n",
" d = dict()\n",
" for e in elements:\n",
" if e not in my_dict:\n",
" my_dict[e] = 0\n",
" my_dict[e] += 1 \n",
"\n",
"def add_element_except(my_dict, elements):\n",
" if e not in d:\n",
" d[e] = 0\n",
" d[e] += 1 \n",
" return d\n",
" \n",
"def add_element_except(elements):\n",
" d = dict()\n",
" for e in elements:\n",
" try:\n",
" my_dict[e] += 1\n",
" d[e] += 1\n",
" except KeyError:\n",
" my_dict[e] = 1\n",
" d[e] = 1\n",
" return d\n",
" \n",
"def add_element_defaultdict(elements):\n",
" d = defaultdict(int)\n",
" for e in elements:\n",
" d[e] += 1\n",
" return d\n",
"\n",
"random.seed(123)\n",
"rand_ints = [random.randrange(1, 10) for i in range(100)]\n",
"empty_dict = {}\n",
"\n",
"print('Results for 100 integers in range 1-10') \n",
"%timeit add_element_check1(copy.deepcopy(empty_dict), rand_ints)\n",
"%timeit add_element_check2(copy.deepcopy(empty_dict), rand_ints)\n",
"%timeit add_element_except(copy.deepcopy(empty_dict), rand_ints)\n",
" \n",
"print('\\nResults for 1000 integers in range 1-10') \n",
"rand_ints = [random.randrange(1, 10) for i in range(1000)]\n",
"empty_dict = {}\n",
"rand_ints = [random.randrange(1, 10) for i in range(100)]\n",
"%timeit add_element_check1(rand_ints)\n",
"%timeit add_element_check2(rand_ints)\n",
"%timeit add_element_except(rand_ints)\n",
"%timeit add_element_defaultdict(rand_ints)\n",
"\n",
"%timeit add_element_check1(copy.deepcopy(empty_dict), rand_ints)\n",
"%timeit add_element_check2(copy.deepcopy(empty_dict), rand_ints)\n",
"%timeit add_element_except(copy.deepcopy(empty_dict), rand_ints)\n",
"print('\\nResults for 1000 integers in range 1-5') \n",
"rand_ints = [random.randrange(1, 5) for i in range(1000)]\n",
"%timeit add_element_check1(rand_ints)\n",
"%timeit add_element_check2(rand_ints)\n",
"%timeit add_element_except(rand_ints)\n",
"%timeit add_element_defaultdict(rand_ints)\n",
"\n",
"print('\\nResults for 1000 integers in range 1-1000') \n",
"rand_ints = [random.randrange(1, 10) for i in range(1000)]\n",
"empty_dict = {}\n",
"\n",
"%timeit add_element_check1(copy.deepcopy(empty_dict), rand_ints)\n",
"%timeit add_element_check2(copy.deepcopy(empty_dict), rand_ints)\n",
"%timeit add_element_except(copy.deepcopy(empty_dict), rand_ints)\n",
"\n",
"#\n",
"# Python 3.4.0\n",
"# MacOS X 10.9.2\n",
"# 2.5 GHz Intel Core i5\n",
"# 4 GB 1600 Mhz DDR3\n",
"#"
"rand_ints = [random.randrange(1, 1000) for i in range(1000)]\n",
"%timeit add_element_check1(rand_ints)\n",
"%timeit add_element_check2(rand_ints)\n",
"%timeit add_element_except(rand_ints)\n",
"%timeit add_element_defaultdict(rand_ints)"
],
"language": "python",
"metadata": {},
@ -921,7 +922,7 @@
"stream": "stdout",
"text": [
"Results for 100 integers in range 1-10\n",
"100000 loops, best of 3: 16.6 \u00b5s per loop"
"10000 loops, best of 3: 24.6 \u00b5s per loop"
]
},
{
@ -929,7 +930,7 @@
"stream": "stdout",
"text": [
"\n",
"100000 loops, best of 3: 17.6 \u00b5s per loop"
"10000 loops, best of 3: 26.2 \u00b5s per loop"
]
},
{
@ -937,7 +938,15 @@
"stream": "stdout",
"text": [
"\n",
"100000 loops, best of 3: 17.9 \u00b5s per loop"
"10000 loops, best of 3: 25.4 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"10000 loops, best of 3: 23 \u00b5s per loop"
]
},
{
@ -946,8 +955,8 @@
"text": [
"\n",
"\n",
"Results for 1000 integers in range 1-10\n",
"10000 loops, best of 3: 135 \u00b5s per loop"
"Results for 1000 integers in range 1-5\n",
"1000 loops, best of 3: 236 \u00b5s per loop"
]
},
{
@ -955,7 +964,7 @@
"stream": "stdout",
"text": [
"\n",
"10000 loops, best of 3: 125 \u00b5s per loop"
"1000 loops, best of 3: 235 \u00b5s per loop"
]
},
{
@ -963,7 +972,15 @@
"stream": "stdout",
"text": [
"\n",
"10000 loops, best of 3: 105 \u00b5s per loop"
"1000 loops, best of 3: 207 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"10000 loops, best of 3: 177 \u00b5s per loop"
]
},
{
@ -973,7 +990,7 @@
"\n",
"\n",
"Results for 1000 integers in range 1-1000\n",
"10000 loops, best of 3: 122 \u00b5s per loop"
"1000 loops, best of 3: 268 \u00b5s per loop"
]
},
{
@ -981,7 +998,7 @@
"stream": "stdout",
"text": [
"\n",
"10000 loops, best of 3: 123 \u00b5s per loop"
"1000 loops, best of 3: 377 \u00b5s per loop"
]
},
{
@ -989,7 +1006,15 @@
"stream": "stdout",
"text": [
"\n",
"10000 loops, best of 3: 104 \u00b5s per loop"
"1000 loops, best of 3: 511 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"1000 loops, best of 3: 410 \u00b5s per loop"
]
},
{
@ -1000,14 +1025,21 @@
]
}
],
"prompt_number": 13
"prompt_number": 16
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Conclusion\n",
"Interestingly, the `try-except` loop pays off if we have more elements (here: 1000 integers instead of 100) as dictionary keys to check. Also, it doesn't matter much whether the elements exist or do not exist in the dictionary, yet."
"### Conclusion"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We see from the results that the `try-except` variant is faster than the `if element in my_dict` alternative if we have a low number of unique elements (here: 1000 integers in the range 1-5), which makes sense: the `except`-block is skipped if an element has already been added as a key to the dictionary. However, in this case `collections.defaultdict` performs even better. \n",
"However, if we have a relatively large number of unique entries (here: 1000 integers in the range 1-1000), the `if element in my_dict` approach outperforms the alternative approaches."
]
},
{