mirror of
https://github.com/rasbt/python_reference.git
synced 2025-01-18 07:17:06 +00:00
defaultdict
This commit is contained in:
parent
eb35644f7b
commit
409d953a22
|
@ -1,7 +1,7 @@
|
|||
{
|
||||
"metadata": {
|
||||
"name": "",
|
||||
"signature": "sha256:d5895f75b2ac58db150d7b521682366a447ffb2fb0b7db7e551edd40e6d1ab10"
|
||||
"signature": "sha256:8dc4f91bc6a88e15ab0d25fac35b9a7645a7149b5ab4e1e15b2b372362e82ae2"
|
||||
},
|
||||
"nbformat": 3,
|
||||
"nbformat_minor": 0,
|
||||
|
@ -855,63 +855,64 @@
|
|||
"collapsed": false,
|
||||
"input": [
|
||||
"import random\n",
|
||||
"import copy\n",
|
||||
"import timeit\n",
|
||||
"from collections import defaultdict\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def add_element_check1(my_dict, elements):\n",
|
||||
"def add_element_check1(elements):\n",
|
||||
" d = dict()\n",
|
||||
" for e in elements:\n",
|
||||
" if e not in my_dict:\n",
|
||||
" my_dict[e] = 1\n",
|
||||
" if e not in d:\n",
|
||||
" d[e] = 1\n",
|
||||
" else:\n",
|
||||
" my_dict[e] += 1\n",
|
||||
" d[e] += 1\n",
|
||||
" return d\n",
|
||||
" \n",
|
||||
"def add_element_check2(my_dict, elements):\n",
|
||||
"def add_element_check2(elements):\n",
|
||||
" d = dict()\n",
|
||||
" for e in elements:\n",
|
||||
" if e not in my_dict:\n",
|
||||
" my_dict[e] = 0\n",
|
||||
" my_dict[e] += 1 \n",
|
||||
"\n",
|
||||
"def add_element_except(my_dict, elements):\n",
|
||||
" if e not in d:\n",
|
||||
" d[e] = 0\n",
|
||||
" d[e] += 1 \n",
|
||||
" return d\n",
|
||||
" \n",
|
||||
"def add_element_except(elements):\n",
|
||||
" d = dict()\n",
|
||||
" for e in elements:\n",
|
||||
" try:\n",
|
||||
" my_dict[e] += 1\n",
|
||||
" d[e] += 1\n",
|
||||
" except KeyError:\n",
|
||||
" my_dict[e] = 1\n",
|
||||
" d[e] = 1\n",
|
||||
" return d\n",
|
||||
" \n",
|
||||
"def add_element_defaultdict(elements):\n",
|
||||
" d = defaultdict(int)\n",
|
||||
" for e in elements:\n",
|
||||
" d[e] += 1\n",
|
||||
" return d\n",
|
||||
"\n",
|
||||
"random.seed(123)\n",
|
||||
"rand_ints = [random.randrange(1, 10) for i in range(100)]\n",
|
||||
"empty_dict = {}\n",
|
||||
"\n",
|
||||
"print('Results for 100 integers in range 1-10') \n",
|
||||
"%timeit add_element_check1(copy.deepcopy(empty_dict), rand_ints)\n",
|
||||
"%timeit add_element_check2(copy.deepcopy(empty_dict), rand_ints)\n",
|
||||
"%timeit add_element_except(copy.deepcopy(empty_dict), rand_ints)\n",
|
||||
" \n",
|
||||
"print('\\nResults for 1000 integers in range 1-10') \n",
|
||||
"rand_ints = [random.randrange(1, 10) for i in range(1000)]\n",
|
||||
"empty_dict = {}\n",
|
||||
"rand_ints = [random.randrange(1, 10) for i in range(100)]\n",
|
||||
"%timeit add_element_check1(rand_ints)\n",
|
||||
"%timeit add_element_check2(rand_ints)\n",
|
||||
"%timeit add_element_except(rand_ints)\n",
|
||||
"%timeit add_element_defaultdict(rand_ints)\n",
|
||||
"\n",
|
||||
"%timeit add_element_check1(copy.deepcopy(empty_dict), rand_ints)\n",
|
||||
"%timeit add_element_check2(copy.deepcopy(empty_dict), rand_ints)\n",
|
||||
"%timeit add_element_except(copy.deepcopy(empty_dict), rand_ints)\n",
|
||||
"print('\\nResults for 1000 integers in range 1-5') \n",
|
||||
"rand_ints = [random.randrange(1, 5) for i in range(1000)]\n",
|
||||
"%timeit add_element_check1(rand_ints)\n",
|
||||
"%timeit add_element_check2(rand_ints)\n",
|
||||
"%timeit add_element_except(rand_ints)\n",
|
||||
"%timeit add_element_defaultdict(rand_ints)\n",
|
||||
"\n",
|
||||
"print('\\nResults for 1000 integers in range 1-1000') \n",
|
||||
"rand_ints = [random.randrange(1, 10) for i in range(1000)]\n",
|
||||
"empty_dict = {}\n",
|
||||
"\n",
|
||||
"%timeit add_element_check1(copy.deepcopy(empty_dict), rand_ints)\n",
|
||||
"%timeit add_element_check2(copy.deepcopy(empty_dict), rand_ints)\n",
|
||||
"%timeit add_element_except(copy.deepcopy(empty_dict), rand_ints)\n",
|
||||
"\n",
|
||||
"#\n",
|
||||
"# Python 3.4.0\n",
|
||||
"# MacOS X 10.9.2\n",
|
||||
"# 2.5 GHz Intel Core i5\n",
|
||||
"# 4 GB 1600 Mhz DDR3\n",
|
||||
"#"
|
||||
"rand_ints = [random.randrange(1, 1000) for i in range(1000)]\n",
|
||||
"%timeit add_element_check1(rand_ints)\n",
|
||||
"%timeit add_element_check2(rand_ints)\n",
|
||||
"%timeit add_element_except(rand_ints)\n",
|
||||
"%timeit add_element_defaultdict(rand_ints)"
|
||||
],
|
||||
"language": "python",
|
||||
"metadata": {},
|
||||
|
@ -921,7 +922,7 @@
|
|||
"stream": "stdout",
|
||||
"text": [
|
||||
"Results for 100 integers in range 1-10\n",
|
||||
"100000 loops, best of 3: 16.6 \u00b5s per loop"
|
||||
"10000 loops, best of 3: 24.6 \u00b5s per loop"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -929,7 +930,7 @@
|
|||
"stream": "stdout",
|
||||
"text": [
|
||||
"\n",
|
||||
"100000 loops, best of 3: 17.6 \u00b5s per loop"
|
||||
"10000 loops, best of 3: 26.2 \u00b5s per loop"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -937,7 +938,15 @@
|
|||
"stream": "stdout",
|
||||
"text": [
|
||||
"\n",
|
||||
"100000 loops, best of 3: 17.9 \u00b5s per loop"
|
||||
"10000 loops, best of 3: 25.4 \u00b5s per loop"
|
||||
]
|
||||
},
|
||||
{
|
||||
"output_type": "stream",
|
||||
"stream": "stdout",
|
||||
"text": [
|
||||
"\n",
|
||||
"10000 loops, best of 3: 23 \u00b5s per loop"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -946,8 +955,8 @@
|
|||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"Results for 1000 integers in range 1-10\n",
|
||||
"10000 loops, best of 3: 135 \u00b5s per loop"
|
||||
"Results for 1000 integers in range 1-5\n",
|
||||
"1000 loops, best of 3: 236 \u00b5s per loop"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -955,7 +964,7 @@
|
|||
"stream": "stdout",
|
||||
"text": [
|
||||
"\n",
|
||||
"10000 loops, best of 3: 125 \u00b5s per loop"
|
||||
"1000 loops, best of 3: 235 \u00b5s per loop"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -963,7 +972,15 @@
|
|||
"stream": "stdout",
|
||||
"text": [
|
||||
"\n",
|
||||
"10000 loops, best of 3: 105 \u00b5s per loop"
|
||||
"1000 loops, best of 3: 207 \u00b5s per loop"
|
||||
]
|
||||
},
|
||||
{
|
||||
"output_type": "stream",
|
||||
"stream": "stdout",
|
||||
"text": [
|
||||
"\n",
|
||||
"10000 loops, best of 3: 177 \u00b5s per loop"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -973,7 +990,7 @@
|
|||
"\n",
|
||||
"\n",
|
||||
"Results for 1000 integers in range 1-1000\n",
|
||||
"10000 loops, best of 3: 122 \u00b5s per loop"
|
||||
"1000 loops, best of 3: 268 \u00b5s per loop"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -981,7 +998,7 @@
|
|||
"stream": "stdout",
|
||||
"text": [
|
||||
"\n",
|
||||
"10000 loops, best of 3: 123 \u00b5s per loop"
|
||||
"1000 loops, best of 3: 377 \u00b5s per loop"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -989,7 +1006,15 @@
|
|||
"stream": "stdout",
|
||||
"text": [
|
||||
"\n",
|
||||
"10000 loops, best of 3: 104 \u00b5s per loop"
|
||||
"1000 loops, best of 3: 511 \u00b5s per loop"
|
||||
]
|
||||
},
|
||||
{
|
||||
"output_type": "stream",
|
||||
"stream": "stdout",
|
||||
"text": [
|
||||
"\n",
|
||||
"1000 loops, best of 3: 410 \u00b5s per loop"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -1000,14 +1025,21 @@
|
|||
]
|
||||
}
|
||||
],
|
||||
"prompt_number": 13
|
||||
"prompt_number": 16
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Conclusion\n",
|
||||
"Interestingly, the `try-except` loop pays off if we have more elements (here: 1000 integers instead of 100) as dictionary keys to check. Also, it doesn't matter much whether the elements exist or do not exist in the dictionary, yet."
|
||||
"### Conclusion"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We see from the results that the `try-except` variant is faster than the `if element in my_dict` alternative if we have a low number of unique elements (here: 1000 integers in the range 1-5), which makes sense: the `except`-block is skipped if an element has already been added as a key to the dictionary. However, in this case the `collections.defaultdict` performs even better. \n",
|
||||
"However, if we have a relatively large number of unique entries (here: 1000 integers in range 1-1000), the `if element in my_dict` approach outperforms the alternative approaches."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
{
|
||||
"metadata": {
|
||||
"name": "",
|
||||
"signature": "sha256:d5895f75b2ac58db150d7b521682366a447ffb2fb0b7db7e551edd40e6d1ab10"
|
||||
"signature": "sha256:8dc4f91bc6a88e15ab0d25fac35b9a7645a7149b5ab4e1e15b2b372362e82ae2"
|
||||
},
|
||||
"nbformat": 3,
|
||||
"nbformat_minor": 0,
|
||||
|
@ -855,63 +855,64 @@
|
|||
"collapsed": false,
|
||||
"input": [
|
||||
"import random\n",
|
||||
"import copy\n",
|
||||
"import timeit\n",
|
||||
"from collections import defaultdict\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def add_element_check1(my_dict, elements):\n",
|
||||
"def add_element_check1(elements):\n",
|
||||
" d = dict()\n",
|
||||
" for e in elements:\n",
|
||||
" if e not in my_dict:\n",
|
||||
" my_dict[e] = 1\n",
|
||||
" if e not in d:\n",
|
||||
" d[e] = 1\n",
|
||||
" else:\n",
|
||||
" my_dict[e] += 1\n",
|
||||
" d[e] += 1\n",
|
||||
" return d\n",
|
||||
" \n",
|
||||
"def add_element_check2(my_dict, elements):\n",
|
||||
"def add_element_check2(elements):\n",
|
||||
" d = dict()\n",
|
||||
" for e in elements:\n",
|
||||
" if e not in my_dict:\n",
|
||||
" my_dict[e] = 0\n",
|
||||
" my_dict[e] += 1 \n",
|
||||
"\n",
|
||||
"def add_element_except(my_dict, elements):\n",
|
||||
" if e not in d:\n",
|
||||
" d[e] = 0\n",
|
||||
" d[e] += 1 \n",
|
||||
" return d\n",
|
||||
" \n",
|
||||
"def add_element_except(elements):\n",
|
||||
" d = dict()\n",
|
||||
" for e in elements:\n",
|
||||
" try:\n",
|
||||
" my_dict[e] += 1\n",
|
||||
" d[e] += 1\n",
|
||||
" except KeyError:\n",
|
||||
" my_dict[e] = 1\n",
|
||||
" d[e] = 1\n",
|
||||
" return d\n",
|
||||
" \n",
|
||||
"def add_element_defaultdict(elements):\n",
|
||||
" d = defaultdict(int)\n",
|
||||
" for e in elements:\n",
|
||||
" d[e] += 1\n",
|
||||
" return d\n",
|
||||
"\n",
|
||||
"random.seed(123)\n",
|
||||
"rand_ints = [random.randrange(1, 10) for i in range(100)]\n",
|
||||
"empty_dict = {}\n",
|
||||
"\n",
|
||||
"print('Results for 100 integers in range 1-10') \n",
|
||||
"%timeit add_element_check1(copy.deepcopy(empty_dict), rand_ints)\n",
|
||||
"%timeit add_element_check2(copy.deepcopy(empty_dict), rand_ints)\n",
|
||||
"%timeit add_element_except(copy.deepcopy(empty_dict), rand_ints)\n",
|
||||
" \n",
|
||||
"print('\\nResults for 1000 integers in range 1-10') \n",
|
||||
"rand_ints = [random.randrange(1, 10) for i in range(1000)]\n",
|
||||
"empty_dict = {}\n",
|
||||
"rand_ints = [random.randrange(1, 10) for i in range(100)]\n",
|
||||
"%timeit add_element_check1(rand_ints)\n",
|
||||
"%timeit add_element_check2(rand_ints)\n",
|
||||
"%timeit add_element_except(rand_ints)\n",
|
||||
"%timeit add_element_defaultdict(rand_ints)\n",
|
||||
"\n",
|
||||
"%timeit add_element_check1(copy.deepcopy(empty_dict), rand_ints)\n",
|
||||
"%timeit add_element_check2(copy.deepcopy(empty_dict), rand_ints)\n",
|
||||
"%timeit add_element_except(copy.deepcopy(empty_dict), rand_ints)\n",
|
||||
"print('\\nResults for 1000 integers in range 1-5') \n",
|
||||
"rand_ints = [random.randrange(1, 5) for i in range(1000)]\n",
|
||||
"%timeit add_element_check1(rand_ints)\n",
|
||||
"%timeit add_element_check2(rand_ints)\n",
|
||||
"%timeit add_element_except(rand_ints)\n",
|
||||
"%timeit add_element_defaultdict(rand_ints)\n",
|
||||
"\n",
|
||||
"print('\\nResults for 1000 integers in range 1-1000') \n",
|
||||
"rand_ints = [random.randrange(1, 10) for i in range(1000)]\n",
|
||||
"empty_dict = {}\n",
|
||||
"\n",
|
||||
"%timeit add_element_check1(copy.deepcopy(empty_dict), rand_ints)\n",
|
||||
"%timeit add_element_check2(copy.deepcopy(empty_dict), rand_ints)\n",
|
||||
"%timeit add_element_except(copy.deepcopy(empty_dict), rand_ints)\n",
|
||||
"\n",
|
||||
"#\n",
|
||||
"# Python 3.4.0\n",
|
||||
"# MacOS X 10.9.2\n",
|
||||
"# 2.5 GHz Intel Core i5\n",
|
||||
"# 4 GB 1600 Mhz DDR3\n",
|
||||
"#"
|
||||
"rand_ints = [random.randrange(1, 1000) for i in range(1000)]\n",
|
||||
"%timeit add_element_check1(rand_ints)\n",
|
||||
"%timeit add_element_check2(rand_ints)\n",
|
||||
"%timeit add_element_except(rand_ints)\n",
|
||||
"%timeit add_element_defaultdict(rand_ints)"
|
||||
],
|
||||
"language": "python",
|
||||
"metadata": {},
|
||||
|
@ -921,7 +922,7 @@
|
|||
"stream": "stdout",
|
||||
"text": [
|
||||
"Results for 100 integers in range 1-10\n",
|
||||
"100000 loops, best of 3: 16.6 \u00b5s per loop"
|
||||
"10000 loops, best of 3: 24.6 \u00b5s per loop"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -929,7 +930,7 @@
|
|||
"stream": "stdout",
|
||||
"text": [
|
||||
"\n",
|
||||
"100000 loops, best of 3: 17.6 \u00b5s per loop"
|
||||
"10000 loops, best of 3: 26.2 \u00b5s per loop"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -937,7 +938,15 @@
|
|||
"stream": "stdout",
|
||||
"text": [
|
||||
"\n",
|
||||
"100000 loops, best of 3: 17.9 \u00b5s per loop"
|
||||
"10000 loops, best of 3: 25.4 \u00b5s per loop"
|
||||
]
|
||||
},
|
||||
{
|
||||
"output_type": "stream",
|
||||
"stream": "stdout",
|
||||
"text": [
|
||||
"\n",
|
||||
"10000 loops, best of 3: 23 \u00b5s per loop"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -946,8 +955,8 @@
|
|||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"Results for 1000 integers in range 1-10\n",
|
||||
"10000 loops, best of 3: 135 \u00b5s per loop"
|
||||
"Results for 1000 integers in range 1-5\n",
|
||||
"1000 loops, best of 3: 236 \u00b5s per loop"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -955,7 +964,7 @@
|
|||
"stream": "stdout",
|
||||
"text": [
|
||||
"\n",
|
||||
"10000 loops, best of 3: 125 \u00b5s per loop"
|
||||
"1000 loops, best of 3: 235 \u00b5s per loop"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -963,7 +972,15 @@
|
|||
"stream": "stdout",
|
||||
"text": [
|
||||
"\n",
|
||||
"10000 loops, best of 3: 105 \u00b5s per loop"
|
||||
"1000 loops, best of 3: 207 \u00b5s per loop"
|
||||
]
|
||||
},
|
||||
{
|
||||
"output_type": "stream",
|
||||
"stream": "stdout",
|
||||
"text": [
|
||||
"\n",
|
||||
"10000 loops, best of 3: 177 \u00b5s per loop"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -973,7 +990,7 @@
|
|||
"\n",
|
||||
"\n",
|
||||
"Results for 1000 integers in range 1-1000\n",
|
||||
"10000 loops, best of 3: 122 \u00b5s per loop"
|
||||
"1000 loops, best of 3: 268 \u00b5s per loop"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -981,7 +998,7 @@
|
|||
"stream": "stdout",
|
||||
"text": [
|
||||
"\n",
|
||||
"10000 loops, best of 3: 123 \u00b5s per loop"
|
||||
"1000 loops, best of 3: 377 \u00b5s per loop"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -989,7 +1006,15 @@
|
|||
"stream": "stdout",
|
||||
"text": [
|
||||
"\n",
|
||||
"10000 loops, best of 3: 104 \u00b5s per loop"
|
||||
"1000 loops, best of 3: 511 \u00b5s per loop"
|
||||
]
|
||||
},
|
||||
{
|
||||
"output_type": "stream",
|
||||
"stream": "stdout",
|
||||
"text": [
|
||||
"\n",
|
||||
"1000 loops, best of 3: 410 \u00b5s per loop"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -1000,14 +1025,21 @@
|
|||
]
|
||||
}
|
||||
],
|
||||
"prompt_number": 13
|
||||
"prompt_number": 16
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Conclusion\n",
|
||||
"Interestingly, the `try-except` loop pays off if we have more elements (here: 1000 integers instead of 100) as dictionary keys to check. Also, it doesn't matter much whether the elements exist or do not exist in the dictionary, yet."
|
||||
"### Conclusion"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We see from the results that the `try-except` variant is faster than the `if element in my_dict` alternative if we have a low number of unique elements (here: 1000 integers in the range 1-5), which makes sense: the `except`-block is skipped if an element has already been added as a key to the dictionary. However, in this case the `collections.defaultdict` performs even better. \n",
|
||||
"However, if we have a relatively large number of unique entries (here: 1000 integers in range 1-1000), the `if element in my_dict` approach outperforms the alternative approaches."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue
Block a user