removed temp files

This commit is contained in:
rasbt 2014-05-06 14:45:03 -04:00
parent 667e4f6e32
commit 3db65c7686
8 changed files with 1 additions and 10805 deletions

1
.gitignore vendored
View File

@ -1,3 +1,4 @@
*.ipynb_checkpoints/
.DS_Store
*.DS_Store
*.pyc

File diff suppressed because it is too large Load Diff

View File

@ -1,194 +0,0 @@
{
"metadata": {
"name": "",
"signature": "sha256:6ea19109869c82ee989c8ea0599ec49401e74246a542ad0b7b05f6ef464bda19"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Sebastian Raschka 04/2014\n",
"\n",
"# Timing different implementations of palindrome functions"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import re\n",
"import timeit\n",
"import string\n",
"\n",
"# All functions return True if an input string is a palindrome. Else returns False.\n",
"\n",
"\n",
"\n",
"####\n",
"#### case-insensitive ignoring punctuation characters\n",
"####\n",
"\n",
"def palindrome_short(my_str):\n",
" stripped_str = \"\".join(l.lower() for l in my_str if l.isalpha())\n",
" return stripped_str == stripped_str[::-1]\n",
"\n",
"def palindrome_regex(my_str):\n",
" return re.sub('\\W', '', my_str.lower()) == re.sub('\\W', '', my_str[::-1].lower())\n",
"\n",
"def palindrome_stringlib(my_str):\n",
" LOWERS = set(string.ascii_lowercase)\n",
" letters = [c for c in my_str.lower() if c in LOWERS]\n",
" return letters == letters[::-1]\n",
"\n",
"LOWERS = set(string.ascii_lowercase)\n",
"def palindrome_stringlib2(my_str):\n",
" letters = [c for c in my_str.lower() if c in LOWERS]\n",
" return letters == letters[::-1]\n",
"\n",
"def palindrome_isalpha(my_str):\n",
" stripped_str = [l for l in my_str.lower() if l.isalpha()]\n",
" return stripped_str == stripped_str[::-1]\n",
"\n",
"\n",
"\n",
"####\n",
"#### functions considering all characters (case-sensitive)\n",
"####\n",
"\n",
"def palindrome_reverse1(my_str):\n",
" return my_str == my_str[::-1]\n",
"\n",
"def palindrome_reverse2(my_str):\n",
" return my_str == ''.join(reversed(my_str))\n",
"\n",
"def palindrome_recurs(my_str):\n",
" if len(my_str) < 2:\n",
" return True\n",
" if my_str[0] != my_str[-1]:\n",
" return False\n",
" return palindrome_recurs(my_str[1:-1])\n",
"\n",
"\n",
"\n"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"test_str = \"Go hang a salami. I'm a lasagna hog.\"\n",
"\n",
"print('case-insensitive functions ignoring punctuation characters')\n",
"%timeit palindrome_short(test_str)\n",
"%timeit palindrome_regex(test_str)\n",
"%timeit palindrome_stringlib(test_str)\n",
"%timeit palindrome_stringlib2(test_str)\n",
"%timeit palindrome_isalpha(test_str)\n",
"\n",
"print('\\n\\nfunctions considering all characters (case-sensitive)')\n",
"%timeit palindrome_reverse1(test_str)\n",
"%timeit palindrome_reverse2(test_str)\n",
"%timeit palindrome_recurs(test_str)\n"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"case-insensitive functions ignoring punctuation characters\n",
"100000 loops, best of 3: 15.3 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"100000 loops, best of 3: 19.9 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"100000 loops, best of 3: 13.5 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"100000 loops, best of 3: 8.58 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"100000 loops, best of 3: 9.42 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"\n",
"\n",
"functions considering all characters (case-sensitive)\n",
"1000000 loops, best of 3: 508 ns per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"100000 loops, best of 3: 3.08 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"1000000 loops, best of 3: 480 ns per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n"
]
}
],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}

View File

@ -1,578 +0,0 @@
{
"metadata": {
"name": ""
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Sebastian Raschka, 03/2014 \n",
"Code was executed in Python 3.4.0"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### `True` and `False` in the `datetime` module\n",
"\n",
"Pointed out in a nice article **\"A false midnight\"** at [http://lwn.net/SubscriberLink/590299/bf73fe823974acea/](http://lwn.net/SubscriberLink/590299/bf73fe823974acea/):\n",
"\n",
"*\"it often comes as a big surprise for programmers to find (sometimes by way of a hard-to-reproduce bug) that, \n",
"unlike any other time value, midnight (i.e. datetime.time(0,0,0)) is False. \n",
"A long discussion on the python-ideas mailing list shows that, while surprising, \n",
"that behavior is desirable\u2014at least in some quarters.\"*"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import datetime\n",
"\n",
"print('\"datetime.time(0,0,0)\" (Midnight) evaluates to', bool(datetime.time(0,0,0)))\n",
"\n",
"print('\"datetime.time(1,0,0)\" (1 am) evaluates to', bool(datetime.time(1,0,0)))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\"datetime.time(0,0,0)\" (Midnight) evaluates to False\n",
"\"datetime.time(1,0,0)\" (1 am) evaluates to True\n"
]
}
],
"prompt_number": 17
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Boolean `True`"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"my_true_val = True\n",
"\n",
"\n",
"print('my_true_val == True:', my_true_val == True)\n",
"print('my_true_val is True:', my_true_val is True)\n",
"\n",
"print('my_true_val == None:', my_true_val == None)\n",
"print('my_true_val is None:', my_true_val is None)\n",
"\n",
"print('my_true_val == False:', my_true_val == False)\n",
"print('my_true_val is False:', my_true_val is False)\n",
"\n",
"print(my_true_val)\n",
"if my_true_val:\n",
" print('\"if my_true_val:\" is True')\n",
"else:\n",
" print('\"if my_true_val:\" is False')\n",
" \n",
"if not my_true_val:\n",
" print('\"if not my_true_val:\" is True')\n",
"else:\n",
" print('\"if not my_true_val:\" is False')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"my_true_val == True: True\n",
"my_true_val is True: True\n",
"my_true_val == None: False\n",
"my_true_val is None: False\n",
"my_true_val == False: False\n",
"my_true_val is False: False\n",
"\"if my_true_val:\" is True\n",
"\"if not my_true_val:\" is False\n"
]
}
],
"prompt_number": 83
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Boolean `False`"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"my_false_val = False\n",
"\n",
"\n",
"print('my_false_val == True:', my_false_val == True)\n",
"print('my_false_val is True:', my_false_val is True)\n",
"\n",
"print('my_false_val == None:', my_false_val == None)\n",
"print('my_false_val is None:', my_false_val is None)\n",
"\n",
"print('my_false_val == False:', my_false_val == False)\n",
"print('my_false_val is False:', my_false_val is False)\n",
"\n",
"\n",
"if my_false_val:\n",
" print('\"if my_false_val:\" is True')\n",
"else:\n",
" print('\"if my_false_val:\" is False')\n",
" \n",
"if not my_false_val:\n",
" print('\"if not my_false_val:\" is True')\n",
"else:\n",
" print('\"if not my_false_val:\" is False')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"my_false_val == True: False\n",
"my_false_val is True: False\n",
"my_false_val == None: False\n",
"my_false_val is None: False\n",
"my_false_val == False: True\n",
"my_false_val is False: True\n",
"\"if my_false_val:\" is False\n",
"\"if not my_false_val:\" is True\n"
]
}
],
"prompt_number": 76
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## `None` 'value'"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"my_none_var = None\n",
"\n",
"print('my_none_var == True:', my_none_var == True)\n",
"print('my_none_var is True:', my_none_var is True)\n",
"\n",
"print('my_none_var == None:', my_none_var == None)\n",
"print('my_none_var is None:', my_none_var is None)\n",
"\n",
"print('my_none_var == False:', my_none_var == False)\n",
"print('my_none_var is False:', my_none_var is False)\n",
"\n",
"\n",
"if my_none_var:\n",
" print('\"if my_none_var:\" is True')\n",
"else:\n",
" print('\"if my_none_var:\" is False')\n",
"\n",
"if not my_none_var:\n",
" print('\"if not my_none_var:\" is True')\n",
"else:\n",
" print('\"if not my_none_var:\" is False')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"my_none_var == True: False\n",
"my_none_var is True: False\n",
"my_none_var == None: True\n",
"my_none_var is None: True\n",
"my_none_var == False: False\n",
"my_none_var is False: False\n",
"\"if my_none_var:\" is False\n",
"\"if not my_none_var:\" is True\n"
]
}
],
"prompt_number": 62
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Empty String"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"my_empty_string = \"\"\n",
"\n",
"print('my_empty_string == True:', my_empty_string == True)\n",
"print('my_empty_string is True:', my_empty_string is True)\n",
"\n",
"print('my_empty_string == None:', my_empty_string == None)\n",
"print('my_empty_string is None:', my_empty_string is None)\n",
"\n",
"print('my_empty_string == False:', my_empty_string == False)\n",
"print('my_empty_string is False:', my_empty_string is False)\n",
"\n",
"\n",
"if my_empty_string:\n",
" print('\"if my_empty_string:\" is True')\n",
"else:\n",
" print('\"if my_empty_string:\" is False')\n",
" \n",
"if not my_empty_string:\n",
" print('\"if not my_empty_string:\" is True')\n",
"else:\n",
" print('\"if not my_empty_string:\" is False')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"my_empty_string == True: False\n",
"my_empty_string is True: False\n",
"my_empty_string == None: False\n",
"my_empty_string is None: False\n",
"my_empty_string == False: False\n",
"my_empty_string is False: False\n",
"\"if my_empty_string:\" is False\n",
"\"if not my_empty_string:\" is True\n"
]
}
],
"prompt_number": 61
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Empty List\n",
"It is generally not a good idea to use the `==` to check for empty lists..."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"my_empty_list = []\n",
"\n",
"\n",
"print('my_empty_list == True:', my_empty_list == True)\n",
"print('my_empty_list is True:', my_empty_list is True)\n",
"\n",
"print('my_empty_list == None:', my_empty_list == None)\n",
"print('my_empty_list is None:', my_empty_list is None)\n",
"\n",
"print('my_empty_list == False:', my_empty_list == False)\n",
"print('my_empty_list is False:', my_empty_list is False)\n",
"\n",
"\n",
"if my_empty_list:\n",
" print('\"if my_empty_list:\" is True')\n",
"else:\n",
" print('\"if my_empty_list:\" is False')\n",
" \n",
"if not my_empty_list:\n",
" print('\"if not my_empty_list:\" is True')\n",
"else:\n",
" print('\"if not my_empty_list:\" is False')\n",
"\n",
"\n",
" \n"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"my_empty_list == True: False\n",
"my_empty_list is True: False\n",
"my_empty_list == None: False\n",
"my_empty_list is None: False\n",
"my_empty_list == False: False\n",
"my_empty_list is False: False\n",
"\"if my_empty_list:\" is False\n",
"\"if not my_empty_list:\" is True\n"
]
}
],
"prompt_number": 67
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## [0]-List"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"my_zero_list = [0]\n",
"\n",
"\n",
"print('my_zero_list == True:', my_zero_list == True)\n",
"print('my_zero_list is True:', my_zero_list is True)\n",
"\n",
"print('my_zero_list == None:', my_zero_list == None)\n",
"print('my_zero_list is None:', my_zero_list is None)\n",
"\n",
"print('my_zero_list == False:', my_zero_list == False)\n",
"print('my_zero_list is False:', my_zero_list is False)\n",
"\n",
"\n",
"if my_zero_list:\n",
" print('\"if my_zero_list:\" is True')\n",
"else:\n",
" print('\"if my_zero_list:\" is False')\n",
" \n",
"if not my_zero_list:\n",
" print('\"if not my_zero_list:\" is True')\n",
"else:\n",
" print('\"if not my_zero_list:\" is False')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"my_zero_list == True: False\n",
"my_zero_list is True: False\n",
"my_zero_list == None: False\n",
"my_zero_list is None: False\n",
"my_zero_list == False: False\n",
"my_zero_list is False: False\n",
"\"if my_zero_list:\" is True\n",
"\"if not my_zero_list:\" is False\n"
]
}
],
"prompt_number": 70
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## List comparison \n",
"List comparisons are a handy way to show the difference between `==` and `is`. \n",
"While `==` evaluates the equality of the values, `is` checks whether two names refer to the very same object (identity).\n",
"The examples below show that we can assign a pointer to the same list object by using `=`, e.g., `list1 = list2`. \n",
"a) If we want to make a **shallow** copy of the list values, we have to make a little tweak: `list1 = list2[:]`, or \n",
"b) a **deepcopy** via `list1 = copy.deepcopy(list2)`\n",
"\n",
"Possibly the best explanation of shallow vs. deep copies I've read so far:\n",
"\n",
"*** \"Shallow copies duplicate as little as possible. A shallow copy of a collection is a copy of the collection structure, not the elements. With a shallow copy, two collections now share the individual elements.\n",
"Deep copies duplicate everything. A deep copy of a collection is two collections with all of the elements in the original collection duplicated.\"***\n",
"\n",
"(via [S.Lott](http://stackoverflow.com/users/10661/s-lott) on [StackOverflow](http://stackoverflow.com/questions/184710/what-is-the-difference-between-a-deep-copy-and-a-shallow-copy))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### a) Shallow vs. deep copies for simple elements \n",
"List modification of the original list doesn't affect \n",
"shallow copies or deep copies if the list contains literals."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from copy import deepcopy\n",
"\n",
"my_first_list = [1]\n",
"my_second_list = [1]\n",
"print('my_first_list == my_second_list:', my_first_list == my_second_list)\n",
"print('my_first_list is my_second_list:', my_first_list is my_second_list)\n",
"\n",
"my_third_list = my_first_list\n",
"print('my_first_list == my_third_list:', my_first_list == my_third_list)\n",
"print('my_first_list is my_third_list:', my_first_list is my_third_list)\n",
"\n",
"my_shallow_copy = my_first_list[:]\n",
"print('my_first_list == my_shallow_copy:', my_first_list == my_shallow_copy)\n",
"print('my_first_list is my_shallow_copy:', my_first_list is my_shallow_copy)\n",
"\n",
"my_deep_copy = deepcopy(my_first_list)\n",
"print('my_first_list == my_deep_copy:', my_first_list == my_deep_copy)\n",
"print('my_first_list is my_deep_copy:', my_first_list is my_deep_copy)\n",
"\n",
"print('\\nmy_third_list:', my_third_list)\n",
"print('my_shallow_copy:', my_shallow_copy)\n",
"print('my_deep_copy:', my_deep_copy)\n",
"\n",
"my_first_list[0] = 2\n",
"print('after setting \"my_first_list[0] = 2\"')\n",
"print('my_third_list:', my_third_list)\n",
"print('my_shallow_copy:', my_shallow_copy)\n",
"print('my_deep_copy:', my_deep_copy)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"my_first_list == my_second_list: True\n",
"my_first_list is my_second_list: False\n",
"my_first_list == my_third_list: True\n",
"my_first_list is my_third_list: True\n",
"my_first_list == my_shallow_copy: True\n",
"my_first_list is my_shallow_copy: False\n",
"my_first_list == my_deep_copy: True\n",
"my_first_list is my_deep_copy: False\n",
"\n",
"my_third_list: [1]\n",
"my_shallow_copy: [1]\n",
"my_deep_copy: [1]\n",
"after setting \"my_first_list[0] = 2\"\n",
"my_third_list: [2]\n",
"my_shallow_copy: [1]\n",
"my_deep_copy: [1]\n"
]
}
],
"prompt_number": 11
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### b) Shallow vs. deep copies if list contains other structures and objects\n",
"List modification of the original list does affect \n",
"shallow copies, but not deep copies if the list contains compound objects."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"my_first_list = [[1],[2]]\n",
"my_second_list = [[1],[2]]\n",
"print('my_first_list == my_second_list:', my_first_list == my_second_list)\n",
"print('my_first_list is my_second_list:', my_first_list is my_second_list)\n",
"\n",
"my_third_list = my_first_list\n",
"print('my_first_list == my_third_list:', my_first_list == my_third_list)\n",
"print('my_first_list is my_third_list:', my_first_list is my_third_list)\n",
"\n",
"my_shallow_copy = my_first_list[:]\n",
"print('my_first_list == my_shallow_copy:', my_first_list == my_shallow_copy)\n",
"print('my_first_list is my_shallow_copy:', my_first_list is my_shallow_copy)\n",
"\n",
"my_deep_copy = deepcopy(my_first_list)\n",
"print('my_first_list == my_deep_copy:', my_first_list == my_deep_copy)\n",
"print('my_first_list is my_deep_copy:', my_first_list is my_deep_copy)\n",
"\n",
"print('\\nmy_third_list:', my_third_list)\n",
"print('my_shallow_copy:', my_shallow_copy)\n",
"print('my_deep_copy:', my_deep_copy)\n",
"\n",
"my_first_list[0][0] = 2\n",
"print('after setting \"my_first_list[0][0] = 2\"')\n",
"print('my_third_list:', my_third_list)\n",
"print('my_shallow_copy:', my_shallow_copy)\n",
"print('my_deep_copy:', my_deep_copy)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"my_first_list == my_second_list: True\n",
"my_first_list is my_second_list: False\n",
"my_first_list == my_third_list: True\n",
"my_first_list is my_third_list: True\n",
"my_first_list == my_shallow_copy: True\n",
"my_first_list is my_shallow_copy: False\n",
"my_first_list == my_deep_copy: True\n",
"my_first_list is my_deep_copy: False\n",
"\n",
"my_third_list: [[1], [2]]\n",
"my_shallow_copy: [[1], [2]]\n",
"my_deep_copy: [[1], [2]]\n",
"after setting \"my_first_list[0][0] = 2\"\n",
"my_third_list: [[2], [2]]\n",
"my_shallow_copy: [[2], [2]]\n",
"my_deep_copy: [[1], [2]]\n"
]
}
],
"prompt_number": 13
},
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": [
"Some Python oddity:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"a = 1\n",
"b = 1\n",
"print('a is b', bool(a is b))\n",
"True\n",
"\n",
"a = 999\n",
"b = 999\n",
"print('a is b', bool(a is b))\n"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"a is b True\n",
"a is b False\n"
]
}
],
"prompt_number": 1
}
],
"metadata": {}
}
]
}

File diff suppressed because it is too large Load Diff

View File

@ -1,628 +0,0 @@
{
"metadata": {
"name": "",
"signature": "sha256:5a2264b30b9632e14bd425a887a4455658fbdf9f8102fc5703ad982c3fa09b21"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Sebastian Raschka \n",
"last updated: 04/14/2014 \n",
"\n",
"[Link to this IPython Notebook on GitHub](https://github.com/rasbt/python_reference/blob/master/timeit_test.ipynb)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"\n",
"# Python benchmarks via `timeit`"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Sections\n",
"- [String operations](#string_operations)\n",
" - [String formatting: .format() vs. binary operator %s](#str_format_bin)\n",
" - [String reversing: [::-1] vs. `''.join(reversed())`](#str_reverse)\n",
" - [String concatenation: `+=` vs. `''.join()`](#string_concat)\n",
" - [Assembling strings](#string_assembly) \n",
"- [List operations](#list_operations)\n",
" - [List reversing: [::-1] vs. reverse() vs. reversed()](#list_reverse)\n",
" - [Creating lists using conditional statements](#create_cond_list)\n",
"- [Dictionary operations](#dict_ops) \n",
" - [Adding elements to a dictionary](#adding_dict_elements)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a name='string_operations'></a>\n",
"\n",
"# String operations"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a name='str_format_bin'></a>\n",
"## String formatting: `.format()` vs. binary operator `%s`\n",
"\n",
"We expect the string .format() method to perform slower than %, because it is doing the formatting for each object itself, where formatting via the binary % is hard-coded for known types. But let's see how big the difference really is..."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import timeit\n",
"\n",
"def test_format():\n",
" return ['{}'.format(i) for i in range(1000000)]\n",
"\n",
"def test_binaryop():\n",
" return ['%s' %i for i in range(1000000)]\n",
"\n",
"%timeit test_format()\n",
"%timeit test_binaryop()\n",
"\n",
"#\n",
"# Python 3.4.0\n",
"# MacOS X 10.9.2\n",
"# 2.5 GHz Intel Core i5\n",
"# 4 GB 1600 Mhz DDR3\n",
"#"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"1 loops, best of 3: 400 ms per loop\n",
"1 loops, best of 3: 241 ms per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n"
]
}
],
"prompt_number": 3
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a name='str_reverse'></a>\n",
"## String reversing: `[::-1]` vs. `''.join(reversed())`"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import timeit\n",
"\n",
"def reverse_join(my_str):\n",
" return ''.join(reversed(my_str))\n",
" \n",
"def reverse_slizing(my_str):\n",
" return my_str[::-1]\n",
"\n",
"\n",
"# Test to show that both work\n",
"a = reverse_join('abcd')\n",
"b = reverse_slizing('abcd')\n",
"assert(a == b and a == 'dcba')\n",
"\n",
"%timeit reverse_join('abcd')\n",
"%timeit reverse_slizing('abcd')\n",
"\n",
"# Python 3.4.0\n",
"# MacOS X 10.9.2\n",
"# 2.4 GHz Intel Core Duo\n",
"# 8 GB 1067 Mhz DDR3\n",
"#"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"1000000 loops, best of 3: 1.28 \u00b5s per loop\n",
"1000000 loops, best of 3: 337 ns per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n"
]
}
],
"prompt_number": 13
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a name='string_concat'></a>\n",
"## String concatenation: `+=` vs. `''.join()`"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Strings in Python are immutable objects. So, each time we append a character to a string, a new string has to be created \u201cfrom scratch\u201d in memory. Thus, the answer to the question \u201cWhat is the most efficient way to concatenate strings?\u201d is quite obvious, but the relative numbers of performance gains are nonetheless interesting."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import timeit\n",
"\n",
"def string_add(in_chars):\n",
" new_str = ''\n",
" for char in in_chars:\n",
" new_str += char\n",
" return new_str\n",
"\n",
"def string_join(in_chars):\n",
" return ''.join(in_chars)\n",
"\n",
"test_chars = ['a', 'b', 'c', 'd', 'e', 'f']\n",
"\n",
"%timeit string_add(test_chars)\n",
"%timeit string_join(test_chars)\n",
"\n",
"#\n",
"# Python 3.4.0\n",
"# MacOS X 10.9.2\n",
"# 2.5 GHz Intel Core i5\n",
"# 4 GB 1600 Mhz DDR3\n",
"#"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"1000000 loops, best of 3: 595 ns per loop\n",
"1000000 loops, best of 3: 269 ns per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n"
]
}
],
"prompt_number": 16
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a name='string_assembly'></a>\n",
"## Assembling strings\n",
"\n",
"Next, I wanted to compare different methods of string \u201cassembly.\u201d This is different from simple string concatenation, which we have seen in the previous section, since we insert values into a string, e.g., from a variable."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import timeit\n",
"\n",
"def plus_operator():\n",
" return 'a' + str(1) + str(2) \n",
" \n",
"def format_method():\n",
" return 'a{}{}'.format(1,2)\n",
" \n",
"def binary_operator():\n",
" return 'a%s%s' %(1,2)\n",
"\n",
"%timeit plus_operator()\n",
"%timeit format_method()\n",
"%timeit binary_operator()\n",
"\n",
"#\n",
"# Python 3.4.0\n",
"# MacOS X 10.9.2\n",
"# 2.5 GHz Intel Core i5\n",
"# 4 GB 1600 Mhz DDR3\n",
"#"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"1000000 loops, best of 3: 764 ns per loop\n",
"1000000 loops, best of 3: 494 ns per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"10000000 loops, best of 3: 79.3 ns per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n"
]
}
],
"prompt_number": 17
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a name='list_operations'></a>\n",
"# List operations"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a name='list_reverse'></a>\n",
"## List reversing - `[::-1]` vs. `reverse()` vs. `reversed()`"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import timeit\n",
"\n",
"def reverse_func(my_list):\n",
" new_list = my_list[:]\n",
" new_list.reverse()\n",
" return new_list\n",
" \n",
"def reversed_func(my_list):\n",
" return list(reversed(my_list))\n",
"\n",
"def reverse_slizing(my_list):\n",
" return my_list[::-1]\n",
"\n",
"%timeit reverse_func([1,2,3,4,5])\n",
"%timeit reversed_func([1,2,3,4,5])\n",
"%timeit reverse_slizing([1,2,3,4,5])\n",
"\n",
"# Python 3.4.0\n",
"# MacOS X 10.9.2\n",
"# 2.4 GHz Intel Core Duo\n",
"# 8 GB 1067 Mhz DDR3\n",
"#"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"1000000 loops, best of 3: 930 ns per loop\n",
"1000000 loops, best of 3: 1.89 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"1000000 loops, best of 3: 775 ns per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n"
]
}
],
"prompt_number": 1
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a name='create_cond_list'></a>\n",
"## Creating lists using conditional statements\n",
"\n",
"In this test, I attempted to figure out the fastest way to create a new list of elements that meet a certain criterion. For the sake of simplicity, the criterion was to check if an element is even or odd, and only if the element was even, it should be included in the list. For example, the resulting list for numbers in the range from 1 to 10 would be \n",
"[2, 4, 6, 8, 10].\n",
"\n",
"Here, I tested three different approaches: \n",
"1) a simple for loop with an if-statement check (`cond_loop()`) \n",
"2) a list comprehension (`list_compr()`) \n",
"3) the built-in filter() function (`filter_func()`) \n",
"\n",
"Note that the filter() function now returns an iterator in Python 3, so I had to wrap it in an additional list() function call."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import timeit\n",
"\n",
"def cond_loop():\n",
" even_nums = []\n",
" for i in range(100):\n",
" if i % 2 == 0:\n",
" even_nums.append(i)\n",
" return even_nums\n",
"\n",
"def list_compr():\n",
" even_nums = [i for i in range(100) if i % 2 == 0]\n",
" return even_nums\n",
" \n",
"def filter_func():\n",
" even_nums = list(filter((lambda x: x % 2 == 0), range(100)))\n",
" return even_nums\n",
"\n",
"%timeit cond_loop()\n",
"%timeit list_compr()\n",
"%timeit filter_func()\n",
"\n",
"#\n",
"# Python 3.4.0\n",
"# MacOS X 10.9.2\n",
"# 2.5 GHz Intel Core i5\n",
"# 4 GB 1600 Mhz DDR3\n",
"#"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"100000 loops, best of 3: 14.4 \u00b5s per loop\n",
"100000 loops, best of 3: 12 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"10000 loops, best of 3: 23.9 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n"
]
}
],
"prompt_number": 14
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a name='dict_ops'></a>\n",
"# Dictionary operations "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a name='adding_dict_elements'></a>\n",
"## Adding elements to a Dictionary\n",
"\n",
"All three functions below count how often different elements (values) occur in a list. \n",
"E.g., for the list ['a', 'b', 'a', 'c'], the dictionary would look like this: \n",
"`my_dict = {'a': 2, 'b': 1, 'c': 1}`"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import random\n",
"import copy\n",
"import timeit\n",
"\n",
"\n",
"\n",
"def add_element_check1(my_dict, elements):\n",
" for e in elements:\n",
" if e not in my_dict:\n",
" my_dict[e] = 1\n",
" else:\n",
" my_dict[e] += 1\n",
" \n",
"def add_element_check2(my_dict, elements):\n",
" for e in elements:\n",
" if e not in my_dict:\n",
" my_dict[e] = 0\n",
" my_dict[e] += 1 \n",
"\n",
"def add_element_except(my_dict, elements):\n",
" for e in elements:\n",
" try:\n",
" my_dict[e] += 1\n",
" except KeyError:\n",
" my_dict[e] = 1\n",
" \n",
"\n",
"random.seed(123)\n",
"rand_ints = [random.randrange(1, 10) for i in range(100)]\n",
"empty_dict = {}\n",
"\n",
"print('Results for 100 integers in range 1-10') \n",
"%timeit add_element_check1(copy.deepcopy(empty_dict), rand_ints)\n",
"%timeit add_element_check2(copy.deepcopy(empty_dict), rand_ints)\n",
"%timeit add_element_except(copy.deepcopy(empty_dict), rand_ints)\n",
" \n",
"print('\\nResults for 1000 integers in range 1-10') \n",
"rand_ints = [random.randrange(1, 10) for i in range(1000)]\n",
"empty_dict = {}\n",
"\n",
"%timeit add_element_check1(copy.deepcopy(empty_dict), rand_ints)\n",
"%timeit add_element_check2(copy.deepcopy(empty_dict), rand_ints)\n",
"%timeit add_element_except(copy.deepcopy(empty_dict), rand_ints)\n",
"\n",
"print('\\nResults for 1000 integers in range 1-1000') \n",
"rand_ints = [random.randrange(1, 1000) for i in range(1000)]\n",
"empty_dict = {}\n",
"\n",
"%timeit add_element_check1(copy.deepcopy(empty_dict), rand_ints)\n",
"%timeit add_element_check2(copy.deepcopy(empty_dict), rand_ints)\n",
"%timeit add_element_except(copy.deepcopy(empty_dict), rand_ints)\n",
"\n",
"#\n",
"# Python 3.4.0\n",
"# MacOS X 10.9.2\n",
"# 2.5 GHz Intel Core i5\n",
"# 4 GB 1600 Mhz DDR3\n",
"#"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Results for 100 integers in range 1-10\n",
"100000 loops, best of 3: 16.6 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"100000 loops, best of 3: 17.6 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"100000 loops, best of 3: 17.9 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"\n",
"Results for 1000 integers in range 1-10\n",
"10000 loops, best of 3: 135 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"10000 loops, best of 3: 125 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"10000 loops, best of 3: 105 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"\n",
"Results for 1000 integers in range 1-1000\n",
"10000 loops, best of 3: 122 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"10000 loops, best of 3: 123 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"10000 loops, best of 3: 104 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n"
]
}
],
"prompt_number": 13
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Conclusion\n",
"Interestingly, the `try-except` loop pays off if we have more elements (here: 1000 integers instead of 100) as dictionary keys to check. Also, it doesn't matter much whether the elements exist or do not exist in the dictionary, yet."
]
}
],
"metadata": {}
}
]
}

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long