row slice benchmark

This commit is contained in:
rasbt 2014-04-26 14:40:28 -04:00
parent fe21bc4838
commit cabe3a8c55
2 changed files with 476 additions and 20 deletions

View File

@ -1,7 +1,7 @@
{
"metadata": {
"name": "",
"signature": "sha256:9e62469f6250ac6d58e6d3d2c67a5995d1505778b147ae918d71f4ebc1c12fb6"
"signature": "sha256:8294645ddf3a6997a4764ca0bb61458953a6e579d858ecbf59e69882af05d0df"
},
"nbformat": 3,
"nbformat_minor": 0,
@ -65,7 +65,8 @@
"- [Dictionary operations](#dict_ops) \n",
" - [Adding elements to a dictionary](#adding_dict_elements)\n",
"- [Comprehensions vs. for-loops](#comprehensions)\n",
"- [Copying files by searching directory trees](#find_copy)"
"- [Copying files by searching directory trees](#find_copy)\n",
"- [Returning column vectors slicing through a numpy array](#row_vectors)"
]
},
{
@ -1409,14 +1410,6 @@
],
"prompt_number": 35
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
@ -1424,6 +1417,241 @@
"I have to say that I am really positively surprised. The shell's `find` scales even better than expected!"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<br>\n",
"<br>\n",
"<a name='row_vectors'></a>"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Returning column vectors slicing through a numpy array"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Given a numpy matrix, I want to iterate through it and return each column as a 1-column vector. \n",
"E.g., if I want to return the 1st column from matrix A below\n",
"\n",
"<pre>\n",
">>> A = np.array([ [1,2,3], [4,5,6], [7,8,9] ])\n",
">>> A\n",
"array([[1, 2, 3],\n",
"       [4, 5, 6],\n",
"       [7, 8, 9]])</pre>\n",
"\n",
"I want my result to be:\n",
"<pre>\n",
"array([[1],\n",
"       [4],\n",
"       [7]])</pre>\n",
"\n",
"with `.shape` = `(3,1)`\n",
"\n",
"\n",
"However, the default behavior of numpy is to return the column as a 1-dimensional array (which prints like a row vector):\n",
"\n",
"<pre>\n",
">>> A[:,0]\n",
"array([1, 4, 7])\n",
">>> A[:,0].shape\n",
"(3,)\n",
"</pre>"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import numpy as np\n",
"\n",
"def colvec_method1(A):\n",
"    \"\"\"Yield each column of the 2-D array A with shape (A.shape[0], 1).\n",
"\n",
"    Iterating over A.T visits the columns of A as 1-D arrays; indexing\n",
"    with np.newaxis appends a second axis of length 1 to each of them.\n",
"    \"\"\"\n",
"    for col in A.T:\n",
"        yield col[:, np.newaxis]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 83
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def colvec_method2(A):\n",
"    \"\"\"Yield each column of the 2-D array A with shape (A.shape[0], 1).\n",
"\n",
"    A slice of width one (j:j+1) keeps the second axis, so the result\n",
"    is already two-dimensional.\n",
"    \"\"\"\n",
"    for j in range(A.shape[1]):\n",
"        yield A[:, j:j + 1]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 82
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def colvec_method3(A):\n",
"    \"\"\"Yield each column of the 2-D array A with shape (A.shape[0], 1).\n",
"\n",
"    Each column is taken as a 1-D slice and then reshaped explicitly.\n",
"    \"\"\"\n",
"    for j in range(A.shape[1]):\n",
"        flat = A[:, j]\n",
"        yield flat.reshape((A.shape[0], 1))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 81
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def colvec_method4(A):\n",
"    \"\"\"Yield each column of the 2-D array A with shape (A.shape[0], 1).\n",
"\n",
"    Each column is taken as a 1-D slice and passed to np.vstack, which\n",
"    stacks its elements vertically.\n",
"    \"\"\"\n",
"    for j in range(A.shape[1]):\n",
"        flat = A[:, j]\n",
"        yield np.vstack(flat)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 79
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def colvec_method5(A):\n",
"    \"\"\"Yield each column of the 2-D array A with shape (A.shape[0], 1).\n",
"\n",
"    Each column is taken as a 1-D slice and passed to np.row_stack.\n",
"    \"\"\"\n",
"    # NOTE(review): np.row_stack appears to be an alias of np.vstack and is\n",
"    # reported as deprecated in NumPy 2.0 -- confirm before upgrading.\n",
"    for j in range(A.shape[1]):\n",
"        flat = A[:, j]\n",
"        yield np.row_stack(flat)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 77
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def colvec_method6(A):\n",
"    \"\"\"Yield each column of the 2-D array A with shape (A.shape[0], 1).\n",
"\n",
"    Each column is taken as a 1-D slice, wrapped in a one-element tuple\n",
"    and passed to np.column_stack.\n",
"    \"\"\"\n",
"    for j in range(A.shape[1]):\n",
"        single = (A[:, j],)\n",
"        yield np.column_stack(single)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 74
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def test_method(method, A):\n",
"    \"\"\"Assert that every array yielded by method(A) has shape (A.shape[0], 1).\n",
"\n",
"    method -- callable taking the array A and returning an iterable of arrays\n",
"    A      -- 2-D array handed to method\n",
"\n",
"    Raises AssertionError with the message '<actual shape>, <expected shape>'\n",
"    on the first yielded array whose shape differs.\n",
"    \"\"\"\n",
"    expected = (A.shape[0], 1)\n",
"    for colvec in method(A):\n",
"        # Format the expected shape as one tuple so both placeholders are\n",
"        # filled with comparable values.\n",
"        assert colvec.shape == expected, \"{}, {}\".format(colvec.shape, expected)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 69
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import timeit\n",
"\n",
"A = np.random.random((300, 3))\n",
"\n",
"# Time every candidate on the same input array. The loop variable `method`\n",
"# is what gets passed to test_method, so each iteration measures the\n",
"# function whose name was just printed.\n",
"# NOTE: outputs stored with this cell were recorded while every iteration\n",
"# timed colvec_method2; re-run the cell to refresh them.\n",
"for method in [\n",
"        colvec_method1, colvec_method2,\n",
"        colvec_method3, colvec_method4,\n",
"        colvec_method5, colvec_method6]:\n",
"    print('\\nTest:', method.__name__)\n",
"    %timeit test_method(method, A)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Test: colvec_method1\n",
"100000 loops, best of 3: 17.7 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"\n",
"Test: colvec_method2\n",
"10000 loops, best of 3: 16.4 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"\n",
"Test: colvec_method3\n",
"100000 loops, best of 3: 17.3 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"\n",
"Test: colvec_method4\n",
"10000 loops, best of 3: 16.4 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"\n",
"Test: colvec_method5\n",
"100000 loops, best of 3: 17.1 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"\n",
"Test: colvec_method6\n",
"100000 loops, best of 3: 16.6 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n"
]
}
],
"prompt_number": 86
},
{
"cell_type": "code",
"collapsed": false,

View File

@ -1,7 +1,7 @@
{
"metadata": {
"name": "",
"signature": "sha256:9e62469f6250ac6d58e6d3d2c67a5995d1505778b147ae918d71f4ebc1c12fb6"
"signature": "sha256:8294645ddf3a6997a4764ca0bb61458953a6e579d858ecbf59e69882af05d0df"
},
"nbformat": 3,
"nbformat_minor": 0,
@ -65,7 +65,8 @@
"- [Dictionary operations](#dict_ops) \n",
" - [Adding elements to a dictionary](#adding_dict_elements)\n",
"- [Comprehensions vs. for-loops](#comprehensions)\n",
"- [Copying files by searching directory trees](#find_copy)"
"- [Copying files by searching directory trees](#find_copy)\n",
"- [Returning column vectors slicing through a numpy array](#row_vectors)"
]
},
{
@ -1409,14 +1410,6 @@
],
"prompt_number": 35
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
@ -1424,6 +1417,241 @@
"I have to say that I am really positively surprised. The shell's `find` scales even better than expected!"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<br>\n",
"<br>\n",
"<a name='row_vectors'></a>"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Returning column vectors slicing through a numpy array"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Given a numpy matrix, I want to iterate through it and return each column as a 1-column vector. \n",
"E.g., if I want to return the 1st column from matrix A below\n",
"\n",
"<pre>\n",
">>> A = np.array([ [1,2,3], [4,5,6], [7,8,9] ])\n",
">>> A\n",
"array([[1, 2, 3],\n",
"       [4, 5, 6],\n",
"       [7, 8, 9]])</pre>\n",
"\n",
"I want my result to be:\n",
"<pre>\n",
"array([[1],\n",
"       [4],\n",
"       [7]])</pre>\n",
"\n",
"with `.shape` = `(3,1)`\n",
"\n",
"\n",
"However, the default behavior of numpy is to return the column as a 1-dimensional array (which prints like a row vector):\n",
"\n",
"<pre>\n",
">>> A[:,0]\n",
"array([1, 4, 7])\n",
">>> A[:,0].shape\n",
"(3,)\n",
"</pre>"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import numpy as np\n",
"\n",
"def colvec_method1(A):\n",
"    \"\"\"Yield each column of the 2-D array A with shape (A.shape[0], 1).\n",
"\n",
"    Iterating over A.T visits the columns of A as 1-D arrays; indexing\n",
"    with np.newaxis appends a second axis of length 1 to each of them.\n",
"    \"\"\"\n",
"    for col in A.T:\n",
"        yield col[:, np.newaxis]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 83
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def colvec_method2(A):\n",
"    \"\"\"Yield each column of the 2-D array A with shape (A.shape[0], 1).\n",
"\n",
"    A slice of width one (j:j+1) keeps the second axis, so the result\n",
"    is already two-dimensional.\n",
"    \"\"\"\n",
"    for j in range(A.shape[1]):\n",
"        yield A[:, j:j + 1]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 82
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def colvec_method3(A):\n",
"    \"\"\"Yield each column of the 2-D array A with shape (A.shape[0], 1).\n",
"\n",
"    Each column is taken as a 1-D slice and then reshaped explicitly.\n",
"    \"\"\"\n",
"    for j in range(A.shape[1]):\n",
"        flat = A[:, j]\n",
"        yield flat.reshape((A.shape[0], 1))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 81
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def colvec_method4(A):\n",
"    \"\"\"Yield each column of the 2-D array A with shape (A.shape[0], 1).\n",
"\n",
"    Each column is taken as a 1-D slice and passed to np.vstack, which\n",
"    stacks its elements vertically.\n",
"    \"\"\"\n",
"    for j in range(A.shape[1]):\n",
"        flat = A[:, j]\n",
"        yield np.vstack(flat)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 79
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def colvec_method5(A):\n",
"    \"\"\"Yield each column of the 2-D array A with shape (A.shape[0], 1).\n",
"\n",
"    Each column is taken as a 1-D slice and passed to np.row_stack.\n",
"    \"\"\"\n",
"    # NOTE(review): np.row_stack appears to be an alias of np.vstack and is\n",
"    # reported as deprecated in NumPy 2.0 -- confirm before upgrading.\n",
"    for j in range(A.shape[1]):\n",
"        flat = A[:, j]\n",
"        yield np.row_stack(flat)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 77
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def colvec_method6(A):\n",
"    \"\"\"Yield each column of the 2-D array A with shape (A.shape[0], 1).\n",
"\n",
"    Each column is taken as a 1-D slice, wrapped in a one-element tuple\n",
"    and passed to np.column_stack.\n",
"    \"\"\"\n",
"    for j in range(A.shape[1]):\n",
"        single = (A[:, j],)\n",
"        yield np.column_stack(single)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 74
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def test_method(method, A):\n",
"    \"\"\"Assert that every array yielded by method(A) has shape (A.shape[0], 1).\n",
"\n",
"    method -- callable taking the array A and returning an iterable of arrays\n",
"    A      -- 2-D array handed to method\n",
"\n",
"    Raises AssertionError with the message '<actual shape>, <expected shape>'\n",
"    on the first yielded array whose shape differs.\n",
"    \"\"\"\n",
"    expected = (A.shape[0], 1)\n",
"    for colvec in method(A):\n",
"        # Format the expected shape as one tuple so both placeholders are\n",
"        # filled with comparable values.\n",
"        assert colvec.shape == expected, \"{}, {}\".format(colvec.shape, expected)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 69
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import timeit\n",
"\n",
"A = np.random.random((300, 3))\n",
"\n",
"# Time every candidate on the same input array. The loop variable `method`\n",
"# is what gets passed to test_method, so each iteration measures the\n",
"# function whose name was just printed.\n",
"# NOTE: outputs stored with this cell were recorded while every iteration\n",
"# timed colvec_method2; re-run the cell to refresh them.\n",
"for method in [\n",
"        colvec_method1, colvec_method2,\n",
"        colvec_method3, colvec_method4,\n",
"        colvec_method5, colvec_method6]:\n",
"    print('\\nTest:', method.__name__)\n",
"    %timeit test_method(method, A)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Test: colvec_method1\n",
"100000 loops, best of 3: 17.7 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"\n",
"Test: colvec_method2\n",
"10000 loops, best of 3: 16.4 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"\n",
"Test: colvec_method3\n",
"100000 loops, best of 3: 17.3 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"\n",
"Test: colvec_method4\n",
"10000 loops, best of 3: 16.4 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"\n",
"Test: colvec_method5\n",
"100000 loops, best of 3: 17.1 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"\n",
"Test: colvec_method6\n",
"100000 loops, best of 3: 16.6 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n"
]
}
],
"prompt_number": 86
},
{
"cell_type": "code",
"collapsed": false,