From 24c5642d240ce9ab3db7acd138a2e5dd2131099a Mon Sep 17 00:00:00 2001 From: rasbt Date: Sat, 24 May 2014 20:06:31 -0400 Subject: [PATCH] added unicode section --- ...ey_differences_between_python_2_and_3.html | 352 ++++++++++++++++++ ...y_differences_between_python_2_and_3.ipynb | 235 +++++++++++- 2 files changed, 586 insertions(+), 1 deletion(-) diff --git a/tutorials/key_differences_between_python_2_and_3.html b/tutorials/key_differences_between_python_2_and_3.html index fca0c8f..815982a 100644 --- a/tutorials/key_differences_between_python_2_and_3.html +++ b/tutorials/key_differences_between_python_2_and_3.html @@ -1754,6 +1754,7 @@ document.write(''+'email'+'<\/'+'a'+'>');
  • Using the __future__ module

  • The print function

  • Integer division

  • +
  • Unicode

  • xrange

  • Raising exceptions

  • Handling exceptions

  • @@ -2250,6 +2251,357 @@ Python 3.4.1 + +
    +
    +
    +
    +
    +



    +
    +
    +
    +
    +
    +
    +
    +
    +

    Unicode

    +
    +
    +
    + +
    +
    +
    + +
    +
    +
    +
    +
    +
    +

    Python 2 has ASCII str() types and a separate unicode() type, but no dedicated bytes type (a b'...' literal is just a str).

    +

    Now, in Python 3, we finally have Unicode (utf-8) strings and 2 byte classes: bytes and bytearray.

    +
    +
    +
    +
    +
    +
    +
    +
    +

    Python 2

    +
    +
    +
    +
    +
    +
    +In [2]: +
    +
    +
    +
    print 'Python', python_version()
    +
    + +
    +
    +
    + +
    +
    + + +
    +
    +
    +Python 2.7.6
    +
    +
    +
    +
    + +
    +
    + +
    +
    +
    +
    +In [3]: +
    +
    +
    +
    print type(unicode('this is like a python3 str type'))
    +
    + +
    +
    +
    + +
    +
    + + +
    +
    +
    +<type 'unicode'>
    +
    +
    +
    +
    + +
    +
    + +
    +
    +
    +
    +In [4]: +
    +
    +
    +
    print type(b'byte type does not exist')
    +
    + +
    +
    +
    + +
    +
    + + +
    +
    +
    +<type 'str'>
    +
    +
    +
    +
    + +
    +
    + +
    +
    +
    +
    +In [5]: +
    +
    +
    +
    print 'they are really' + b' the same'
    +
    + +
    +
    +
    + +
    +
    + + +
    +
    +
    +they are really the same
    +
    +
    +
    +
    + +
    +
    + +
    +
    +
    +
    +In [7]: +
    +
    +
    +
    print type(bytearray(b'bytearray oddly does exist though'))
    +
    + +
    +
    +
    + +
    +
    + + +
    +
    +
    +<type 'bytearray'>
    +
    +
    +
    +
    + +
    +
    + +
    +
    +
    +
    +
    +
    +


    +
    +
    +
    +
    +
    +
    +
    +
    +

    Python 3

    +
    +
    +
    +
    +
    +
    +In [6]: +
    +
    +
    +
    print('Python', python_version())
    +print('strings are now utf-8 \u03BCnico\u0394é!')
    +
    + +
    +
    +
    + +
    +
    + + +
    +
    +
    +Python 3.4.1
    +strings are now utf-8 μnicoΔé!
    +
    +
    +
    +
    + +
    +
    + +
    +
    +
    +
    +In [8]: +
    +
    +
    +
    print('Python', python_version(), end="")
    +print(' has', type(b' bytes for storing data'))
    +
    + +
    +
    +
    + +
    +
    + + +
    +
    +
    +Python 3.4.1 has <class 'bytes'>
    +
    +
    +
    +
    + +
    +
    + +
    +
    +
    +
    +In [11]: +
    +
    +
    +
    print('and Python', python_version(), end="")
    +print(' also has', type(bytearray(b'bytearrays')))
    +
    + +
    +
    +
    + +
    +
    + + +
    +
    +
    +and Python 3.4.1 also has <class 'bytearray'>
    +
    +
    +
    +
    + +
    +
    + +
    +
    +
    +
    +In [13]: +
    +
    +
    +
    'note that we cannot add a string' + b'bytes for data'
    +
    + +
    +
    +
    + +
    +
    + + +
    +
    +
    +---------------------------------------------------------------------------
    +TypeError                                 Traceback (most recent call last)
    +<ipython-input-13-d3e8942ccf81> in <module>()
    +----> 1 'note that we cannot add a string' + b'bytes for data'
    +
    +TypeError: Can't convert 'bytes' object to str implicitly
    +
    +
    + +
    +
    +
    diff --git a/tutorials/key_differences_between_python_2_and_3.ipynb b/tutorials/key_differences_between_python_2_and_3.ipynb index 3486254..92ee2ab 100644 --- a/tutorials/key_differences_between_python_2_and_3.ipynb +++ b/tutorials/key_differences_between_python_2_and_3.ipynb @@ -1,7 +1,7 @@ { "metadata": { "name": "", - "signature": "sha256:b9ba6aad52c6458ba2d9ee8691fcace515fc01f1029499253de1f48cc664fbc0" + "signature": "sha256:bce64714d9af46abdf20f98e6b5b0b51cd1240612c1dbd99a40d812aea22dcdf" }, "nbformat": 3, "nbformat_minor": 0, @@ -79,6 +79,8 @@ "\n", "- [Integer division](#Integer-division)\n", "\n", + "- [Unicode](#Unicode)\n", + "\n", "- [xrange](#xrange)\n", "\n", "- [Raising exceptions](#Raising-exceptions)\n", @@ -434,6 +436,237 @@ "
    " ] }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Unicode" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[[back to the section-overview](#Sections)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Python 2 has ASCII `str()` types, separate `unicode()`, but no `byte` type. \n", + "\n", + "Now, in Python 3, we finally have Unicode (utf-8) `str`ings, and 2 byte classes: `byte` and `bytearray`s." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Python 2" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "print 'Python', python_version()" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "Python 2.7.6\n" + ] + } + ], + "prompt_number": 2 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "print type(unicode('this is like a python3 str type'))" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n" + ] + } + ], + "prompt_number": 3 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "print type(b'byte type does not exist')" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n" + ] + } + ], + "prompt_number": 4 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "print 'they are really' + b' the same'" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "they are really the same\n" + ] + } + ], + "prompt_number": 5 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "print type(bytearray(b'bytearray oddly does exist though'))" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + 
"text": [ + "\n" + ] + } + ], + "prompt_number": 7 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Python 3" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "print('Python', python_version())\n", + "print('strings are now utf-8 \\u03BCnico\\u0394\u00e9!')" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "Python 3.4.1\n", + "strings are now utf-8 \u03bcnico\u0394\u00e9!\n" + ] + } + ], + "prompt_number": 6 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "print('Python', python_version(), end=\"\")\n", + "print(' has', type(b' bytes for storing data'))" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "Python 3.4.1 has \n" + ] + } + ], + "prompt_number": 8 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "print('and Python', python_version(), end=\"\")\n", + "print(' also has', type(bytearray(b'bytearrays')))" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "and Python 3.4.1 also has \n" + ] + } + ], + "prompt_number": 11 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "'note that we cannot add a string' + b'bytes for data'" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "Can't convert 'bytes' object to str implicitly", + "output_type": "pyerr", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;34m'note that we cannot add a string'\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34mb'bytes for data'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + 
"\u001b[0;31mTypeError\u001b[0m: Can't convert 'bytes' object to str implicitly" + ] + } + ], + "prompt_number": 13 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
    \n", + "
    " + ] + }, { "cell_type": "heading", "level": 2,