diff --git a/python_patterns/patterns.ipynb b/python_patterns/patterns.ipynb
index fc3e45f..2a769ba 100644
--- a/python_patterns/patterns.ipynb
+++ b/python_patterns/patterns.ipynb
@@ -1,7 +1,7 @@
{
"metadata": {
"name": "",
- "signature": "sha256:0c9d8c8b65b0eec5bb7c2a2790f08a1e49daf27dac2c9dcfe8d85ce958046a2c"
+ "signature": "sha256:714a46a359c5b1c3e7e7bd4d19d73221f9def5bcb806840be82541070041d29e"
},
"nbformat": 3,
"nbformat_minor": 0,
@@ -57,6 +57,7 @@
"- [Differences between 2 files](#Differences-between-2-files)\n",
"- [Differences between successive elements in a list](#Differences-between-successive-elements-in-a-list)\n",
"- [Doctest example](#Doctest-example)\n",
+ "- [English language detection](#English-language-detection)\n",
"- [File browsing basics](#File-browsing-basics)\n",
"- [File reading basics](#File-reading-basics)\n",
"- [Indices of min and max elements from a list](#Indices-of-min-and-max-elements-from-a-list)\n",
@@ -595,6 +596,61 @@
"
"
]
},
+ {
+ "cell_type": "heading",
+ "level": 2,
+ "metadata": {},
+ "source": [
+ "English language detection"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[back to top](#Table-of-Contents)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "import nltk\n",
+ "\n",
+ "def eng_ratio(text):\n",
+ "    ''' Returns the fraction (0.0-1.0) of distinct alphabetic words in text that are not in the NLTK English word list; 0.0 if text has no alphabetic words '''\n",
+ "\n",
+ "    english_vocab = set(w.lower() for w in nltk.corpus.words.words())\n",
+ "    text_vocab = set(w.lower() for w in text.split() if w.lower().isalpha())\n",
+ "    unusual = text_vocab.difference(english_vocab)\n",
+ "    # guard against empty input; float() forces true division on Python 2 as well\n",
+ "    return float(len(unusual)) / len(text_vocab) if text_vocab else 0.0\n",
+ " \n",
+ "text = 'This is a test fahrrad'\n",
+ "\n",
+ "print(eng_ratio(text))"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "0.2\n"
+ ]
+ }
+ ],
+ "prompt_number": 1
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
{
"cell_type": "heading",
"level": 2,