{ "cells": [ { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "view-in-github" }, "source": [ "<a href=\"https://colab.research.google.com/github/riyajaiswal25/MLProjects/blob/main/DigitRecognitionusingRandomForestClassifier.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" ] }, { "cell_type": "markdown", "metadata": { "id": "hdd4dapuroBk" }, "source": [ "# Digit Recognition using Random Forest Classifier" ] }, { "cell_type": "markdown", "metadata": { "id": "k_cWcYTUsWdE" }, "source": [ "**Import Basic Library**" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "id": "t6uu8CVZrllI" }, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n" ] }, { "cell_type": "markdown", "metadata": { "id": "S_X9qpm0s4uq" }, "source": [ "**Choosing Dataset**" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 73 }, "id": "ERRZ3tkOOYFA", "outputId": "5f8f4aae-398b-4e33-e2c2-53de23174401" }, "outputs": [ { "data": { "text/html": [ "\n", " <input type=\"file\" id=\"files-7634f8da-a56b-480e-a6d2-1cc2533a486a\" name=\"files[]\" multiple disabled\n", " style=\"border:none\" />\n", " <output id=\"result-7634f8da-a56b-480e-a6d2-1cc2533a486a\">\n", " Upload widget is only available when the cell has been executed in the\n", " current browser session. 
Please rerun this cell to enable.\n", " </output>\n", " <script>// Copyright 2017 Google LLC\n", "//\n", "// Licensed under the Apache License, Version 2.0 (the \"License\");\n", "// you may not use this file except in compliance with the License.\n", "// You may obtain a copy of the License at\n", "//\n", "// http://www.apache.org/licenses/LICENSE-2.0\n", "//\n", "// Unless required by applicable law or agreed to in writing, software\n", "// distributed under the License is distributed on an \"AS IS\" BASIS,\n", "// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", "// See the License for the specific language governing permissions and\n", "// limitations under the License.\n", "\n", "/**\n", " * @fileoverview Helpers for google.colab Python module.\n", " */\n", "(function(scope) {\n", "function span(text, styleAttributes = {}) {\n", " const element = document.createElement('span');\n", " element.textContent = text;\n", " for (const key of Object.keys(styleAttributes)) {\n", " element.style[key] = styleAttributes[key];\n", " }\n", " return element;\n", "}\n", "\n", "// Max number of bytes which will be uploaded at a time.\n", "const MAX_PAYLOAD_SIZE = 100 * 1024;\n", "\n", "function _uploadFiles(inputId, outputId) {\n", " const steps = uploadFilesStep(inputId, outputId);\n", " const outputElement = document.getElementById(outputId);\n", " // Cache steps on the outputElement to make it available for the next call\n", " // to uploadFilesContinue from Python.\n", " outputElement.steps = steps;\n", "\n", " return _uploadFilesContinue(outputId);\n", "}\n", "\n", "// This is roughly an async generator (not supported in the browser yet),\n", "// where there are multiple asynchronous steps and the Python side is going\n", "// to poll for completion of each step.\n", "// This uses a Promise to block the python side on completion of each step,\n", "// then passes the result of the previous step as the input to the next step.\n", "function 
_uploadFilesContinue(outputId) {\n", " const outputElement = document.getElementById(outputId);\n", " const steps = outputElement.steps;\n", "\n", " const next = steps.next(outputElement.lastPromiseValue);\n", " return Promise.resolve(next.value.promise).then((value) => {\n", " // Cache the last promise value to make it available to the next\n", " // step of the generator.\n", " outputElement.lastPromiseValue = value;\n", " return next.value.response;\n", " });\n", "}\n", "\n", "/**\n", " * Generator function which is called between each async step of the upload\n", " * process.\n", " * @param {string} inputId Element ID of the input file picker element.\n", " * @param {string} outputId Element ID of the output display.\n", " * @return {!Iterable<!Object>} Iterable of next steps.\n", " */\n", "function* uploadFilesStep(inputId, outputId) {\n", " const inputElement = document.getElementById(inputId);\n", " inputElement.disabled = false;\n", "\n", " const outputElement = document.getElementById(outputId);\n", " outputElement.innerHTML = '';\n", "\n", " const pickedPromise = new Promise((resolve) => {\n", " inputElement.addEventListener('change', (e) => {\n", " resolve(e.target.files);\n", " });\n", " });\n", "\n", " const cancel = document.createElement('button');\n", " inputElement.parentElement.appendChild(cancel);\n", " cancel.textContent = 'Cancel upload';\n", " const cancelPromise = new Promise((resolve) => {\n", " cancel.onclick = () => {\n", " resolve(null);\n", " };\n", " });\n", "\n", " // Wait for the user to pick the files.\n", " const files = yield {\n", " promise: Promise.race([pickedPromise, cancelPromise]),\n", " response: {\n", " action: 'starting',\n", " }\n", " };\n", "\n", " cancel.remove();\n", "\n", " // Disable the input element since further picks are not allowed.\n", " inputElement.disabled = true;\n", "\n", " if (!files) {\n", " return {\n", " response: {\n", " action: 'complete',\n", " }\n", " };\n", " }\n", "\n", " for (const file of files) 
{\n", " const li = document.createElement('li');\n", " li.append(span(file.name, {fontWeight: 'bold'}));\n", " li.append(span(\n", " `(${file.type || 'n/a'}) - ${file.size} bytes, ` +\n", " `last modified: ${\n", " file.lastModifiedDate ? file.lastModifiedDate.toLocaleDateString() :\n", " 'n/a'} - `));\n", " const percent = span('0% done');\n", " li.appendChild(percent);\n", "\n", " outputElement.appendChild(li);\n", "\n", " const fileDataPromise = new Promise((resolve) => {\n", " const reader = new FileReader();\n", " reader.onload = (e) => {\n", " resolve(e.target.result);\n", " };\n", " reader.readAsArrayBuffer(file);\n", " });\n", " // Wait for the data to be ready.\n", " let fileData = yield {\n", " promise: fileDataPromise,\n", " response: {\n", " action: 'continue',\n", " }\n", " };\n", "\n", " // Use a chunked sending to avoid message size limits. See b/62115660.\n", " let position = 0;\n", " do {\n", " const length = Math.min(fileData.byteLength - position, MAX_PAYLOAD_SIZE);\n", " const chunk = new Uint8Array(fileData, position, length);\n", " position += length;\n", "\n", " const base64 = btoa(String.fromCharCode.apply(null, chunk));\n", " yield {\n", " response: {\n", " action: 'append',\n", " file: file.name,\n", " data: base64,\n", " },\n", " };\n", "\n", " let percentDone = fileData.byteLength === 0 ?\n", " 100 :\n", " Math.round((position / fileData.byteLength) * 100);\n", " percent.textContent = `${percentDone}% done`;\n", "\n", " } while (position < fileData.byteLength);\n", " }\n", "\n", " // All done.\n", " yield {\n", " response: {\n", " action: 'complete',\n", " }\n", " };\n", "}\n", "\n", "scope.google = scope.google || {};\n", "scope.google.colab = scope.google.colab || {};\n", "scope.google.colab._files = {\n", " _uploadFiles,\n", " _uploadFilesContinue,\n", "};\n", "})(self);\n", "</script> " ], "text/plain": [ "<IPython.core.display.HTML object>" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": 
"stream", "text": [ "Saving train[1].csv to train[1].csv\n" ] } ], "source": [ "from google.colab import files\n", "uploaded = files.upload()" ] }, { "cell_type": "markdown", "metadata": { "id": "TJRApm0w0Dct" }, "source": [ "**Load Dataset**" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "id": "GyOvJOoR0Lhq" }, "outputs": [], "source": [ "import io\n", "\n", "# Parse the bytes returned by files.upload() instead of re-opening a hard-coded\n", "# 'train.csv' from disk: the upload recorded above was saved as 'train[1].csv', so the\n", "# fixed name only resolves when an older copy happens to exist in the runtime.\n", "csv_bytes = next(iter(uploaded.values()))  # contents of the first uploaded file\n", "dataset = pd.read_csv(io.BytesIO(csv_bytes))" ] }, { "cell_type": "markdown", "metadata": { "id": "0txmydWY0ZEH" }, "source": [ "**Summarize dataset**" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "AW-9ITV10cIY", "outputId": "dce2cb6d-2bdb-41e5-de9e-baf122900140" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(42000, 785)\n", " label pixel0 pixel1 pixel2 pixel3 pixel4 pixel5 pixel6 pixel7 \\\n", "0 1 0 0 0 0 0 0 0 0 \n", "1 0 0 0 0 0 0 0 0 0 \n", "2 1 0 0 0 0 0 0 0 0 \n", "3 4 0 0 0 0 0 0 0 0 \n", "4 0 0 0 0 0 0 0 0 0 \n", "\n", " pixel8 ... pixel774 pixel775 pixel776 pixel777 pixel778 pixel779 \\\n", "0 0 ... 0 0 0 0 0 0 \n", "1 0 ... 0 0 0 0 0 0 \n", "2 0 ... 0 0 0 0 0 0 \n", "3 0 ... 0 0 0 0 0 0 \n", "4 0 ... 
0 0 0 0 0 0 \n", "\n", " pixel780 pixel781 pixel782 pixel783 \n", "0 0 0 0 0 \n", "1 0 0 0 0 \n", "2 0 0 0 0 \n", "3 0 0 0 0 \n", "4 0 0 0 0 \n", "\n", "[5 rows x 785 columns]\n" ] } ], "source": [ "print(dataset.shape)\n", "print(dataset.head(5))" ] }, { "cell_type": "markdown", "metadata": { "id": "QUh5BKq20viv" }, "source": [ "**Segregate Dataset into X(Input/Independent Variable) & Y(Output/Dependent Variable)**" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "OP2TX3iX09ND", "outputId": "9c8f44e2-a503-4acf-8978-f6576706e402" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " pixel0 pixel1 pixel2 pixel3 pixel4 pixel5 pixel6 pixel7 pixel8 \\\n", "0 0 0 0 0 0 0 0 0 0 \n", "1 0 0 0 0 0 0 0 0 0 \n", "2 0 0 0 0 0 0 0 0 0 \n", "3 0 0 0 0 0 0 0 0 0 \n", "4 0 0 0 0 0 0 0 0 0 \n", "... ... ... ... ... ... ... ... ... ... \n", "41995 0 0 0 0 0 0 0 0 0 \n", "41996 0 0 0 0 0 0 0 0 0 \n", "41997 0 0 0 0 0 0 0 0 0 \n", "41998 0 0 0 0 0 0 0 0 0 \n", "41999 0 0 0 0 0 0 0 0 0 \n", "\n", " pixel9 ... pixel774 pixel775 pixel776 pixel777 pixel778 \\\n", "0 0 ... 0 0 0 0 0 \n", "1 0 ... 0 0 0 0 0 \n", "2 0 ... 0 0 0 0 0 \n", "3 0 ... 0 0 0 0 0 \n", "4 0 ... 0 0 0 0 0 \n", "... ... ... ... ... ... ... ... \n", "41995 0 ... 0 0 0 0 0 \n", "41996 0 ... 0 0 0 0 0 \n", "41997 0 ... 0 0 0 0 0 \n", "41998 0 ... 0 0 0 0 0 \n", "41999 0 ... 0 0 0 0 0 \n", "\n", " pixel779 pixel780 pixel781 pixel782 pixel783 \n", "0 0 0 0 0 0 \n", "1 0 0 0 0 0 \n", "2 0 0 0 0 0 \n", "3 0 0 0 0 0 \n", "4 0 0 0 0 0 \n", "... ... ... ... ... ... 
\n", "41995 0 0 0 0 0 \n", "41996 0 0 0 0 0 \n", "41997 0 0 0 0 0 \n", "41998 0 0 0 0 0 \n", "41999 0 0 0 0 0 \n", "\n", "[42000 rows x 784 columns]\n", "(42000, 784)\n" ] } ], "source": [ "# Features: every column after the first one.\n", "X = dataset.iloc[:,1:]\n", "print(X)\n", "print(X.shape)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "2RuBl7671GH4", "outputId": "96d6afef-f2ed-420f-d95c-826a287fa8dd" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0 1\n", "1 0\n", "2 1\n", "3 4\n", "4 0\n", " ..\n", "41995 0\n", "41996 1\n", "41997 7\n", "41998 6\n", "41999 9\n", "Name: label, Length: 42000, dtype: int64\n", "(42000,)\n" ] } ], "source": [ "# Target: the first column ('label').\n", "Y = dataset.iloc[:,0]\n", "print(Y)\n", "print(Y.shape)" ] }, { "cell_type": "markdown", "metadata": { "id": "o1j-AGZd1OQV" }, "source": [ "**Splitting Dataset into Test and Train**" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "id": "U_c_R4HA1SeZ" }, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split\n", "# Hold out 25% of the rows for testing; random_state fixes the shuffle so the split is repeatable.\n", "X_train, X_test, y_train, y_test = train_test_split(X,Y, test_size = 0.25, random_state = 0)" ] }, { "cell_type": "markdown", "metadata": { "id": "Gf6EgvAc1vjh" }, "source": [ "**Training**" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "RS4TAnDh1yUU", "outputId": "4803259d-f3a1-461f-d3d0-939bc4495a64" }, "outputs": [ { "data": { "text/plain": [ "RandomForestClassifier()" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.ensemble import RandomForestClassifier\n", "# Seed the forest so the fitted model, and the accuracy reported further down, are the same\n", "# on every run; the seed matches the one already used for train_test_split.\n", "model = RandomForestClassifier(random_state=0)\n", "model.fit(X_train, y_train)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "id": "SljeEEbs2JFT" }, "outputs": [], "source": [ "# Predict a label for every held-out row.\n", "y_pred = model.predict(X_test)" ] }, { "cell_type": "markdown", "metadata": { "id": "4XEvHILm2OF-" }, "source": [ "**Model Accuracy**" ] }, { 
"cell_type": "code", "execution_count": 11, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "sHEVc1Qq2Rqy", "outputId": "06be6e32-1ba4-4035-eafb-3b3c2023abd6" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy of the Model: 96.31428571428572%\n" ] } ], "source": [ "from sklearn.metrics import accuracy_score\n", "print(\"Accuracy of the Model: {0}%\".format(accuracy_score(y_test, y_pred)*100))" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 283 }, "id": "iymJ1Zpj20gk", "outputId": "ae21ce24-b957-4a30-8f04-ec5c77dd5a53" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Predicted 7\n" ] }, { "data": { "text/plain": [ "<matplotlib.image.AxesImage at 0x7ff128cbac90>" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAOcAAADnCAYAAADl9EEgAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAF8UlEQVR4nO3dPWtUaxuG4XdJ4kelJqJVEAuriAja2NhaWSmxs7Ky1UoJCAEhRdKKhWBjIdqJaUQEsRFELNQ/IDaCgqIEP3DtOuyse9zzTjLXTI6j9GKShXLuB/bDzDRt2/4PyLNt2A8ArE+cEEqcEEqcEEqcEGqiGpum8b9yYYO1bdus9+dOTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgg1MewHGEcLCwvl/vPnz85tdXW1fO21a9fKfc+ePeX+58+fch+mGzdudG7z8/Ob+CQZnJwQSpwQSpwQSpwQSpwQSpwQSpwQqmnbtntsmu5xjO3evbvcFxcXy/3ChQvlvn379v/8TH+raZpyr/69h+3Nmzed27FjxzbxSTZX27br/qM5OSGUOCGUOCGUOCGUOCGUOCGUOCHUlrznPHXqVLnfvn273A8dOjTIxxmoUb7n/P79e+fW6+55lLnnhBEjTgglTgglTgglTgglTgglTgg1tp9be+LEic7t0aNH5Wt37do16McZmF6fa/vt27dy/3/uObdtq/9bvm/fvr5/Nv/m5IRQ4oRQ4oRQ4oRQ4oRQ4oRQ4oRQY3vPWX3OafI95srKSrlfv3693F+9ejXAp1lr586d5f748eNyP3nyZLlPTk52brOzs+Vr3759W+6jyMkJocQJocQJocQJocQJocQJocb2KmVubm5ov/v379/lfu
vWrc5tfn6+fO3Xr1/7eqZR8OvXr85tHK9KenFyQihxQihxQihxQihxQihxQihxQqixvec8cODAhv3sXh9PefHixXK/d+/eIB9n00xPT5d7r7eE9VLdc25FTk4IJU4IJU4IJU4IJU4IJU4IJU4INbb3nA8ePOjcpqamytfevXu33JeXl8v948eP5T6qqr/TQTh37tyG/vxR4+SEUOKEUOKEUOKEUOKEUOKEUOKEUE3btt1j03SPI2xmZqbc379/v0lPkufgwYOd27t378rX7tixo9w/fPhQ7kePHu3cvnz5Ur52lLVt26z3505OCCVOCCVOCCVOCCVOCCVOCCVOCDW27+esbOV7zF6uXLnSufW6x+zl2bNn5T7Od5n9cHJCKHFCKHFCKHFCKHFCKHFCqC15lTLOJicny31ubq7cL1261Pfv/vHjR7k/efKk75+9FTk5IZQ4IZQ4IZQ4IZQ4IZQ4IZQ4IZR7zjHT655yaWmp3KuPSu1lYWGh3O/cudP3z96KnJwQSpwQSpwQSpwQSpwQSpwQSpwQakt+BeAoO3LkSLk/ffq03Pfu3dv37/78+XO5Hz58uNx99OX6fAUgjBhxQihxQihxQihxQihxQihxQijv5wxz/Pjxcl9ZWSn3qampcu/1fs3V1dXO7fz58+Vr3WMOlpMTQokTQokTQokTQokTQokTQrlKGYKZmZnO7eHDh+Vrp6enB/04aywvL3duvd6OxmA5OSGUOCGUOCGUOCGUOCGUOCGUOCGUe84NMDs7W+5Xr17t3Pbv3z/ox1nj5cuX5b64uLihv5+/5+SEUOKEUOKEUOKEUOKEUOKEUOKEUL4CsA8TE/X18P3798v9zJkzg3ycNT59+lTup0+fLvfXr18P8nH4C74CEEaMOCGUOCGUOCGUOCGUOCGUOCGU93P24ebNm+U+zHvMy5cvl7t7zNHh5IRQ4oRQ4oRQ4oRQ4oRQ4oRQ4oRQ7jn70OtzaTfS2bNny/358+eb9CRsNCcnhBInhBInhBInhBInhBInhHKVEmZpaancX7x4sUlPwrA5OSGUOCGUOCGUOCGUOCGUOCGUOCGUrwCEIfMVgDBixAmhxAmhxAmhxAmhxAmhxAmhyntOYHicnBBKnBBKnBBKnBBKnBBKnBDqH2Wm9vKr3NQPAAAAAElFTkSuQmCC", "text/plain": [ "<Figure size 432x288 with 1 Axes>" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import matplotlib.pyplot as plt\n", "index=10\n", "print(\"Predicted \" +str(model.predict(X_test)[index]))\n", "plt.axis('off')\n", "plt.imshow(X_test.iloc[index].values.reshape((28,28)),cmap='gray')" ] } ], "metadata": { "colab": { "authorship_tag": "ABX9TyOBzEe2vR1rQh4B8yWT0mhr", "include_colab_link": true, "provenance": [] }, "kernelspec": { "display_name": "Python 3.8.9 64-bit", "language": "python", "name": "python3" }, "language_info": { "name": "python", "version": "3.8.9" }, "vscode": { "interpreter": { "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" } } }, "nbformat": 4, "nbformat_minor": 0 }