{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/riyajaiswal25/MLProjects/blob/main/DigitRecognitionusingRandomForestClassifier.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "hdd4dapuroBk"
      },
      "source": [
        "#  Digit Recognition using Random Forest Classifier"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "k_cWcYTUsWdE"
      },
      "source": [
        "**Import Basic Library**"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "id": "t6uu8CVZrllI"
      },
      "outputs": [],
      "source": [
        "import pandas as pd\n",
        "import numpy as np\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "S_X9qpm0s4uq"
      },
      "source": [
        "**Choosing Dataset**"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 2,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 73
        },
        "id": "ERRZ3tkOOYFA",
        "outputId": "5f8f4aae-398b-4e33-e2c2-53de23174401"
      },
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ],
            "text/html": [
              "\n",
              "     <input type=\"file\" id=\"files-7634f8da-a56b-480e-a6d2-1cc2533a486a\" name=\"files[]\" multiple disabled\n",
              "        style=\"border:none\" />\n",
              "     <output id=\"result-7634f8da-a56b-480e-a6d2-1cc2533a486a\">\n",
              "      Upload widget is only available when the cell has been executed in the\n",
              "      current browser session. Please rerun this cell to enable.\n",
              "      </output>\n",
              "      <script>// Copyright 2017 Google LLC\n",
              "//\n",
              "// Licensed under the Apache License, Version 2.0 (the \"License\");\n",
              "// you may not use this file except in compliance with the License.\n",
              "// You may obtain a copy of the License at\n",
              "//\n",
              "//      http://www.apache.org/licenses/LICENSE-2.0\n",
              "//\n",
              "// Unless required by applicable law or agreed to in writing, software\n",
              "// distributed under the License is distributed on an \"AS IS\" BASIS,\n",
              "// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
              "// See the License for the specific language governing permissions and\n",
              "// limitations under the License.\n",
              "\n",
              "/**\n",
              " * @fileoverview Helpers for google.colab Python module.\n",
              " */\n",
              "(function(scope) {\n",
              "function span(text, styleAttributes = {}) {\n",
              "  const element = document.createElement('span');\n",
              "  element.textContent = text;\n",
              "  for (const key of Object.keys(styleAttributes)) {\n",
              "    element.style[key] = styleAttributes[key];\n",
              "  }\n",
              "  return element;\n",
              "}\n",
              "\n",
              "// Max number of bytes which will be uploaded at a time.\n",
              "const MAX_PAYLOAD_SIZE = 100 * 1024;\n",
              "\n",
              "function _uploadFiles(inputId, outputId) {\n",
              "  const steps = uploadFilesStep(inputId, outputId);\n",
              "  const outputElement = document.getElementById(outputId);\n",
              "  // Cache steps on the outputElement to make it available for the next call\n",
              "  // to uploadFilesContinue from Python.\n",
              "  outputElement.steps = steps;\n",
              "\n",
              "  return _uploadFilesContinue(outputId);\n",
              "}\n",
              "\n",
              "// This is roughly an async generator (not supported in the browser yet),\n",
              "// where there are multiple asynchronous steps and the Python side is going\n",
              "// to poll for completion of each step.\n",
              "// This uses a Promise to block the python side on completion of each step,\n",
              "// then passes the result of the previous step as the input to the next step.\n",
              "function _uploadFilesContinue(outputId) {\n",
              "  const outputElement = document.getElementById(outputId);\n",
              "  const steps = outputElement.steps;\n",
              "\n",
              "  const next = steps.next(outputElement.lastPromiseValue);\n",
              "  return Promise.resolve(next.value.promise).then((value) => {\n",
              "    // Cache the last promise value to make it available to the next\n",
              "    // step of the generator.\n",
              "    outputElement.lastPromiseValue = value;\n",
              "    return next.value.response;\n",
              "  });\n",
              "}\n",
              "\n",
              "/**\n",
              " * Generator function which is called between each async step of the upload\n",
              " * process.\n",
              " * @param {string} inputId Element ID of the input file picker element.\n",
              " * @param {string} outputId Element ID of the output display.\n",
              " * @return {!Iterable<!Object>} Iterable of next steps.\n",
              " */\n",
              "function* uploadFilesStep(inputId, outputId) {\n",
              "  const inputElement = document.getElementById(inputId);\n",
              "  inputElement.disabled = false;\n",
              "\n",
              "  const outputElement = document.getElementById(outputId);\n",
              "  outputElement.innerHTML = '';\n",
              "\n",
              "  const pickedPromise = new Promise((resolve) => {\n",
              "    inputElement.addEventListener('change', (e) => {\n",
              "      resolve(e.target.files);\n",
              "    });\n",
              "  });\n",
              "\n",
              "  const cancel = document.createElement('button');\n",
              "  inputElement.parentElement.appendChild(cancel);\n",
              "  cancel.textContent = 'Cancel upload';\n",
              "  const cancelPromise = new Promise((resolve) => {\n",
              "    cancel.onclick = () => {\n",
              "      resolve(null);\n",
              "    };\n",
              "  });\n",
              "\n",
              "  // Wait for the user to pick the files.\n",
              "  const files = yield {\n",
              "    promise: Promise.race([pickedPromise, cancelPromise]),\n",
              "    response: {\n",
              "      action: 'starting',\n",
              "    }\n",
              "  };\n",
              "\n",
              "  cancel.remove();\n",
              "\n",
              "  // Disable the input element since further picks are not allowed.\n",
              "  inputElement.disabled = true;\n",
              "\n",
              "  if (!files) {\n",
              "    return {\n",
              "      response: {\n",
              "        action: 'complete',\n",
              "      }\n",
              "    };\n",
              "  }\n",
              "\n",
              "  for (const file of files) {\n",
              "    const li = document.createElement('li');\n",
              "    li.append(span(file.name, {fontWeight: 'bold'}));\n",
              "    li.append(span(\n",
              "        `(${file.type || 'n/a'}) - ${file.size} bytes, ` +\n",
              "        `last modified: ${\n",
              "            file.lastModifiedDate ? file.lastModifiedDate.toLocaleDateString() :\n",
              "                                    'n/a'} - `));\n",
              "    const percent = span('0% done');\n",
              "    li.appendChild(percent);\n",
              "\n",
              "    outputElement.appendChild(li);\n",
              "\n",
              "    const fileDataPromise = new Promise((resolve) => {\n",
              "      const reader = new FileReader();\n",
              "      reader.onload = (e) => {\n",
              "        resolve(e.target.result);\n",
              "      };\n",
              "      reader.readAsArrayBuffer(file);\n",
              "    });\n",
              "    // Wait for the data to be ready.\n",
              "    let fileData = yield {\n",
              "      promise: fileDataPromise,\n",
              "      response: {\n",
              "        action: 'continue',\n",
              "      }\n",
              "    };\n",
              "\n",
              "    // Use a chunked sending to avoid message size limits. See b/62115660.\n",
              "    let position = 0;\n",
              "    do {\n",
              "      const length = Math.min(fileData.byteLength - position, MAX_PAYLOAD_SIZE);\n",
              "      const chunk = new Uint8Array(fileData, position, length);\n",
              "      position += length;\n",
              "\n",
              "      const base64 = btoa(String.fromCharCode.apply(null, chunk));\n",
              "      yield {\n",
              "        response: {\n",
              "          action: 'append',\n",
              "          file: file.name,\n",
              "          data: base64,\n",
              "        },\n",
              "      };\n",
              "\n",
              "      let percentDone = fileData.byteLength === 0 ?\n",
              "          100 :\n",
              "          Math.round((position / fileData.byteLength) * 100);\n",
              "      percent.textContent = `${percentDone}% done`;\n",
              "\n",
              "    } while (position < fileData.byteLength);\n",
              "  }\n",
              "\n",
              "  // All done.\n",
              "  yield {\n",
              "    response: {\n",
              "      action: 'complete',\n",
              "    }\n",
              "  };\n",
              "}\n",
              "\n",
              "scope.google = scope.google || {};\n",
              "scope.google.colab = scope.google.colab || {};\n",
              "scope.google.colab._files = {\n",
              "  _uploadFiles,\n",
              "  _uploadFilesContinue,\n",
              "};\n",
              "})(self);\n",
              "</script> "
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Saving train[1].csv to train[1].csv\n"
          ]
        }
      ],
      "source": [
        "from google.colab import files\n",
        "uploaded = files.upload()"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "**Load Dataset**"
      ],
      "metadata": {
        "id": "TJRApm0w0Dct"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "dataset = pd.read_csv('train.csv')"
      ],
      "metadata": {
        "id": "GyOvJOoR0Lhq"
      },
      "execution_count": 4,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "**Summarize dataset**"
      ],
      "metadata": {
        "id": "0txmydWY0ZEH"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "print(dataset.shape)\n",
        "print(dataset.head(5))"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "AW-9ITV10cIY",
        "outputId": "dce2cb6d-2bdb-41e5-de9e-baf122900140"
      },
      "execution_count": 5,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "(42000, 785)\n",
            "   label  pixel0  pixel1  pixel2  pixel3  pixel4  pixel5  pixel6  pixel7  \\\n",
            "0      1       0       0       0       0       0       0       0       0   \n",
            "1      0       0       0       0       0       0       0       0       0   \n",
            "2      1       0       0       0       0       0       0       0       0   \n",
            "3      4       0       0       0       0       0       0       0       0   \n",
            "4      0       0       0       0       0       0       0       0       0   \n",
            "\n",
            "   pixel8  ...  pixel774  pixel775  pixel776  pixel777  pixel778  pixel779  \\\n",
            "0       0  ...         0         0         0         0         0         0   \n",
            "1       0  ...         0         0         0         0         0         0   \n",
            "2       0  ...         0         0         0         0         0         0   \n",
            "3       0  ...         0         0         0         0         0         0   \n",
            "4       0  ...         0         0         0         0         0         0   \n",
            "\n",
            "   pixel780  pixel781  pixel782  pixel783  \n",
            "0         0         0         0         0  \n",
            "1         0         0         0         0  \n",
            "2         0         0         0         0  \n",
            "3         0         0         0         0  \n",
            "4         0         0         0         0  \n",
            "\n",
            "[5 rows x 785 columns]\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "**Segregate Dataset into X(Input/Independent Variable) & Y(Output/Dependent Variable)**"
      ],
      "metadata": {
        "id": "QUh5BKq20viv"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "X = dataset.iloc[:,1:]\n",
        "print(X)\n",
        "print(X.shape)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "OP2TX3iX09ND",
        "outputId": "9c8f44e2-a503-4acf-8978-f6576706e402"
      },
      "execution_count": 6,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "       pixel0  pixel1  pixel2  pixel3  pixel4  pixel5  pixel6  pixel7  pixel8  \\\n",
            "0           0       0       0       0       0       0       0       0       0   \n",
            "1           0       0       0       0       0       0       0       0       0   \n",
            "2           0       0       0       0       0       0       0       0       0   \n",
            "3           0       0       0       0       0       0       0       0       0   \n",
            "4           0       0       0       0       0       0       0       0       0   \n",
            "...       ...     ...     ...     ...     ...     ...     ...     ...     ...   \n",
            "41995       0       0       0       0       0       0       0       0       0   \n",
            "41996       0       0       0       0       0       0       0       0       0   \n",
            "41997       0       0       0       0       0       0       0       0       0   \n",
            "41998       0       0       0       0       0       0       0       0       0   \n",
            "41999       0       0       0       0       0       0       0       0       0   \n",
            "\n",
            "       pixel9  ...  pixel774  pixel775  pixel776  pixel777  pixel778  \\\n",
            "0           0  ...         0         0         0         0         0   \n",
            "1           0  ...         0         0         0         0         0   \n",
            "2           0  ...         0         0         0         0         0   \n",
            "3           0  ...         0         0         0         0         0   \n",
            "4           0  ...         0         0         0         0         0   \n",
            "...       ...  ...       ...       ...       ...       ...       ...   \n",
            "41995       0  ...         0         0         0         0         0   \n",
            "41996       0  ...         0         0         0         0         0   \n",
            "41997       0  ...         0         0         0         0         0   \n",
            "41998       0  ...         0         0         0         0         0   \n",
            "41999       0  ...         0         0         0         0         0   \n",
            "\n",
            "       pixel779  pixel780  pixel781  pixel782  pixel783  \n",
            "0             0         0         0         0         0  \n",
            "1             0         0         0         0         0  \n",
            "2             0         0         0         0         0  \n",
            "3             0         0         0         0         0  \n",
            "4             0         0         0         0         0  \n",
            "...         ...       ...       ...       ...       ...  \n",
            "41995         0         0         0         0         0  \n",
            "41996         0         0         0         0         0  \n",
            "41997         0         0         0         0         0  \n",
            "41998         0         0         0         0         0  \n",
            "41999         0         0         0         0         0  \n",
            "\n",
            "[42000 rows x 784 columns]\n",
            "(42000, 784)\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "Y = dataset.iloc[:,0]\n",
        "print(Y)\n",
        "print(Y.shape)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "2RuBl7671GH4",
        "outputId": "96d6afef-f2ed-420f-d95c-826a287fa8dd"
      },
      "execution_count": 7,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "0        1\n",
            "1        0\n",
            "2        1\n",
            "3        4\n",
            "4        0\n",
            "        ..\n",
            "41995    0\n",
            "41996    1\n",
            "41997    7\n",
            "41998    6\n",
            "41999    9\n",
            "Name: label, Length: 42000, dtype: int64\n",
            "(42000,)\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "**Splitting Dataset into Test and Train**"
      ],
      "metadata": {
        "id": "o1j-AGZd1OQV"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.model_selection import train_test_split\n",
        "X_train, X_test, y_train, y_test = train_test_split(X,Y, test_size = 0.25, random_state = 0)"
      ],
      "metadata": {
        "id": "U_c_R4HA1SeZ"
      },
      "execution_count": 8,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "**Training**"
      ],
      "metadata": {
        "id": "Gf6EgvAc1vjh"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.ensemble import RandomForestClassifier\n",
        "model = RandomForestClassifier()\n",
        "model.fit(X_train, y_train)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "RS4TAnDh1yUU",
        "outputId": "4803259d-f3a1-461f-d3d0-939bc4495a64"
      },
      "execution_count": 9,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "RandomForestClassifier()"
            ]
          },
          "metadata": {},
          "execution_count": 9
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "y_pred = model.predict(X_test)"
      ],
      "metadata": {
        "id": "SljeEEbs2JFT"
      },
      "execution_count": 10,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "**Model Accuracy**"
      ],
      "metadata": {
        "id": "4XEvHILm2OF-"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.metrics import accuracy_score\n",
        "print(\"Accuracy of the Model: {0}%\".format(accuracy_score(y_test, y_pred)*100))"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "sHEVc1Qq2Rqy",
        "outputId": "06be6e32-1ba4-4035-eafb-3b3c2023abd6"
      },
      "execution_count": 11,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Accuracy of the Model: 96.31428571428572%\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import matplotlib.pyplot as plt\n",
        "index=10\n",
        "print(\"Predicted \" +str(model.predict(X_test)[index]))\n",
        "plt.axis('off')\n",
        "plt.imshow(X_test.iloc[index].values.reshape((28,28)),cmap='gray')"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 283
        },
        "id": "iymJ1Zpj20gk",
        "outputId": "ae21ce24-b957-4a30-8f04-ec5c77dd5a53"
      },
      "execution_count": 13,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Predicted 7\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "<matplotlib.image.AxesImage at 0x7ff128cbac90>"
            ]
          },
          "metadata": {},
          "execution_count": 13
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<Figure size 432x288 with 1 Axes>"
            ],
            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAOcAAADnCAYAAADl9EEgAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAF8UlEQVR4nO3dPWtUaxuG4XdJ4kelJqJVEAuriAja2NhaWSmxs7Ky1UoJCAEhRdKKhWBjIdqJaUQEsRFELNQ/IDaCgqIEP3DtOuyse9zzTjLXTI6j9GKShXLuB/bDzDRt2/4PyLNt2A8ArE+cEEqcEEqcEEqcEGqiGpum8b9yYYO1bdus9+dOTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgg1MewHGEcLCwvl/vPnz85tdXW1fO21a9fKfc+ePeX+58+fch+mGzdudG7z8/Ob+CQZnJwQSpwQSpwQSpwQSpwQSpwQSpwQqmnbtntsmu5xjO3evbvcFxcXy/3ChQvlvn379v/8TH+raZpyr/69h+3Nmzed27FjxzbxSTZX27br/qM5OSGUOCGUOCGUOCGUOCGUOCGUOCHUlrznPHXqVLnfvn273A8dOjTIxxmoUb7n/P79e+fW6+55lLnnhBEjTgglTgglTgglTgglTgglTgg1tp9be+LEic7t0aNH5Wt37do16McZmF6fa/vt27dy/3/uObdtq/9bvm/fvr5/Nv/m5IRQ4oRQ4oRQ4oRQ4oRQ4oRQ4oRQY3vPWX3OafI95srKSrlfv3693F+9ejXAp1lr586d5f748eNyP3nyZLlPTk52brOzs+Vr3759W+6jyMkJocQJocQJocQJocQJocQJocb2KmVubm5ov/v379/lfuvWrc5tfn6+fO3Xr1/7eqZR8OvXr85tHK9KenFyQihxQihxQihxQihxQihxQihxQqixvec8cODAhv3sXh9PefHixXK/d+/eIB9n00xPT5d7r7eE9VLdc25FTk4IJU4IJU4IJU4IJU4IJU4IJU4INbb3nA8ePOjcpqamytfevXu33JeXl8v948eP5T6qqr/TQTh37tyG/vxR4+SEUOKEUOKEUOKEUOKEUOKEUOKEUE3btt1j03SPI2xmZqbc379/v0lPkufgwYOd27t378rX7tixo9w/fPhQ7kePHu3cvnz5Ur52lLVt26z3505OCCVOCCVOCCVOCCVOCCVOCCVOCDW27+esbOV7zF6uXLnSufW6x+zl2bNn5T7Od5n9cHJCKHFCKHFCKHFCKHFCKHFCqC15lTLOJicny31ubq7cL1261Pfv/vHjR7k/efKk75+9FTk5IZQ4IZQ4IZQ4IZQ4IZQ4IZQ4IZR7zjHT655yaWmp3KuPSu1lYWGh3O/cudP3z96KnJwQSpwQSpwQSpwQSpwQSpwQSpwQakt+BeAoO3LkSLk/ffq03Pfu3dv37/78+XO5Hz58uNx99OX6fAUgjBhxQihxQihxQihxQihxQihxQijv5wxz/Pjxcl9ZWSn3qampcu/1fs3V1dXO7fz58+Vr3WMOlpMTQokTQokTQokTQokTQokTQrlKGYKZmZnO7eHDh+Vrp6enB/04aywvL3duvd6OxmA5OSGUOCGUOCGUOCGUOCGUOCGUOCGUe84NMDs7W+5Xr17t3Pbv3z/ox1nj5cuX5b64uLihv5+/5+SEUOKEUOKEUOKEUOKEUOKEUOKEUL4CsA8TE/X18P3798v9zJkzg3ycNT59+lTup0+fLvfXr18P8nH4C74CEEaMOCGUOCGUOCGUOCGUOCGUOCGU93P24ebNm+U+zHvMy5cvl7t7zNHh5IRQ4oRQ4oRQ4oRQ4oRQ4oRQ4oRQ7jn70OtzaTfS2bNny/358+eb9CRsNCcnhBInhBInhBInhBInhBInhHKVEmZpaancX7x4sUlPwrA5OSGUOCGUOCGUOCGUOCGUOCGUOCGUrwCEIfMVgDBixAmhxAmhxAmhxAmhxAmhxAmhyntOYHicnBBKnBBKnBBKnBBKnBBKnBDqH2Wm9vKr3NQPAAAAAElFTkSuQmCC\n"
          },
          "metadata": {
            "needs_background": "light"
          }
        }
      ]
    }
  ],
  "metadata": {
    "colab": {
      "provenance": [],
      "authorship_tag": "ABX9TyOBzEe2vR1rQh4B8yWT0mhr",
      "include_colab_link": true
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}