diff --git a/Projects/DigitRecognitionusingRandomForestClassifier.ipynb b/Projects/DigitRecognitionusingRandomForestClassifier.ipynb new file mode 100644 index 0000000..b440c98 --- /dev/null +++ b/Projects/DigitRecognitionusingRandomForestClassifier.ipynb @@ -0,0 +1,626 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hdd4dapuroBk" + }, + "source": [ + "# Digit Recognition using Random Forest Classifier" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "k_cWcYTUsWdE" + }, + "source": [ + "**Import Basic Library**" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "t6uu8CVZrllI" + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "S_X9qpm0s4uq" + }, + "source": [ + "**Choosing Dataset**" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 73 + }, + "id": "ERRZ3tkOOYFA", + "outputId": "5f8f4aae-398b-4e33-e2c2-53de23174401" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + " \n", + " \n", + " Upload widget is only available when the cell has been executed in the\n", + " current browser session. 
Please rerun this cell to enable.\n", + " \n", + " " + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Saving train[1].csv to train[1].csv\n" + ] + } + ], + "source": [ + "from google.colab import files\n", + "uploaded = files.upload()" + ] + }, + { + "cell_type": "markdown", + "source": [ + "**Load Dataset**" + ], + "metadata": { + "id": "TJRApm0w0Dct" + } + }, + { + "cell_type": "code", + "source": [ + "import io\n", + "\n", + "# Load the file that was just uploaded instead of a hard-coded 'train.csv':\n", + "# the upload cell above saved it as 'train[1].csv', so the fixed name fails on a fresh run.\n", + "if not uploaded:\n", + "    raise FileNotFoundError('No file was uploaded - rerun the upload cell and select the training CSV.')\n", + "\n", + "# files.upload() returns a dict of {filename: file content as bytes}; use the first uploaded file.\n", + "uploaded_bytes = next(iter(uploaded.values()))\n", + "dataset = pd.read_csv(io.BytesIO(uploaded_bytes))" + ], + "metadata": { + "id": "GyOvJOoR0Lhq" + }, + "execution_count": 4, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "**Summarize dataset**" + ], + "metadata": { + "id": "0txmydWY0ZEH" + } + }, + { + "cell_type": "code", + "source": [ + "print(dataset.shape)\n", + "print(dataset.head(5))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "AW-9ITV10cIY", + "outputId": "dce2cb6d-2bdb-41e5-de9e-baf122900140" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(42000, 785)\n", + " label pixel0 pixel1 pixel2 pixel3 pixel4 pixel5 pixel6 pixel7 \\\n", + "0 1 0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 0 0 \n", + "2 1 0 0 0 0 0 0 0 0 \n", + "3 4 0 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 0 0 \n", + "\n", + " pixel8 ... pixel774 pixel775 pixel776 pixel777 pixel778 pixel779 \\\n", + "0 0 ... 0 0 0 0 0 0 \n", + "1 0 ... 0 0 0 0 0 0 \n", + "2 0 ... 0 0 0 0 0 0 \n", + "3 0 ... 0 0 0 0 0 0 \n", + "4 0 ... 
0 0 0 0 0 0 \n", + "\n", + " pixel780 pixel781 pixel782 pixel783 \n", + "0 0 0 0 0 \n", + "1 0 0 0 0 \n", + "2 0 0 0 0 \n", + "3 0 0 0 0 \n", + "4 0 0 0 0 \n", + "\n", + "[5 rows x 785 columns]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "**Segregate Dataset into X(Input/Independent Variable) & Y(Output/Dependent Variable)**" + ], + "metadata": { + "id": "QUh5BKq20viv" + } + }, + { + "cell_type": "code", + "source": [ + "# Features: every column after the first one (column 0 is the 'label' column shown above).\n", + "X = dataset.iloc[:, 1:]\n", + "# Show the frame preview followed by its shape, one per line.\n", + "print(X, X.shape, sep='\\n')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "OP2TX3iX09ND", + "outputId": "9c8f44e2-a503-4acf-8978-f6576706e402" + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " pixel0 pixel1 pixel2 pixel3 pixel4 pixel5 pixel6 pixel7 pixel8 \\\n", + "0 0 0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 0 0 \n", + "... ... ... ... ... ... ... ... ... ... \n", + "41995 0 0 0 0 0 0 0 0 0 \n", + "41996 0 0 0 0 0 0 0 0 0 \n", + "41997 0 0 0 0 0 0 0 0 0 \n", + "41998 0 0 0 0 0 0 0 0 0 \n", + "41999 0 0 0 0 0 0 0 0 0 \n", + "\n", + " pixel9 ... pixel774 pixel775 pixel776 pixel777 pixel778 \\\n", + "0 0 ... 0 0 0 0 0 \n", + "1 0 ... 0 0 0 0 0 \n", + "2 0 ... 0 0 0 0 0 \n", + "3 0 ... 0 0 0 0 0 \n", + "4 0 ... 0 0 0 0 0 \n", + "... ... ... ... ... ... ... ... \n", + "41995 0 ... 0 0 0 0 0 \n", + "41996 0 ... 0 0 0 0 0 \n", + "41997 0 ... 0 0 0 0 0 \n", + "41998 0 ... 0 0 0 0 0 \n", + "41999 0 ... 0 0 0 0 0 \n", + "\n", + " pixel779 pixel780 pixel781 pixel782 pixel783 \n", + "0 0 0 0 0 0 \n", + "1 0 0 0 0 0 \n", + "2 0 0 0 0 0 \n", + "3 0 0 0 0 0 \n", + "4 0 0 0 0 0 \n", + "... ... ... ... ... ... 
\n", + "41995 0 0 0 0 0 \n", + "41996 0 0 0 0 0 \n", + "41997 0 0 0 0 0 \n", + "41998 0 0 0 0 0 \n", + "41999 0 0 0 0 0 \n", + "\n", + "[42000 rows x 784 columns]\n", + "(42000, 784)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "Y = dataset.iloc[:,0]\n", + "print(Y)\n", + "print(Y.shape)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "2RuBl7671GH4", + "outputId": "96d6afef-f2ed-420f-d95c-826a287fa8dd" + }, + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "0 1\n", + "1 0\n", + "2 1\n", + "3 4\n", + "4 0\n", + " ..\n", + "41995 0\n", + "41996 1\n", + "41997 7\n", + "41998 6\n", + "41999 9\n", + "Name: label, Length: 42000, dtype: int64\n", + "(42000,)\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "**Splitting Dataset into Test and Train**" + ], + "metadata": { + "id": "o1j-AGZd1OQV" + } + }, + { + "cell_type": "code", + "source": [ + "from sklearn.model_selection import train_test_split\n", + "X_train, X_test, y_train, y_test = train_test_split(X,Y, test_size = 0.25, random_state = 0)" + ], + "metadata": { + "id": "U_c_R4HA1SeZ" + }, + "execution_count": 8, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "**Training**" + ], + "metadata": { + "id": "Gf6EgvAc1vjh" + } + }, + { + "cell_type": "code", + "source": [ + "from sklearn.ensemble import RandomForestClassifier\n", + "\n", + "# Seed the forest, as the train/test split already is, so that the fitted model\n", + "# and the accuracy reported further down are reproducible from run to run.\n", + "model = RandomForestClassifier(random_state=0)\n", + "model.fit(X_train, y_train)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "RS4TAnDh1yUU", + "outputId": "4803259d-f3a1-461f-d3d0-939bc4495a64" + }, + "execution_count": 9, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "RandomForestClassifier()" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Predicted labels for the held-out test rows.\n", + "y_pred = model.predict(X_test)" + ], + "metadata": { + "id": "SljeEEbs2JFT" + 
}, + "execution_count": 10, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "**Model Accuracy**" + ], + "metadata": { + "id": "4XEvHILm2OF-" + } + }, + { + "cell_type": "code", + "source": [ + "from sklearn.metrics import accuracy_score\n", + "print(\"Accuracy of the Model: {0}%\".format(accuracy_score(y_test, y_pred)*100))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "sHEVc1Qq2Rqy", + "outputId": "06be6e32-1ba4-4035-eafb-3b3c2023abd6" + }, + "execution_count": 11, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Accuracy of the Model: 96.31428571428572%\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "index = 10  # position (within the test split) of the sample to display\n", + "# Reuse y_pred from the prediction cell rather than re-running inference on the whole test set.\n", + "print(\"Predicted \" + str(y_pred[index]))\n", + "plt.axis('off')\n", + "# Each row holds 784 pixel values; reshape them into a 28x28 image for display.\n", + "plt.imshow(X_test.iloc[index].values.reshape((28, 28)), cmap='gray')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 283 + }, + "id": "iymJ1Zpj20gk", + "outputId": "ae21ce24-b957-4a30-8f04-ec5c77dd5a53" + }, + "execution_count": 13, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Predicted 7\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 13 + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAOcAAADnCAYAAADl9EEgAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAF8UlEQVR4nO3dPWtUaxuG4XdJ4kelJqJVEAuriAja2NhaWSmxs7Ky1UoJCAEhRdKKhWBjIdqJaUQEsRFELNQ/IDaCgqIEP3DtOuyse9zzTjLXTI6j9GKShXLuB/bDzDRt2/4PyLNt2A8ArE+cEEqcEEqcEEqcEGqiGpum8b9yYYO1bdus9+dOTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgglTgg1MewHGEcLCwvl/vPnz85tdXW1fO21a9fKfc+ePeX+58+fch+mGzdudG7z8/Ob+CQZnJwQSpwQSpwQSpwQSpwQSpwQSpwQqmnbtntsmu5xjO3evbvcFxcXy/3ChQvlvn379v/8TH+raZpyr/69h+3Nmzed27FjxzbxSTZX27br/qM5OSGUOCGUOCGUOCGUOCGUOCGUOCHUlrznPHXqVLnfvn273A8dOjTIxxmoUb7n/P79e+fW6+55lLnnhBEjTgglTgglTgglTgglTgglTgg1tp9be+LEic7t0aNH5Wt37do16McZmF6fa/vt27dy/3/uObdtq/9bvm/fvr5/Nv/m5IRQ4oRQ4oRQ4oRQ4oRQ4oRQ4oRQY3vPWX3OafI95srKSrlfv3693F+9ejXAp1lr586d5f748eNyP3nyZLlPTk52brOzs+Vr3759W+6jyMkJocQJocQJocQJocQJocQJocb2KmVubm5ov/v379/lfuvWrc5tfn6+fO3Xr1/7eqZR8OvXr85tHK9KenFyQihxQihxQihxQihxQihxQihxQqixvec8cODAhv3sXh9PefHixXK/d+/eIB9n00xPT5d7r7eE9VLdc25FTk4IJU4IJU4IJU4IJU4IJU4IJU4INbb3nA8ePOjcpqamytfevXu33JeXl8v948eP5T6qqr/TQTh37tyG/vxR4+SEUOKEUOKEUOKEUOKEUOKEUOKEUE3btt1j03SPI2xmZqbc379/v0lPkufgwYOd27t378rX7tixo9w/fPhQ7kePHu3cvnz5Ur52lLVt26z3505OCCVOCCVOCCVOCCVOCCVOCCVOCDW27+esbOV7zF6uXLnSufW6x+zl2bNn5T7Od5n9cHJCKHFCKHFCKHFCKHFCKHFCqC15lTLOJicny31ubq7cL1261Pfv/vHjR7k/efKk75+9FTk5IZQ4IZQ4IZQ4IZQ4IZQ4IZQ4IZR7zjHT655yaWmp3KuPSu1lYWGh3O/cudP3z96KnJwQSpwQSpwQSpwQSpwQSpwQSpwQakt+BeAoO3LkSLk/ffq03Pfu3dv37/78+XO5Hz58uNx99OX6fAUgjBhxQihxQihxQihxQihxQihxQijv5wxz/Pjxcl9ZWSn3qampcu/1fs3V1dXO7fz58+Vr3WMOlpMTQokTQokTQokTQokTQokTQrlKGYKZmZnO7eHDh+Vrp6enB/04aywvL3duvd6OxmA5OSGUOCGUOCGUOCGUOCGUOCGUOCGUe84NMDs7W+5Xr17t3Pbv3z/ox1nj5cuX5b64uLihv5+/5+SEUOKEUOKEUOKEUOKEUOKEUOKEUL4CsA8TE/X18P3798v9zJkzg3ycNT59+lTup0+fLvfXr18P8nH4C74CEEaMOCGUOCGUOCGUO
CGUOCGUOCGU93P24ebNm+U+zHvMy5cvl7t7zNHh5IRQ4oRQ4oRQ4oRQ4oRQ4oRQ4oRQ7jn70OtzaTfS2bNny/358+eb9CRsNCcnhBInhBInhBInhBInhBInhHKVEmZpaancX7x4sUlPwrA5OSGUOCGUOCGUOCGUOCGUOCGUOCGUrwCEIfMVgDBixAmhxAmhxAmhxAmhxAmhxAmhyntOYHicnBBKnBBKnBBKnBBKnBBKnBDqH2Wm9vKr3NQPAAAAAElFTkSuQmCC\n" + }, + "metadata": { + "needs_background": "light" + } + } + ] + } + ], + "metadata": { + "colab": { + "provenance": [], + "authorship_tag": "ABX9TyOBzEe2vR1rQh4B8yWT0mhr", + "include_colab_link": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file