From 8a0c02e7f4914a26f13e7bac657dfd606ddddf4c Mon Sep 17 00:00:00 2001
From: Riya Jaiswal <84279900+riyajaiswal25@users.noreply.github.com>
Date: Fri, 7 Oct 2022 21:49:36 +0530
Subject: [PATCH] ML based project on algo Random Forest Classifier
---
...cognitionusingRandomForestClassifier.ipynb | 626 ++++++++++++++++++
1 file changed, 626 insertions(+)
create mode 100644 Projects/DigitRecognitionusingRandomForestClassifier.ipynb
diff --git a/Projects/DigitRecognitionusingRandomForestClassifier.ipynb b/Projects/DigitRecognitionusingRandomForestClassifier.ipynb
new file mode 100644
index 0000000..b440c98
--- /dev/null
+++ b/Projects/DigitRecognitionusingRandomForestClassifier.ipynb
@@ -0,0 +1,626 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-in-github",
+ "colab_type": "text"
+ },
+ "source": [
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "hdd4dapuroBk"
+ },
+ "source": [
+ "# Digit Recognition using Random Forest Classifier"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "k_cWcYTUsWdE"
+ },
+ "source": [
+ "**Import Basic Library**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "id": "t6uu8CVZrllI"
+ },
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "S_X9qpm0s4uq"
+ },
+ "source": [
+ "**Choosing Dataset**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 73
+ },
+ "id": "ERRZ3tkOOYFA",
+ "outputId": "5f8f4aae-398b-4e33-e2c2-53de23174401"
+ },
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ ""
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " "
+ ]
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Saving train[1].csv to train[1].csv\n"
+ ]
+ }
+ ],
+ "source": [
+ "from google.colab import files  # Colab-only helper module for browser file uploads\n",
+ "uploaded = files.upload()  # keep the returned mapping of uploaded files for the load step"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "**Load Dataset**"
+ ],
+ "metadata": {
+ "id": "TJRApm0w0Dct"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import io\n", "dataset = pd.read_csv(io.BytesIO(next(iter(uploaded.values()))))  # parse the uploaded bytes directly; the upload was saved as 'train[1].csv', so a hard-coded 'train.csv' path fails on a fresh runtime"
+ ],
+ "metadata": {
+ "id": "GyOvJOoR0Lhq"
+ },
+ "execution_count": 4,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "**Summarize dataset**"
+ ],
+ "metadata": {
+ "id": "0txmydWY0ZEH"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "preview = dataset.head(5)  # first five rows only, to keep the output short\n",
+ "print(dataset.shape, preview, sep='\\n')"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "AW-9ITV10cIY",
+ "outputId": "dce2cb6d-2bdb-41e5-de9e-baf122900140"
+ },
+ "execution_count": 5,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "(42000, 785)\n",
+ " label pixel0 pixel1 pixel2 pixel3 pixel4 pixel5 pixel6 pixel7 \\\n",
+ "0 1 0 0 0 0 0 0 0 0 \n",
+ "1 0 0 0 0 0 0 0 0 0 \n",
+ "2 1 0 0 0 0 0 0 0 0 \n",
+ "3 4 0 0 0 0 0 0 0 0 \n",
+ "4 0 0 0 0 0 0 0 0 0 \n",
+ "\n",
+ " pixel8 ... pixel774 pixel775 pixel776 pixel777 pixel778 pixel779 \\\n",
+ "0 0 ... 0 0 0 0 0 0 \n",
+ "1 0 ... 0 0 0 0 0 0 \n",
+ "2 0 ... 0 0 0 0 0 0 \n",
+ "3 0 ... 0 0 0 0 0 0 \n",
+ "4 0 ... 0 0 0 0 0 0 \n",
+ "\n",
+ " pixel780 pixel781 pixel782 pixel783 \n",
+ "0 0 0 0 0 \n",
+ "1 0 0 0 0 \n",
+ "2 0 0 0 0 \n",
+ "3 0 0 0 0 \n",
+ "4 0 0 0 0 \n",
+ "\n",
+ "[5 rows x 785 columns]\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "**Segregate Dataset into X (Input/Independent Variables) and Y (Output/Dependent Variable)**"
+ ],
+ "metadata": {
+ "id": "QUh5BKq20viv"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "X = dataset.iloc[:, 1:]  # every column after the first, i.e. the pixel features\n",
+ "for summary in (X, X.shape):\n",
+ "    print(summary)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "OP2TX3iX09ND",
+ "outputId": "9c8f44e2-a503-4acf-8978-f6576706e402"
+ },
+ "execution_count": 6,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ " pixel0 pixel1 pixel2 pixel3 pixel4 pixel5 pixel6 pixel7 pixel8 \\\n",
+ "0 0 0 0 0 0 0 0 0 0 \n",
+ "1 0 0 0 0 0 0 0 0 0 \n",
+ "2 0 0 0 0 0 0 0 0 0 \n",
+ "3 0 0 0 0 0 0 0 0 0 \n",
+ "4 0 0 0 0 0 0 0 0 0 \n",
+ "... ... ... ... ... ... ... ... ... ... \n",
+ "41995 0 0 0 0 0 0 0 0 0 \n",
+ "41996 0 0 0 0 0 0 0 0 0 \n",
+ "41997 0 0 0 0 0 0 0 0 0 \n",
+ "41998 0 0 0 0 0 0 0 0 0 \n",
+ "41999 0 0 0 0 0 0 0 0 0 \n",
+ "\n",
+ " pixel9 ... pixel774 pixel775 pixel776 pixel777 pixel778 \\\n",
+ "0 0 ... 0 0 0 0 0 \n",
+ "1 0 ... 0 0 0 0 0 \n",
+ "2 0 ... 0 0 0 0 0 \n",
+ "3 0 ... 0 0 0 0 0 \n",
+ "4 0 ... 0 0 0 0 0 \n",
+ "... ... ... ... ... ... ... ... \n",
+ "41995 0 ... 0 0 0 0 0 \n",
+ "41996 0 ... 0 0 0 0 0 \n",
+ "41997 0 ... 0 0 0 0 0 \n",
+ "41998 0 ... 0 0 0 0 0 \n",
+ "41999 0 ... 0 0 0 0 0 \n",
+ "\n",
+ " pixel779 pixel780 pixel781 pixel782 pixel783 \n",
+ "0 0 0 0 0 0 \n",
+ "1 0 0 0 0 0 \n",
+ "2 0 0 0 0 0 \n",
+ "3 0 0 0 0 0 \n",
+ "4 0 0 0 0 0 \n",
+ "... ... ... ... ... ... \n",
+ "41995 0 0 0 0 0 \n",
+ "41996 0 0 0 0 0 \n",
+ "41997 0 0 0 0 0 \n",
+ "41998 0 0 0 0 0 \n",
+ "41999 0 0 0 0 0 \n",
+ "\n",
+ "[42000 rows x 784 columns]\n",
+ "(42000, 784)\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "Y = dataset.iloc[:, 0]  # first column, i.e. the digit label\n",
+ "for summary in (Y, Y.shape):\n",
+ "    print(summary)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "2RuBl7671GH4",
+ "outputId": "96d6afef-f2ed-420f-d95c-826a287fa8dd"
+ },
+ "execution_count": 7,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "0 1\n",
+ "1 0\n",
+ "2 1\n",
+ "3 4\n",
+ "4 0\n",
+ " ..\n",
+ "41995 0\n",
+ "41996 1\n",
+ "41997 7\n",
+ "41998 6\n",
+ "41999 9\n",
+ "Name: label, Length: 42000, dtype: int64\n",
+ "(42000,)\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "**Splitting Dataset into Test and Train**"
+ ],
+ "metadata": {
+ "id": "o1j-AGZd1OQV"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from sklearn.model_selection import train_test_split\n",
+ "X_train, X_test, y_train, y_test = train_test_split(X, Y, random_state=0, test_size=0.25)  # hold out 25% of the rows; seed fixed at 0"
+ ],
+ "metadata": {
+ "id": "U_c_R4HA1SeZ"
+ },
+ "execution_count": 8,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "**Training**"
+ ],
+ "metadata": {
+ "id": "Gf6EgvAc1vjh"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from sklearn.ensemble import RandomForestClassifier\n",
+ "model = RandomForestClassifier(random_state=0)  # seed the forest (same seed as the split) so the reported accuracy is reproducible\n",
+ "model.fit(X_train, y_train)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "RS4TAnDh1yUU",
+ "outputId": "4803259d-f3a1-461f-d3d0-939bc4495a64"
+ },
+ "execution_count": 9,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "RandomForestClassifier()"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 9
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "y_pred = model.predict(X_test)  # predicted label for every row of the test split"
+ ],
+ "metadata": {
+ "id": "SljeEEbs2JFT"
+ },
+ "execution_count": 10,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "**Model Accuracy**"
+ ],
+ "metadata": {
+ "id": "4XEvHILm2OF-"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from sklearn.metrics import accuracy_score\n",
+ "print(f\"Accuracy of the Model: {accuracy_score(y_test, y_pred) * 100}%\")  # fraction of correct predictions, shown as a percentage"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "sHEVc1Qq2Rqy",
+ "outputId": "06be6e32-1ba4-4035-eafb-3b3c2023abd6"
+ },
+ "execution_count": 11,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Accuracy of the Model: 96.31428571428572%\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "index = 10  # position (within the test split) of the sample to inspect\n",
+ "print(\"Predicted \" + str(model.predict(X_test.iloc[[index]])[0]))  # classify just this one row instead of re-predicting the whole test set\n",
+ "plt.axis('off')\n",
+ "plt.imshow(X_test.iloc[index].values.reshape((28, 28)), cmap='gray')"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 283
+ },
+ "id": "iymJ1Zpj20gk",
+ "outputId": "ae21ce24-b957-4a30-8f04-ec5c77dd5a53"
+ },
+ "execution_count": 13,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Predicted 7\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "execution_count": 13
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "