python-scripts/scripts/Linear Regression/Linear Regression.ipynb

285 lines
50 KiB
Plaintext
Raw Permalink Normal View History

{
"cells": [
{
"cell_type": "code",
"execution_count": 34,
"id": "05f41765",
"metadata": {},
"outputs": [],
"source": [
"import pandas\n",
"from pandas import DataFrame\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.linear_model import LinearRegression"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "7f82f718",
"metadata": {},
"outputs": [],
"source": [
"# Load the raw dataset; the relative path is resolved against the kernel's working directory.\n",
"data = pandas.read_csv(filepath_or_buffer='data.csv')"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "656efe3f",
"metadata": {},
"outputs": [],
"source": [
"# Feature (x) and target (y) selected from the loaded frame.\n",
"# Double brackets keep each selection a one-column DataFrame, matching what the\n",
"# original DataFrame(data, columns=[...]) calls produced.\n",
"# Indexing with a list raises KeyError when a column is missing, whereas\n",
"# DataFrame(data, columns=[...]) would silently create an all-NaN column.\n",
"# NOTE(review): the saved outputs of this notebook show a 'production_budget_usd'\n",
"# column, so 'col_1'/'col_2' may not match the current data.csv -- confirm the names.\n",
"x = data[['col_1']]\n",
"y = data[['col_2']]"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "6db0a71e",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAmkAAAGDCAYAAABwRoerAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAACC/ElEQVR4nO39eZxk2XXQ+/7WGWLOObPmuXqoHmyVpHKrpe6WZSxZni3A0DZGBl+BDB/DB4FtLvB4cOHBgwsWGDBGbmw/28KYNlfGLey2ZQlsqaulllTdakk9VXfNlVWVVTlGZMxn2O+PcyIqh4jMyKyMysjM9f186lOVEZER+8SJyrNy7bXXFmMMSimllFKqt1ibPQCllFJKKbWcBmlKKaWUUj1IgzSllFJKqR6kQZpSSimlVA/SIE0ppZRSqgdpkKaUUkop1YM0SFNKKaWU6kEapCmllFJK9SAN0pRSd0RE/oKInBGRoojcEJE/EJHH7/A5L4nI+zdgXP91Ha9biY9lQkR+TURydzIOpZRaLw3SlFLrJiJ/B/h54P8L7AYOAb8I/NAmDqvhe4Fn1/F9P2CMyQEngbcDf38jB6WUUp3SIE0ptS4iMgD8U+CnjDG/Y4wpGWM8Y8z/NMb8bPyYpIj8vIhcj//8vIgk4/tGReT3RGRORGZE5DkRsUTkk0TB3v+MM1p/t8Vrvy4i37/ga0dEpkTkHfHXFvAB4A9FJCUi/0VEpuPX+qqI7F7t+IwxE8BniIK1xus8KiJfjJ/n6yLyvvj2HxGRM0vG+LdF5NML3oefE5ErInJTRD4hIun4vveJyLiI/LSI3IqzkT+x4Hn+RET+yoKv/7KInF7w9QkR+Wz8Hp4VkT+/2rEppbaGLRmkicivxj/MXungsYdF5H+JyDfiH3YH7sYYldoB3g2kgP+xwmP+X8CjRIHO24BHgH8Y3/fTwDgwRpSF+weAMcZ8GLhCnNEyxvyrFs/7W8CPLvj6g8CUMeal+OtHgAvGmCngLwEDwEFgBPhrQGW1g4t/VnwPcC7+ej/w+8A/A4aBnwE+JSJjwKeB+0Xk3gVP8ReAxnTr/w3cF78P9wD7gX+04LF74jHuBz4C/EcRGepgjFngs/Hr7CJ6T35RRB5a7XuVUr1vSwZpwK8B393hY38O+A1jzLcS/db/L7o1KKV2mBGiwMhf4TE/BvxTY8wtY8wk8E+AD8f3ecBe4HCcgXvOGGM6fO3/CvygiGTirxcGRADfx+2pTi8e6z3GmMAY86IxprDCc/+uiMwDV4FbwD+Ob/+LwLPGmGeNMaEx5rPAGeB7jTFl4BniwDEO1k4AnxYRAf4q8LeNMTPGmHmi6eEfWfCaXvw+ecaYZ4EicH8H78P3A5eMMf8/Y4wfB6mfAn64g+9VSvW4LRmkGWO+AMwsvE1EjovIH4rIi/G0yYn4rgeB/xX/+4/pjVoZpbaDaWBURJwVHrMPuLzg68vxbQD/mihL9UcickFE/l6nL2yMOQe8DvxAHKj9IIuDtIX1aJ8kmrb8b/GU678SEXeFp/+QMaYPeB9RoDUa334Y+HPxVOeciMwBjxMFmsSv38ju/QXgd+PgbQzIAC8u+L4/jG9vmF4S7JaBThYsHAbetWRMP0aUmVNKbXFbMkhr4yngbxpj3kk0DfGL8e1fB/5s/O8/DfSJyMgmjE+p7eZLQBX40AqPuU4USDQcim/DGDNvjPlpY8wx4AeAvyMi3xk/rpOMWmPK84eA1+LADRHZQxQ4vRS/jmeM+SfGmAeB9xBln358tSc3xnyeKGv/c/FNV4FPGmMGF/zJGmP+ZXz/HxEFrSfjcTWCximi6dWHFnzfQLw4oRMloiCvYWEAdhX4/JIx5Ywxf73D51ZK9bBtEaTFS+TfA/x3EXkZ+CVu/3b7M8C3i8jXgG8HrgErTc8opTpgjMkT1VX9RxH5kIhkRMQVke8RkUYd2W8B/1BExkRkNH78fwEQke8XkXvi6cACEMR/AG
4Cx1YZwn8Dvgv46yzPov1hY+pURL5DRL5FROz4dbwFr7Oanwc+EAde/4Uoc/dBEbHjBQnva9S5xpmw/4coQzhMVCuGMSYE/jPwb0VkVzym/SLywQ7H8DLwZ+L39x6imrWG3wPuE5EPx++9KyLfJiIPdPjcSqketi2CNKLjmDPGnFzw5wEAY8x1Y8yfMca8naiIuXFxUUrdIWPMvwH+DtFigEmizM7fAH43fsg/I6rb+gbwTaLs1j+L77sX+BxR/dWXgF80xvxJfN+/IAru5kTkZ9q89o34+94DPL3grqWtN/YQBU8FoinSzxMHih0c3yTwG8D/2xhzlShr9w8WHOvPsvjn6H8F3g/89yXTl/8n0dTuCyJSiI+7k5ozgH8L1IkC118HfnPB+OaJAtUfIcpQThAtUkh2+NxKqR4mndfp9hYROQL8njHm4fjrLwL/1hjz3+PfzL/VGPP1+Lf3GWNMKCL/HAiMMf+o/TMrpbaquD5uAjiuv4wppba6rmXS4qmAr8S9hF4VkX/S4jEiIv9eRM7FLTLe0eFz/xbRb9D3x/2FPkJULPsREfk68Cq3Fwi8DzgrIm8SLfP/5xtweEqp3jRMlPXSAE0pteV1LZMWZ7OyxphivJLqNPC3jDEvLHjM9wJ/k2h64l3AvzPGvKsrA1JKKaWU2kK6lkkzkWL8pRv/WRoR/hBRDzMTB2+DIrIXpZRSSqkdrqsLB+IVUC8TNYT8rDHmy0sesp+o+LZhPL5NKaWUUmpHW6kJ5R0zxgTASREZBP6HiDxsjFm4lZO0+ralN4jIR4GPAmSz2XeeOHFi2TcppZRSSvWaF198ccoYM7b6I5frapDWYIyZE5E/IdrKaWGQNk60n17DAeJGl0u+/ymiZrWcOnXKnDlzZulDlFJKKaV6johcXv1RrXVzdedYnEFDRNJEvYPeWPKwTwM/Hq/yfBTIx72PlFJKKaV2tG5m0vYCvx53+baA3zbG/J6I/DUAY8wniBpOfi9Rk8cy8BNdHI9SSiml1JbRtSDNGPMN4O0tbv/Egn8b4Ke6NQallFJKqa1qu2wLpZRSSim1rWiQppRSSinVgzRIU0oppZTqQRqkKaWUUkr1IA3SlFJKKaV6kAZpSimllFI9SIM0pZRSSqkepEGaUkoppVQP0iBNKaWUUqoHaZCmlFJKKdWDNEhTSimllOpBGqQppZRSSvUgDdKUUkoppXqQBmlKKaWUUj1IgzSllFJKqR7kbPYAlFJKKaW64eJkkdPnp7mZr7J7IMXjx0c4Opbb7GF1TDNpSimllNp2Lk4WefrMOKWqz56BFKWqz9Nnxrk4WdzsoXVMgzSllFJKbTunz08zmHbpT7tYIvSnXQbTLqfPT2/20DqmQZpSSimltp2b+Sq51OKqrlzK4Wa+ukkjWjsN0pRSSim17eweSFGs+otuK1Z9dg+kNmlEa6dBmlJKKaW2ncePjzBX8ShUPEJjKFQ85ioejx8f2eyhdUyDNKWUUkptO0fHcjx56gDZlMNEvko25fDkqQNbanWntuBQSiml1LZ0dCy3pYKypTSTppRSSinVgzRIU0oppZTqQRqkKaWUUkr1IA3SlFJKKaV6kAZpSimllFI9SIM0pZRSSqkepEGaUkoppVQP0iBNKaWUUqoHaZCmlFJKKdWDNEhTSimllOpBGqQppZRSSvUgDdKUUkoppXqQBmlKKaWUUj1IgzSllFJKqR6kQZpSSimlVA/SIE0ppZRSqgdpkKaUUkop1YM0SFNKKaWU6kEapCmllFJK9SAN0pRSSimlepAGaUoppZRSPUiDNKWUUkqpHqRBmlJKKaVUD+pakCYiB0Xkj0XkdRF5VUT+VovHvE9E8iLycvznH3VrPEoppZRSW4nTxef2gZ82xrwkIn3AiyLyWWPMa0se95wx5vu7OA6llFJKqS2na5k0Y8wNY8xL8b/ngdeB/d16PaWUUkqp7eSu1KSJyBHg7cCXW9z9bhH5uoj8gYg81Ob7PyoiZ0
TkzOTkZDeHqpRSSinVE7oepIlIDvgU8DFjTGHJ3S8Bh40xbwP+A/C7rZ7DGPOUMeaUMebU2NhYV8erlFJKKdULuhq
"text/plain": [
"<Figure size 720x432 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Scatter plot of the raw data: feature on the x-axis, target on the y-axis.\n",
"fig, ax = plt.subplots(figsize=(10, 6))\n",
"ax.scatter(x, y, alpha=0.4)\n",
"# Fixed axis limits starting at zero on both axes.\n",
"ax.set_xlim(0, 4.5e8)\n",
"ax.set_ylim(0, 3e9)\n",
"ax.set_xlabel(\"Feature\")\n",
"ax.set_ylabel(\"Target\")\n",
"ax.set_title(\"Feature v/s Target\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "ec3fb414",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"LinearRegression()"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fit the linear model  y = theta_0 + theta_1 * x.\n",
"# fit() returns the estimator itself, so `rgr` is bound to the fitted model.\n",
"rgr = LinearRegression().fit(x, y)\n",
"rgr"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "40be1d77",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[3.11150918]])"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Slope of the fitted line (theta_1 in  y = theta_0 + theta_1 * x).\n",
"slope = rgr.coef_\n",
"# Legacy alias: the original name `th0` is kept so existing references keep working,\n",
"# even though the value it holds is theta_1 (the slope), not theta_0.\n",
"th0 = slope\n",
"slope"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "e4c31635",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>production_budget_usd</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>400000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>750000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5029</th>\n",
" <td>225000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5030</th>\n",
" <td>215000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5031</th>\n",
" <td>306000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5032</th>\n",
" <td>200000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5033</th>\n",
" <td>425000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5034 rows × 1 columns</p>\n",
"</div>"
],
"text/plain": [
" production_budget_usd\n",
"0 1000000\n",
"1 10000\n",
"2 400000\n",
"3 750000\n",
"4 10000\n",
"... ...\n",
"5029 225000000\n",
"5030 215000000\n",
"5031 306000000\n",
"5032 200000000\n",
"5033 425000000\n",
"\n",
"[5034 rows x 1 columns]"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Intercept of the fitted line (theta_0, i.e. the constant c in  y = c + theta_1 * x).\n",
"intercept = rgr.intercept_\n",
"# Legacy alias: the original name `th1` is kept so existing references keep working,\n",
"# even though the value it holds is theta_0 (the intercept), not theta_1.\n",
"th1 = intercept\n",
"# A bare expression in the middle of a cell is not displayed, so print the value explicitly.\n",
"print('intercept (theta_0):', intercept)\n",
"\n",
"# Predictions of the fitted model for every row of x (drawn as the fitted line in the plot cell).\n",
"Y = rgr.predict(x)\n",
"\n",
"# Last expression of the cell: display the feature frame.\n",
"x"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0d71ef51",
"metadata": {
"scrolled": true
},
"outputs": [
{
"ename": "",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31mRunning cells with 'Python 3.10.4 64-bit' requires ipykernel package.\n",
"\u001b[1;31mRun the following command to install 'ipykernel' into the Python environment. \n",
"\u001b[1;31mCommand: '\"c:/Program Files/Python310/python.exe\" -m pip install ipykernel -U --user --force-reinstall'"
]
}
],
"source": [
"# Raw data overlaid with the model's predictions and the fitted regression line.\n",
"fig, ax = plt.subplots(figsize=(10, 6))\n",
"ax.scatter(x, y, alpha=0.3)  # observed points\n",
"ax.scatter(x, Y)  # predicted value for each observation\n",
"# x['col_1'] selects the feature column as a 1-D Series for the line plot.\n",
"ax.plot(x['col_1'], Y, color='black')\n",
"ax.set(\n",
"    xlim=(0, 450000000),\n",
"    ylim=(0, 3000000000),\n",
"    xlabel=\"Feature\",\n",
"    ylabel=\"Target\",\n",
"    title=\"Feature v/s Target\",\n",
")\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.10.4 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
},
"vscode": {
"interpreter": {
"hash": "26de051ba29f2982a8de78e945f0abaf191376122a1563185a90213a26c5da77"
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}