Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os, sys\n",
"import csv\n",
"import matplotlib\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"import statsmodels.api as sm\n",
"import statsmodels.formula.api as smf # http://www.statsmodels.org/dev/example_formulas.html\n",
"from sklearn import datasets, linear_model\n",
"from sklearn.metrics import mean_squared_error, r2_score\n",
"from sklearn.preprocessing import PolynomialFeatures"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Unnamed: 0</th>\n",
" <th>crim</th>\n",
" <th>zn</th>\n",
" <th>indus</th>\n",
" <th>chas</th>\n",
" <th>nox</th>\n",
" <th>rm</th>\n",
" <th>age</th>\n",
" <th>dis</th>\n",
" <th>rad</th>\n",
" <th>tax</th>\n",
" <th>ptratio</th>\n",
" <th>black</th>\n",
" <th>lstat</th>\n",
" <th>medv</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0.00632</td>\n",
" <td>18.0</td>\n",
" <td>2.31</td>\n",
" <td>0</td>\n",
" <td>0.538</td>\n",
" <td>6.575</td>\n",
" <td>65.2</td>\n",
" <td>4.0900</td>\n",
" <td>1</td>\n",
" <td>296</td>\n",
" <td>15.3</td>\n",
" <td>396.90</td>\n",
" <td>4.98</td>\n",
" <td>24.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>0.02731</td>\n",
" <td>0.0</td>\n",
" <td>7.07</td>\n",
" <td>0</td>\n",
" <td>0.469</td>\n",
" <td>6.421</td>\n",
" <td>78.9</td>\n",
" <td>4.9671</td>\n",
" <td>2</td>\n",
" <td>242</td>\n",
" <td>17.8</td>\n",
" <td>396.90</td>\n",
" <td>9.14</td>\n",
" <td>21.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>0.02729</td>\n",
" <td>0.0</td>\n",
" <td>7.07</td>\n",
" <td>0</td>\n",
" <td>0.469</td>\n",
" <td>7.185</td>\n",
" <td>61.1</td>\n",
" <td>4.9671</td>\n",
" <td>2</td>\n",
" <td>242</td>\n",
" <td>17.8</td>\n",
" <td>392.83</td>\n",
" <td>4.03</td>\n",
" <td>34.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>0.03237</td>\n",
" <td>0.0</td>\n",
" <td>2.18</td>\n",
" <td>0</td>\n",
" <td>0.458</td>\n",
" <td>6.998</td>\n",
" <td>45.8</td>\n",
" <td>6.0622</td>\n",
" <td>3</td>\n",
" <td>222</td>\n",
" <td>18.7</td>\n",
" <td>394.63</td>\n",
" <td>2.94</td>\n",
" <td>33.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>0.06905</td>\n",
" <td>0.0</td>\n",
" <td>2.18</td>\n",
" <td>0</td>\n",
" <td>0.458</td>\n",
" <td>7.147</td>\n",
" <td>54.2</td>\n",
" <td>6.0622</td>\n",
" <td>3</td>\n",
" <td>222</td>\n",
" <td>18.7</td>\n",
" <td>396.90</td>\n",
" <td>5.33</td>\n",
" <td>36.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>6</td>\n",
" <td>0.02985</td>\n",
" <td>0.0</td>\n",
" <td>2.18</td>\n",
" <td>0</td>\n",
" <td>0.458</td>\n",
" <td>6.430</td>\n",
" <td>58.7</td>\n",
" <td>6.0622</td>\n",
" <td>3</td>\n",
" <td>222</td>\n",
" <td>18.7</td>\n",
" <td>394.12</td>\n",
" <td>5.21</td>\n",
" <td>28.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>7</td>\n",
" <td>0.08829</td>\n",
" <td>12.5</td>\n",
" <td>7.87</td>\n",
" <td>0</td>\n",
" <td>0.524</td>\n",
" <td>6.012</td>\n",
" <td>66.6</td>\n",
" <td>5.5605</td>\n",
" <td>5</td>\n",
" <td>311</td>\n",
" <td>15.2</td>\n",
" <td>395.60</td>\n",
" <td>12.43</td>\n",
" <td>22.9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>8</td>\n",
" <td>0.14455</td>\n",
" <td>12.5</td>\n",
" <td>7.87</td>\n",
" <td>0</td>\n",
" <td>0.524</td>\n",
" <td>6.172</td>\n",
" <td>96.1</td>\n",
" <td>5.9505</td>\n",
" <td>5</td>\n",
" <td>311</td>\n",
" <td>15.2</td>\n",
" <td>396.90</td>\n",
" <td>19.15</td>\n",
" <td>27.1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>9</td>\n",
" <td>0.21124</td>\n",
" <td>12.5</td>\n",
" <td>7.87</td>\n",
" <td>0</td>\n",
" <td>0.524</td>\n",
" <td>5.631</td>\n",
" <td>100.0</td>\n",
" <td>6.0821</td>\n",
" <td>5</td>\n",
" <td>311</td>\n",
" <td>15.2</td>\n",
" <td>386.63</td>\n",
" <td>29.93</td>\n",
" <td>16.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>10</td>\n",
" <td>0.17004</td>\n",
" <td>12.5</td>\n",
" <td>7.87</td>\n",
" <td>0</td>\n",
" <td>0.524</td>\n",
" <td>6.004</td>\n",
" <td>85.9</td>\n",
" <td>6.5921</td>\n",
" <td>5</td>\n",
" <td>311</td>\n",
" <td>15.2</td>\n",
" <td>386.71</td>\n",
" <td>17.10</td>\n",
" <td>18.9</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Unnamed: 0 crim zn indus chas nox rm age dis rad \\\n",
"0 1 0.00632 18.0 2.31 0 0.538 6.575 65.2 4.0900 1 \n",
"1 2 0.02731 0.0 7.07 0 0.469 6.421 78.9 4.9671 2 \n",
"2 3 0.02729 0.0 7.07 0 0.469 7.185 61.1 4.9671 2 \n",
"3 4 0.03237 0.0 2.18 0 0.458 6.998 45.8 6.0622 3 \n",
"4 5 0.06905 0.0 2.18 0 0.458 7.147 54.2 6.0622 3 \n",
"5 6 0.02985 0.0 2.18 0 0.458 6.430 58.7 6.0622 3 \n",
"6 7 0.08829 12.5 7.87 0 0.524 6.012 66.6 5.5605 5 \n",
"7 8 0.14455 12.5 7.87 0 0.524 6.172 96.1 5.9505 5 \n",
"8 9 0.21124 12.5 7.87 0 0.524 5.631 100.0 6.0821 5 \n",
"9 10 0.17004 12.5 7.87 0 0.524 6.004 85.9 6.5921 5 \n",
"\n",
" tax ptratio black lstat medv \n",
"0 296 15.3 396.90 4.98 24.0 \n",
"1 242 17.8 396.90 9.14 21.6 \n",
"2 242 17.8 392.83 4.03 34.7 \n",
"3 222 18.7 394.63 2.94 33.4 \n",
"4 222 18.7 396.90 5.33 36.2 \n",
"5 222 18.7 394.12 5.21 28.7 \n",
"6 311 15.2 395.60 12.43 22.9 \n",
"7 311 15.2 396.90 19.15 27.1 \n",
"8 311 15.2 386.63 29.93 16.5 \n",
"9 311 15.2 386.71 17.10 18.9 "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"'''following 3.6 Lab: Linear Regression (page 109)'''\n",
"\n",
"#read in Boston housing data set and getting it into Pandas DataFrame\n",
"fname = 'Boston.csv'\n",
"df = pd.read_csv(fname)\n",
"\n",
"df.head(10) #show first 10 lines of dataset"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table class=\"simpletable\">\n",
"<caption>OLS Regression Results</caption>\n",
"<tr>\n",
" <th>Dep. Variable:</th> <td>medv</td> <th> R-squared: </th> <td> 0.544</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Model:</th> <td>OLS</td> <th> Adj. R-squared: </th> <td> 0.543</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Method:</th> <td>Least Squares</td> <th> F-statistic: </th> <td> 601.6</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Date:</th> <td>Tue, 17 Jul 2018</td> <th> Prob (F-statistic):</th> <td>5.08e-88</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Time:</th> <td>08:24:43</td> <th> Log-Likelihood: </th> <td> -1641.5</td>\n",
"</tr>\n",
"<tr>\n",
" <th>No. Observations:</th> <td> 506</td> <th> AIC: </th> <td> 3287.</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Df Residuals:</th> <td> 504</td> <th> BIC: </th> <td> 3295.</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Df Model:</th> <td> 1</td> <th> </th> <td> </td> \n",
"</tr>\n",
"<tr>\n",
" <th>Covariance Type:</th> <td>nonrobust</td> <th> </th> <td> </td> \n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <td></td> <th>coef</th> <th>std err</th> <th>t</th> <th>P>|t|</th> <th>[0.025</th> <th>0.975]</th> \n",
"</tr>\n",
"<tr>\n",
" <th>const</th> <td> 34.5538</td> <td> 0.563</td> <td> 61.415</td> <td> 0.000</td> <td> 33.448</td> <td> 35.659</td>\n",
"</tr>\n",
"<tr>\n",
" <th>lstat</th> <td> -0.9500</td> <td> 0.039</td> <td> -24.528</td> <td> 0.000</td> <td> -1.026</td> <td> -0.874</td>\n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <th>Omnibus:</th> <td>137.043</td> <th> Durbin-Watson: </th> <td> 0.892</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Prob(Omnibus):</th> <td> 0.000</td> <th> Jarque-Bera (JB): </th> <td> 291.373</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Skew:</th> <td> 1.453</td> <th> Prob(JB): </th> <td>5.36e-64</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Kurtosis:</th> <td> 5.319</td> <th> Cond. No. </th> <td> 29.7</td>\n",
"</tr>\n",
"</table><br/><br/>Warnings:<br/>[1] Standard Errors assume that the covariance matrix of the errors is correctly specified."
],
"text/plain": [
"<class 'statsmodels.iolib.summary.Summary'>\n",
"\"\"\"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: medv R-squared: 0.544\n",
"Model: OLS Adj. R-squared: 0.543\n",
"Method: Least Squares F-statistic: 601.6\n",
"Date: Tue, 17 Jul 2018 Prob (F-statistic): 5.08e-88\n",
"Time: 08:24:43 Log-Likelihood: -1641.5\n",
"No. Observations: 506 AIC: 3287.\n",
"Df Residuals: 504 BIC: 3295.\n",
"Df Model: 1 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"const 34.5538 0.563 61.415 0.000 33.448 35.659\n",
"lstat -0.9500 0.039 -24.528 0.000 -1.026 -0.874\n",
"==============================================================================\n",
"Omnibus: 137.043 Durbin-Watson: 0.892\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 291.373\n",
"Skew: 1.453 Prob(JB): 5.36e-64\n",
"Kurtosis: 5.319 Cond. No. 29.7\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"\"\"\""
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#doing the linear regression using the package statsmodels \n",
"\n",
"'''Lab: 3.6.2 Simple Linear Regression (page 110)'''\n",
"\n",
"X = df[\"lstat\"] #independent variable (or predictor)\n",
"y = df[\"medv\"] #dependent variable (which we want to fit / predict)\n",
"\n",
"X0 = sm.add_constant(X) # we have to manually specify that we want an intercept (beta_0) in our model\n",
"model = sm.OLS(y, X0).fit() #doing the actual linear regression, using the OLS = ordinary least squared\n",
"\n",
"model.summary() #showing the results in a table"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0,0.5,'medv')"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"Y_hat = model.predict(X0) #predicted Y accodring to linear fit\n",
"\n",
"fig1, ax1 = plt.subplots(1,1,figsize=(6,4))\n",
"ax1.scatter(X,y, color='b')\n",
"ax1.plot(X,Y_hat,color='r',linewidth=3)\n",
"ax1.set_xlabel('lstat')\n",
"ax1.set_ylabel('medv')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"coefficients: \n",
" const 34.553841\n",
"lstat -0.950049\n",
"dtype: float64\n",
"\n",
"R^2: 0.544146297586\n",
"\n",
"pvalues: \n",
" const 3.743081e-236\n",
"lstat 5.081103e-88\n",
"dtype: float64\n"
]
}
],
"source": [
"# to access the important values\"\n",
"print(\"coefficients: \\n\", model.params)\n",
"print()\n",
"print(\"R^2: \", model.rsquared)\n",
"print()\n",
"print(\"pvalues: \\n\",model.pvalues)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table class=\"simpletable\">\n",
"<caption>OLS Regression Results</caption>\n",
"<tr>\n",
" <th>Dep. Variable:</th> <td>medv</td> <th> R-squared: </th> <td> 0.551</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Model:</th> <td>OLS</td> <th> Adj. R-squared: </th> <td> 0.549</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Method:</th> <td>Least Squares</td> <th> F-statistic: </th> <td> 309.0</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Date:</th> <td>Tue, 17 Jul 2018</td> <th> Prob (F-statistic):</th> <td>2.98e-88</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Time:</th> <td>08:24:43</td> <th> Log-Likelihood: </th> <td> -1637.5</td>\n",
"</tr>\n",
"<tr>\n",
" <th>No. Observations:</th> <td> 506</td> <th> AIC: </th> <td> 3281.</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Df Residuals:</th> <td> 503</td> <th> BIC: </th> <td> 3294.</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Df Model:</th> <td> 2</td> <th> </th> <td> </td> \n",
"</tr>\n",
"<tr>\n",
" <th>Covariance Type:</th> <td>nonrobust</td> <th> </th> <td> </td> \n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <td></td> <th>coef</th> <th>std err</th> <th>t</th> <th>P>|t|</th> <th>[0.025</th> <th>0.975]</th> \n",
"</tr>\n",
"<tr>\n",
" <th>const</th> <td> 33.2228</td> <td> 0.731</td> <td> 45.458</td> <td> 0.000</td> <td> 31.787</td> <td> 34.659</td>\n",
"</tr>\n",
"<tr>\n",
" <th>lstat</th> <td> -1.0321</td> <td> 0.048</td> <td> -21.416</td> <td> 0.000</td> <td> -1.127</td> <td> -0.937</td>\n",
"</tr>\n",
"<tr>\n",
" <th>age</th> <td> 0.0345</td> <td> 0.012</td> <td> 2.826</td> <td> 0.005</td> <td> 0.011</td> <td> 0.059</td>\n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <th>Omnibus:</th> <td>124.288</td> <th> Durbin-Watson: </th> <td> 0.945</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Prob(Omnibus):</th> <td> 0.000</td> <th> Jarque-Bera (JB): </th> <td> 244.026</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Skew:</th> <td> 1.362</td> <th> Prob(JB): </th> <td>1.02e-53</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Kurtosis:</th> <td> 5.038</td> <th> Cond. No. </th> <td> 201.</td>\n",
"</tr>\n",
"</table><br/><br/>Warnings:<br/>[1] Standard Errors assume that the covariance matrix of the errors is correctly specified."
],
"text/plain": [
"<class 'statsmodels.iolib.summary.Summary'>\n",
"\"\"\"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: medv R-squared: 0.551\n",
"Model: OLS Adj. R-squared: 0.549\n",
"Method: Least Squares F-statistic: 309.0\n",
"Date: Tue, 17 Jul 2018 Prob (F-statistic): 2.98e-88\n",
"Time: 08:24:43 Log-Likelihood: -1637.5\n",
"No. Observations: 506 AIC: 3281.\n",
"Df Residuals: 503 BIC: 3294.\n",
"Df Model: 2 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"const 33.2228 0.731 45.458 0.000 31.787 34.659\n",
"lstat -1.0321 0.048 -21.416 0.000 -1.127 -0.937\n",
"age 0.0345 0.012 2.826 0.005 0.011 0.059\n",
"==============================================================================\n",
"Omnibus: 124.288 Durbin-Watson: 0.945\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 244.026\n",
"Skew: 1.362 Prob(JB): 1.02e-53\n",
"Kurtosis: 5.038 Cond. No. 201.\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"\"\"\""
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"'''Lab: 3.6.3 Multiple Linear Regression (page 113)'''\n",
"\n",
"#with two predictor variables\n",
"X = df[['lstat', 'age']]\n",
"y = df[\"medv\"]\n",
"X0 = sm.add_constant(X) # we have to manually specify that we want an intercept (beta_0) in our model\n",
"\n",
"model = sm.OLS(y, X0).fit()\n",
"model.summary()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table class=\"simpletable\">\n",
"<caption>OLS Regression Results</caption>\n",
"<tr>\n",
" <th>Dep. Variable:</th> <td>medv</td> <th> R-squared: </th> <td> 0.741</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Model:</th> <td>OLS</td> <th> Adj. R-squared: </th> <td> 0.734</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Method:</th> <td>Least Squares</td> <th> F-statistic: </th> <td> 108.1</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Date:</th> <td>Tue, 17 Jul 2018</td> <th> Prob (F-statistic):</th> <td>6.72e-135</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Time:</th> <td>08:24:44</td> <th> Log-Likelihood: </th> <td> -1498.8</td> \n",
"</tr>\n",
"<tr>\n",
" <th>No. Observations:</th> <td> 506</td> <th> AIC: </th> <td> 3026.</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Df Residuals:</th> <td> 492</td> <th> BIC: </th> <td> 3085.</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Df Model:</th> <td> 13</td> <th> </th> <td> </td> \n",
"</tr>\n",
"<tr>\n",
" <th>Covariance Type:</th> <td>nonrobust</td> <th> </th> <td> </td> \n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <td></td> <th>coef</th> <th>std err</th> <th>t</th> <th>P>|t|</th> <th>[0.025</th> <th>0.975]</th> \n",
"</tr>\n",
"<tr>\n",
" <th>const</th> <td> 36.4595</td> <td> 5.103</td> <td> 7.144</td> <td> 0.000</td> <td> 26.432</td> <td> 46.487</td>\n",
"</tr>\n",
"<tr>\n",
" <th>crim</th> <td> -0.1080</td> <td> 0.033</td> <td> -3.287</td> <td> 0.001</td> <td> -0.173</td> <td> -0.043</td>\n",
"</tr>\n",
"<tr>\n",
" <th>zn</th> <td> 0.0464</td> <td> 0.014</td> <td> 3.382</td> <td> 0.001</td> <td> 0.019</td> <td> 0.073</td>\n",
"</tr>\n",
"<tr>\n",
" <th>indus</th> <td> 0.0206</td> <td> 0.061</td> <td> 0.334</td> <td> 0.738</td> <td> -0.100</td> <td> 0.141</td>\n",
"</tr>\n",
"<tr>\n",
" <th>chas</th> <td> 2.6867</td> <td> 0.862</td> <td> 3.118</td> <td> 0.002</td> <td> 0.994</td> <td> 4.380</td>\n",
"</tr>\n",
"<tr>\n",
" <th>nox</th> <td> -17.7666</td> <td> 3.820</td> <td> -4.651</td> <td> 0.000</td> <td> -25.272</td> <td> -10.262</td>\n",
"</tr>\n",
"<tr>\n",
" <th>rm</th> <td> 3.8099</td> <td> 0.418</td> <td> 9.116</td> <td> 0.000</td> <td> 2.989</td> <td> 4.631</td>\n",
"</tr>\n",
"<tr>\n",
" <th>age</th> <td> 0.0007</td> <td> 0.013</td> <td> 0.052</td> <td> 0.958</td> <td> -0.025</td> <td> 0.027</td>\n",
"</tr>\n",
"<tr>\n",
" <th>dis</th> <td> -1.4756</td> <td> 0.199</td> <td> -7.398</td> <td> 0.000</td> <td> -1.867</td> <td> -1.084</td>\n",
"</tr>\n",
"<tr>\n",
" <th>rad</th> <td> 0.3060</td> <td> 0.066</td> <td> 4.613</td> <td> 0.000</td> <td> 0.176</td> <td> 0.436</td>\n",
"</tr>\n",
"<tr>\n",
" <th>tax</th> <td> -0.0123</td> <td> 0.004</td> <td> -3.280</td> <td> 0.001</td> <td> -0.020</td> <td> -0.005</td>\n",
"</tr>\n",
"<tr>\n",
" <th>ptratio</th> <td> -0.9527</td> <td> 0.131</td> <td> -7.283</td> <td> 0.000</td> <td> -1.210</td> <td> -0.696</td>\n",
"</tr>\n",
"<tr>\n",
" <th>black</th> <td> 0.0093</td> <td> 0.003</td> <td> 3.467</td> <td> 0.001</td> <td> 0.004</td> <td> 0.015</td>\n",
"</tr>\n",
"<tr>\n",
" <th>lstat</th> <td> -0.5248</td> <td> 0.051</td> <td> -10.347</td> <td> 0.000</td> <td> -0.624</td> <td> -0.425</td>\n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <th>Omnibus:</th> <td>178.041</td> <th> Durbin-Watson: </th> <td> 1.078</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Prob(Omnibus):</th> <td> 0.000</td> <th> Jarque-Bera (JB): </th> <td> 783.126</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Skew:</th> <td> 1.521</td> <th> Prob(JB): </th> <td>8.84e-171</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Kurtosis:</th> <td> 8.281</td> <th> Cond. No. </th> <td>1.51e+04</td> \n",
"</tr>\n",
"</table><br/><br/>Warnings:<br/>[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.<br/>[2] The condition number is large, 1.51e+04. This might indicate that there are<br/>strong multicollinearity or other numerical problems."
],
"text/plain": [
"<class 'statsmodels.iolib.summary.Summary'>\n",
"\"\"\"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: medv R-squared: 0.741\n",
"Model: OLS Adj. R-squared: 0.734\n",
"Method: Least Squares F-statistic: 108.1\n",
"Date: Tue, 17 Jul 2018 Prob (F-statistic): 6.72e-135\n",
"Time: 08:24:44 Log-Likelihood: -1498.8\n",
"No. Observations: 506 AIC: 3026.\n",
"Df Residuals: 492 BIC: 3085.\n",
"Df Model: 13 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"const 36.4595 5.103 7.144 0.000 26.432 46.487\n",
"crim -0.1080 0.033 -3.287 0.001 -0.173 -0.043\n",
"zn 0.0464 0.014 3.382 0.001 0.019 0.073\n",
"indus 0.0206 0.061 0.334 0.738 -0.100 0.141\n",
"chas 2.6867 0.862 3.118 0.002 0.994 4.380\n",
"nox -17.7666 3.820 -4.651 0.000 -25.272 -10.262\n",
"rm 3.8099 0.418 9.116 0.000 2.989 4.631\n",
"age 0.0007 0.013 0.052 0.958 -0.025 0.027\n",
"dis -1.4756 0.199 -7.398 0.000 -1.867 -1.084\n",
"rad 0.3060 0.066 4.613 0.000 0.176 0.436\n",
"tax -0.0123 0.004 -3.280 0.001 -0.020 -0.005\n",
"ptratio -0.9527 0.131 -7.283 0.000 -1.210 -0.696\n",
"black 0.0093 0.003 3.467 0.001 0.004 0.015\n",
"lstat -0.5248 0.051 -10.347 0.000 -0.624 -0.425\n",
"==============================================================================\n",
"Omnibus: 178.041 Durbin-Watson: 1.078\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 783.126\n",
"Skew: 1.521 Prob(JB): 8.84e-171\n",
"Kurtosis: 8.281 Cond. No. 1.51e+04\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"[2] The condition number is large, 1.51e+04. This might indicate that there are\n",
"strong multicollinearity or other numerical problems.\n",
"\"\"\""
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# multiple linear regression with all predictor variables \n",
"\n",
"def_new=df.drop([df.columns[0], 'medv'], axis=1) #drop first column (just name of indices) and predicted variable column \n",
"Xall = def_new\n",
"y = df[\"medv\"]\n",
"Xall0 = sm.add_constant(Xall) # we have to manually specify that we want an intercept (beta_0) in our model\n",
"model = sm.OLS(y, Xall0).fit()\n",
"model.summary()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table class=\"simpletable\">\n",
"<caption>OLS Regression Results</caption>\n",
"<tr>\n",
" <th>Dep. Variable:</th> <td>medv</td> <th> R-squared: </th> <td> 0.556</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Model:</th> <td>OLS</td> <th> Adj. R-squared: </th> <td> 0.553</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Method:</th> <td>Least Squares</td> <th> F-statistic: </th> <td> 209.3</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Date:</th> <td>Tue, 17 Jul 2018</td> <th> Prob (F-statistic):</th> <td>4.86e-88</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Time:</th> <td>08:24:44</td> <th> Log-Likelihood: </th> <td> -1635.0</td>\n",
"</tr>\n",
"<tr>\n",
" <th>No. Observations:</th> <td> 506</td> <th> AIC: </th> <td> 3278.</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Df Residuals:</th> <td> 502</td> <th> BIC: </th> <td> 3295.</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Df Model:</th> <td> 3</td> <th> </th> <td> </td> \n",
"</tr>\n",
"<tr>\n",
" <th>Covariance Type:</th> <td>nonrobust</td> <th> </th> <td> </td> \n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <td></td> <th>coef</th> <th>std err</th> <th>t</th> <th>P>|t|</th> <th>[0.025</th> <th>0.975]</th> \n",
"</tr>\n",
"<tr>\n",
" <th>Intercept</th> <td> 36.0885</td> <td> 1.470</td> <td> 24.553</td> <td> 0.000</td> <td> 33.201</td> <td> 38.976</td>\n",
"</tr>\n",
"<tr>\n",
" <th>lstat</th> <td> -1.3921</td> <td> 0.167</td> <td> -8.313</td> <td> 0.000</td> <td> -1.721</td> <td> -1.063</td>\n",
"</tr>\n",
"<tr>\n",
" <th>age</th> <td> -0.0007</td> <td> 0.020</td> <td> -0.036</td> <td> 0.971</td> <td> -0.040</td> <td> 0.038</td>\n",
"</tr>\n",
"<tr>\n",
" <th>lstat:age</th> <td> 0.0042</td> <td> 0.002</td> <td> 2.244</td> <td> 0.025</td> <td> 0.001</td> <td> 0.008</td>\n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <th>Omnibus:</th> <td>135.601</td> <th> Durbin-Watson: </th> <td> 0.965</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Prob(Omnibus):</th> <td> 0.000</td> <th> Jarque-Bera (JB): </th> <td> 296.955</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Skew:</th> <td> 1.417</td> <th> Prob(JB): </th> <td>3.29e-65</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Kurtosis:</th> <td> 5.461</td> <th> Cond. No. </th> <td>6.88e+03</td>\n",
"</tr>\n",
"</table><br/><br/>Warnings:<br/>[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.<br/>[2] The condition number is large, 6.88e+03. This might indicate that there are<br/>strong multicollinearity or other numerical problems."
],
"text/plain": [
"<class 'statsmodels.iolib.summary.Summary'>\n",
"\"\"\"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: medv R-squared: 0.556\n",
"Model: OLS Adj. R-squared: 0.553\n",
"Method: Least Squares F-statistic: 209.3\n",
"Date: Tue, 17 Jul 2018 Prob (F-statistic): 4.86e-88\n",
"Time: 08:24:44 Log-Likelihood: -1635.0\n",
"No. Observations: 506 AIC: 3278.\n",
"Df Residuals: 502 BIC: 3295.\n",
"Df Model: 3 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"Intercept 36.0885 1.470 24.553 0.000 33.201 38.976\n",
"lstat -1.3921 0.167 -8.313 0.000 -1.721 -1.063\n",
"age -0.0007 0.020 -0.036 0.971 -0.040 0.038\n",
"lstat:age 0.0042 0.002 2.244 0.025 0.001 0.008\n",
"==============================================================================\n",
"Omnibus: 135.601 Durbin-Watson: 0.965\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 296.955\n",
"Skew: 1.417 Prob(JB): 3.29e-65\n",
"Kurtosis: 5.461 Cond. No. 6.88e+03\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"[2] The condition number is large, 6.88e+03. This might indicate that there are\n",
"strong multicollinearity or other numerical problems.\n",
"\"\"\""
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"'''Lab: 3.6.4 Interaction terms (page 115)'''\n",
"\n",
"# multiple linear regression including interaction terms (here between lstat and age)\n",
"\n",
"# just for illustration I use smf.ols() which uses the formula syntax known from R, \n",
"# see http://www.statsmodels.org/devel/examples/notebooks/generated/formulas.html\n",
"# for comprehensive overview of how these formula work: http://patsy.readthedocs.io/en/latest/formulas.html#the-formula-language\n",
"\n",
"model_interac = smf.ols(formula='medv ~ lstat * age', data=df).fit() #important: because we use formula, we need smf instead of sm, and ols in lower case\n",
"# equivalent: model_interac = smf.ols(formula='medv ~ lstat + age + lstat:age', data=df).fit()\n",
"model_interac.summary()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<table class=\"simpletable\">\n",
"<caption>OLS Regression Results</caption>\n",
"<tr>\n",
" <th>Dep. Variable:</th> <td>medv</td> <th> R-squared: </th> <td> 0.641</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Model:</th> <td>OLS</td> <th> Adj. R-squared: </th> <td> 0.639</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Method:</th> <td>Least Squares</td> <th> F-statistic: </th> <td> 448.5</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Date:</th> <td>Tue, 17 Jul 2018</td> <th> Prob (F-statistic):</th> <td>1.56e-112</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Time:</th> <td>08:24:44</td> <th> Log-Likelihood: </th> <td> -1581.3</td> \n",
"</tr>\n",
"<tr>\n",
" <th>No. Observations:</th> <td> 506</td> <th> AIC: </th> <td> 3169.</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Df Residuals:</th> <td> 503</td> <th> BIC: </th> <td> 3181.</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Df Model:</th> <td> 2</td> <th> </th> <td> </td> \n",
"</tr>\n",
"<tr>\n",
" <th>Covariance Type:</th> <td>nonrobust</td> <th> </th> <td> </td> \n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <td></td> <th>coef</th> <th>std err</th> <th>t</th> <th>P>|t|</th> <th>[0.025</th> <th>0.975]</th> \n",
"</tr>\n",
"<tr>\n",
" <th>Intercept</th> <td> 42.8620</td> <td> 0.872</td> <td> 49.149</td> <td> 0.000</td> <td> 41.149</td> <td> 44.575</td>\n",
"</tr>\n",
"<tr>\n",
" <th>lstat</th> <td> -2.3328</td> <td> 0.124</td> <td> -18.843</td> <td> 0.000</td> <td> -2.576</td> <td> -2.090</td>\n",
"</tr>\n",
"<tr>\n",
" <th>I(lstat ** 2)</th> <td> 0.0435</td> <td> 0.004</td> <td> 11.628</td> <td> 0.000</td> <td> 0.036</td> <td> 0.051</td>\n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <th>Omnibus:</th> <td>107.006</td> <th> Durbin-Watson: </th> <td> 0.921</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Prob(Omnibus):</th> <td> 0.000</td> <th> Jarque-Bera (JB): </th> <td> 228.388</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Skew:</th> <td> 1.128</td> <th> Prob(JB): </th> <td>2.55e-50</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Kurtosis:</th> <td> 5.397</td> <th> Cond. No. </th> <td>1.13e+03</td>\n",
"</tr>\n",
"</table><br/><br/>Warnings:<br/>[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.<br/>[2] The condition number is large, 1.13e+03. This might indicate that there are<br/>strong multicollinearity or other numerical problems."
],
"text/plain": [
"<class 'statsmodels.iolib.summary.Summary'>\n",
"\"\"\"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: medv R-squared: 0.641\n",
"Model: OLS Adj. R-squared: 0.639\n",
"Method: Least Squares F-statistic: 448.5\n",
"Date: Tue, 17 Jul 2018 Prob (F-statistic): 1.56e-112\n",
"Time: 08:24:44 Log-Likelihood: -1581.3\n",
"No. Observations: 506 AIC: 3169.\n",
"Df Residuals: 503 BIC: 3181.\n",
"Df Model: 2 \n",
"Covariance Type: nonrobust \n",
"=================================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"---------------------------------------------------------------------------------\n",
"Intercept 42.8620 0.872 49.149 0.000 41.149 44.575\n",
"lstat -2.3328 0.124 -18.843 0.000 -2.576 -2.090\n",
"I(lstat ** 2) 0.0435 0.004 11.628 0.000 0.036 0.051\n",
"==============================================================================\n",
"Omnibus: 107.006 Durbin-Watson: 0.921\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 228.388\n",
"Skew: 1.128 Prob(JB): 2.55e-50\n",
"Kurtosis: 5.397 Cond. No. 1.13e+03\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"[2] The condition number is large, 1.13e+03. This might indicate that there are\n",
"strong multicollinearity or other numerical problems.\n",
"\"\"\""
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"'''Lab: 3.6.5 Non-linear transfomartions of predictos (page 115)'''\n",
"\n",
"# e.g. including quadratic terms of predictors\n",
"\n",
"model_nonlin = smf.ols(formula='medv ~ lstat + I(lstat**2)', data=df).fit() #important: because we use formula, we need smf instead of sm, and ols in lower case\n",
"model_nonlin.summary()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0,0.5,'medv')"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Predictor values and the quadratic model's predictions for them.\n",
"X = df[\"lstat\"]\n",
"Y_hat_nonlin = model_nonlin.predict(X)\n",
"\n",
"# One 6x4 inch panel: first series in blue, model predictions in red.\n",
"# NOTE: `y` is not assigned in this cell; it is whatever an earlier cell left behind.\n",
"fig2, ax2 = plt.subplots(nrows=1, ncols=1, figsize=(6, 4))\n",
"for values, colour in ((y, 'b'), (Y_hat_nonlin, 'r')):\n",
"    ax2.scatter(X, values, color=colour)\n",
"ax2.set_xlabel('lstat')\n",
"ax2.set_ylabel('medv')"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Coefficients [-0.95004935]\n",
"Intercept: 34.5538408794\n",
"Variance score: 0.54\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/conda/lib/python3.6/site-packages/ipykernel_launcher.py:14: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.\n",
" \n",
"/opt/conda/lib/python3.6/site-packages/ipykernel_launcher.py:15: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.\n",
" from ipykernel import kernelapp as app\n"
]
},
{
"data": {
"text/plain": [
"[<matplotlib.lines.Line2D at 0x7f2c6e0b7518>]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Now let's have a look at scikit-learn, another Python package for all kinds of statistical learning algorithms.\n",
"# To compare it directly with statsmodels, the simple linear regression from Lab 3.6.2 is repeated first.\n",
"\n",
"fname = 'Boston.csv'\n",
"df = pd.read_csv(fname)\n",
"\n",
"'''Lab: 3.6.2 Simple Linear Regression (page 110)'''\n",
"\n",
"X = df[\"lstat\"]  # independent variable (or predictor)\n",
"y = df[\"medv\"]  # dependent variable (which we want to fit / predict)\n",
"\n",
"# Hand plain NumPy arrays to scikit-learn. `.values` is used instead of the deprecated\n",
"# `as_matrix` (pandas' FutureWarning for `as_matrix` recommends `.values`).\n",
"X_np = X.values\n",
"y_np = y.values\n",
"\n",
"# scikit-learn needs a 2D predictor matrix, so turn the 1D array into a single column.\n",
"X_np2 = X_np[:, np.newaxis]\n",
"\n",
"regr = linear_model.LinearRegression()\n",
"regr.fit(X_np2, y_np)  # perform the actual linear regression\n",
"\n",
"y_hat = regr.predict(X_np2)  # predictions for the same rows the model was fitted on\n",
"\n",
"# There is no summary table with sklearn, but the results can be accessed like this.\n",
"# (Standard errors, p-values, ... are not provided, so in the author's opinion\n",
"# statsmodels is the better choice for linear regressions.)\n",
"print('Coefficients', regr.coef_)\n",
"print('Intercept:', regr.intercept_)\n",
"print('Variance score: %.2f' % r2_score(y_np, y_hat))\n",
"\n",
"# Plot the data (blue) and the fitted line (red), labelled like the other figures.\n",
"plt.scatter(X_np2, y_np, color='blue')\n",
"plt.plot(X_np2, y_hat, color='red', linewidth=3)\n",
"plt.xlabel('lstat')\n",
"plt.ylabel('medv')"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Coefficients [-0.15784473]\n",
"Intercept: 39.9358610212\n",
"Variance score: 0.61\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/conda/lib/python3.6/site-packages/ipykernel_launcher.py:20: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.\n",
"/opt/conda/lib/python3.6/site-packages/ipykernel_launcher.py:21: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.\n"
]
},
{
"data": {
"text/plain": [
"Text(0,0.5,'mpg')"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEKCAYAAAAfGVI8AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJztnXmcHFW1+L9ntoSQYJJJlCVhhl0jCEIUlEVkwKdBZVERDJgnaGRwAUGeaD4/1OfDJ4IP4gIaEQgmoLII8kQFEUV8gCQSSQDDPklkC2EzJGSZnN8ft4rU9NTaXd1d3X2+n099pvvWrXtPV/fcU/ece84VVcUwDMNoXdrqLYBhGIZRX0wRGIZhtDimCAzDMFocUwSGYRgtjikCwzCMFscUgWEYRotjisAwDKPFMUVgGIbR4pgiMAzDaHE66i1AGiZMmKC9vb31FsMwDKOhWLhw4XOqOjGpXkMogt7eXhYsWFBvMQzDMBoKERlIU89MQ4ZhGC2OKQLDMIwWxxSBYRhGi2OKwDAMo8UxRWAYhtHimCKoAvPnQ28vtLW5v/Pn11siwzCMaBpi+WgjMX8+zJwJa9a49wMD7j3A9On1k8swDCMKmxHkzKxZm5WAz5o1rtwwDKOImCLImWXLspUbhmHUG1MEObP99tnKDcMw6o0pgpw55xwYNWpo2ahRrtwwDKOImCLImenTYc4c6OkBEfd3zhxzFBuGUVxs1VAVmD7dBn7DMBoHmxEYhmG0OKYIDMMwWhxTBDXAIo0NwygyLaEI6jkQ+5HGAwOgujnS2JSBYRhFoekVQdJAXG0lYZHGhmEUnaZfNZQ0EFc7L5BFGhuGUXSafkYQNxDX4mndIo0Nwyg6Ta8I4gbiWjytW6SxYRhFp+kVwbRp0eW1eFq3SGPDMIpO0yuCX/wiurxWT+vTp8MTT8CmTe6vKQHDMIpE0yuCVauiy+1p3TAMowVWDSVheYEMw2h1mn5G0N2drdwwDKPVaHpFMHs2dHYOLevsdOWGYRhGCyiC6dPhssuG+gEuu8zMQYZhGD4t4SMwP4BhGEY0TT8jaHQsc6lhGNWmJWYEjYqfMK+auZAMwzBsRlBgLHOpYRi1wBRBFcjLnGOZSw3DqAWmCHImz41oLHOpYRi1wBRBzuRpzrHMpYZh1IKqKwIRaReRe0Xkf733O4jI3SLysIj8XES6qi1DLcnTnGO5kAzDqAW1mBGcCjwYeH8ucIGq7gK8AJxUAxlqRt7mHMtcahhGtamqIhCRScDhwCXeewEOAa7xqswFjqymDLXGzDmGYTQa1Z4RXAj8B7DJe98NvKiqG733K4DtqixDTUlrzrFAMcMwikLVAspE5P3As6q6UEQO9otDqmrE9TOBmQDbN9gymaSUFhYoZhhGkajmjGB/4IMi8gTwM5xJ6EJgrIj4CmgS8GTYxao6R1WnqurUiRMnVlHMdKR9gk9TzwLFDMMoElVTBKr6ZVWdpKq9wLHAH1R1OnAb8GGv2gzghmrJkBdpYwPS1rNAMcMwikQ94gi+BJwuIo/gfAY/qYMMmUj7BJ+2ngWKGYZRJGqiCFT1j6r6fu/1Y6r6dlXdWVU/oqrraiFDJaR9gk9bz1YWGYZRJCyyOAVRT+qqQ/0AaZ/0LVDMMIwiYYogBWFP8D6+H+CUU2D16uHnR42CnXeGjg436Hd0uLq1CBSzJaqGYaTB9iNIgT9Iz5rlBv5S1qyBiy8eXt7dDXvtBbfeurlscHBz3Ysuyl9WH1uiahhGWkQ1dBl/oZg6daouWLCg3mIA7uk67S3r6YEVK9zgX0p7O2zcOLw8L3p7w5VWT4+bgRiG0fyIyEJVnZpUz0xDGcmysmfZsnAlAK68muYaW6JqGEZaTBFkJM5fUErSzKGSvQqSyLpE1fwJhtG6mCLISOmKn/b2ytqrVkRxliWqeW6mYxhG42GKoAyCK37mzk0/Q4iiGuaaLEtULeWFYb
Q2tmqoQoIripYtS+9IDlKtiOKk5Hc+5k8wjNbGZgQ5EJwhZDUVFSGi2FJeGEZrY4ogJUFn6oQJ7ghzrPpr9dPQ1rbZBOO34ffjB5+JZHfexjl+w85ZygvDaHFUtfDHPvvso/Vk3jzVUaNUneFn+DFqlKvj09+v2tYWXT+qjf7+6H5K+5g3T7WnR1XE/fXPhcnqX5t0Lqw9wzAaF2CBphhjLaAsBVHBWUFKA7XSXFNKe3t03EGwj9KoYXBP8HPmREc/9/S4vxZkZhitQ9qAMlMEKUgTTSzifARZrsmK30dc1HCUw1q8veGizgVlNwyjObDI4hxJ4zQtrRN1TXu7y0EUdS5NH3GrfOIcv+YUNgwjDFMEKUgTTTxtWvI1o0ZFO5P9c1H9BJ23cQN6nOP3nHOgq2voua4ucwobRqtjiiAFweCsKH7xi6GrcWB4QNeMGS4AbdWqodd2d7u6F100tB9/hlAaDBY32CcFkpWahhrAMmgYRpUxH0FG0tr+RVy9np7NA3SUbb+93SmILOmh58/fHMTmzwSSrreMpIbRWqT1EVhkcUa23z7daiBfWQT3AYiy7Q8Owoknutdxg3k5g38QiyA2DCMMMw1lJEv2UZ81a+DUU91sIor1612dKPJIDGfOYsMwwjBFkJEwG3zUKqAgq1bFxwj4daLIIzFcqUM7qdwwjNbAFEEZlO43PHt25RlISylNBRFljspi1rnppmzlhmG0BqYIciDNqqIshJmB/ICwUoJmnaTNZaKUxsCAbUZjGK2MKYKc8GcJlSiD0aPd3zAzkOpwZRCMLUjjQ4jzBdhmNIbRupgi8DjllM3ZPjs63PtyKMeZ7DNihBuIo8xA/nLUsPiAND6EJNmy+hxse0vDaBLSZKar91HN7KPz5qmOHh2e8bO/v/w2/Uye3d2qXV3ZspBGnevpie5TJPwakXDZovoorR/3GaMymTYyloXVaCZImX207oN8mqNsRfDqq+6IICm9dHt7ed2G9RMcXLq7o/tLm+q6lKjBPUp5ZK2f9/VFpFmVm9G6pFUEzWsaeuwxZ68YOdLZUi64YFiVMHNKkKTlnuVyzDHDTTQi8f3NmBEfPJZ1c5lKN6NpxuA027vZaFnSaIt6H2XNCK6+OvyR9YgjVNesUdVoc0raGUHc5jBB01Dpk35np+qUKfF9lx5dXclPpkGzj99nnHkjrRkkrF4zzgjSmtcMo1Gg5U1Da9fGj6zjxumB2z4SWyXORxBlRujvdwN9lkE+7dHdnfyx8zZvxH3OZjOjNKNyM1obUwQ+c+cmjrAf5PphT4BJjuKoQSNpllHpkUTeg1lce83mWDUfgdFspFUErZN9dPFi2HNP9/8dwY+2+iKjv38u009Idp1UYweyNCT1GSVXubuQ5d1e0ak0sZ9hFAnboayUPfZwI9dLL8HBB4dW+fTL5zP94+2w777w/POxzdUjUVuanEZ5J5ZrtUR1pelDTAkYrUDrKAKfrbaC225z/+lnnx1e569/daOuCCxcGFolatWNHx1cDY45JvqcH9wVlY5i9eryAr4qXV1kGEbxaT1F4CPC/F2/zpajlPfym+h6U6e6kXXOnCHFUTuBnXBC9USeMyd8MA+ml4BwU86qVeWlkEja8cwwjMandXwEIZRm9dyeAf7C/kzin9EXTZ8Ol146bPNf37acZtOaOPydzZLOp9n5LAzbjcwwWgfzEaSgNPhpGT1MZgUjeRWOOy78ovnzXVKgyZNh+fLXioJP5OUyahScfHJ8HV9JBJPEZQniauSAL8MwqkPVFIGIjBSRv4rI30XkfhH5ule+g4jcLSIPi8jPRaQrqa1qEeXw3LpnBFx5pRt1L7oovNKKFa4BEX57+s2xEcpRdHcPN7lEdReGH/WaxXHbrE5ewzDKp5ozgnXAIaq6J7AX8F4R2Q84F7hAVXcBXgBOqqIMsaRyhPb3O4Vwzz2R7fz02X9DEc7m60A6U5vv0A1bppgllfXAgHMEd6VQp+bkNQwjjK
opAi+eYbX3ttM7FDgEuMYrnwscWS0ZksjkCJ061SmE555zr0P4Ol9DaeNWDmEML8f2reocuKrD9wLImsrab8dfXhq2aqi7e/hnszTShmEA1Y0sBtqBRcBq3ExgAvBI4PxkYElSO9VMQ102g4OqZ5wRGwa8gXZ9M4vLSiExb158NtKoaN+0kcUWRWsYzQ9FyD6qqoOquhcwCXg78KawamHXishMEVkgIgtWrlxZTTHLo60Nzj+f3h7lKK4LrdLBIEvYA0X4GMmP26tWbX4qnz49e+TusmXps4Japk3DMHxqsmpIVV8E/gjsB4wVkQ7v1CTgyYhr5qjqVFWdOnHixFzkCDOFVGoeWbYMrucoBGUXHuJFXhfeN8ejCD/k07SzMbI9fyCeP9/JlIXx46OvKXUSN2MaacMwyqOaq4YmishY7/UWwKHAg8BtwIe9ajOAG6olQ5CwPX0/8Qk48cT4fX6TCA6wj7AL43iRUbzCDXwwtP6nmcNGOnmAN/EGnh52ftmyzbJm2Q+hqwtefjn6mmnTouVOU14vzI9hGDUgjf2onAN4C3AvcB+wBDjbK98R+CvwCHA1MCKprTx8BHHbM1aSqTPM1t7Z6ez9wib9z3HfSezwAG5PZeePkzdq17NG9hE0goyGUWSwNNRDyZIeOutGJEnpmOfNUz10xO2JHZ/BeTrvp5vKkjXpmrDP1N+/2SHd3l7+Hs3VwvYHMIzKSKsIWibFRL3TMPgpKNYNPM2tHMIUHoyse8sWH+CItT9jLenWkPb0uFiCVavi6wQ/k29+CjqMR40qVh6hVkuBbRh5YykmSghbm9/ZOTwQq1pBV35646d0a77f/wAdbOCHfDq07mFrb2QNW/Iir2NnHgbc4BcVNDYw4LJrd3aGnxdxdXp74ZRT3N/jjw9fNXT88fna4iux8TeKH8MwGp4004Z6H3nFEYSZcOq1y1bQLDNd5ifagI7kOu3ri/cFjB49fM/icndMy8MWX6mN33wEhlEZmI+gAVmyRLWjI3aE/vGYL6gwGFklSFanc+nR3V2ZkszDxt9s22EaRi1JqwhaxkfQSGwlL/NLjqKPP0TWWcA+/Bu/43mGblsW/Drz3k4zqw/BbPyGUV/MR1BF8lrbHtXOmvatOJRbETZ5ieyGM5WFrGICijAVlxCvq8u1IwIdHfkqAcjuQzAbv2E0CGmmDfU+imQaystuHddOf/9wc8ph/C7RlnMyF1VkCsrbh5DHvTLTkGGUD+YjqA55rW1Paidyjf+yZTrA5NhR+nxO13Y2lD3IpwlQS/uZK4lVMGexYVSGKYIqEbUKJ2sQWtxqnqSBrqdHtZN1egXHRzbyOD16Oufr63ghsyJQDR+Es37mSgdyCygzjMpIqwhS+QhE5OiQo09EXl9Vu1UBycvuHVc/Kd/ROedA56guPs5PEZRPMWdYnV4G+A5fZAWTmM3n2YlHUsnV3u7+BvdqKOczQOUZTi0xnmHUiDTaAvg18DxwrXes8soeBk5I00YlRzkzgmqlTyjnKTdo5+7uTmd22XLL4fL77ZTWbWtT7etTfefIBfor3q8rGd7BIKI38AE9mD8obIrt2+83GGtRzpN9ubOnqM9pMwLDyAZ5moaAG4E3BN6/AbgOGE+KjWUqPbIqgjBnK+SrDNI6MNOYWNIebW3R5zo7Nyu/kazRk/ixLubNoZXvZU+dwWU6grWJffoDfjlO23JMO0n3y3wEhpGevBXB4pL34isA4N40bVRyZFUEUTt7tbdnaiYXKg3qynL4A/TmgXSTHsrN+pu2aaEXPM3r9at8VV/P07HtlhtYVs5MImkmYErAMNKTtyK4CPhf3P4BM7wZwkXAlsBtadqo5MiqCOIGtVpTboqHcg7f5BL69P6Pf+jSvn5dzfDH7Vfp0kv5d30Li1L1kzVNRBYlEne/bBmpYWQjrSJIFVksIgIcDRzgzQbuAK7VNBfnQNbI4o6O8A1a2tthY/TmYFUhS9ZTcM7Z5cvLi7xNkzV18p
bPc9yaS/gc32MyK4ad/wPv5kJO49cczibaK+qrHKLul4hTBz5Fy5RqGEUk18hib8D/C267yVuBO2qlBMph5sxs5dUkLOtpGCLQ3+8G10+HJyVNZPXq6NVG8+fDhAmwYs14zuM/2JHH+Cg/4y72HVLvEG7jVxzBUnbjs3yP0fwrtL28Vu6URldPmxZ+v0p/bba/smHkSJppA/BJYBlwOTAXeAI4Mc21eRxFWjVUDmlXDQUds3GO4axmmyQH7L7cqVfxUd3AcOfKi2yl53O69vD4MDNNHvclzIfQ37/ZVxBnKsoau2EYrQY5+wiWAt2B993A0jTX5nEUKaBMNZ+0B3Eraip1MJcO0mnbm8yAfov/0OcZO+zkRtr0aj6k+/NnHbXFplxs9EmripLktmWkhhFP3orgVqAr8L4L+H2aa/M4iqQI8kp7ELfGvlIHc+mTctb2RrFa+/mB/oNdQys8t8NU94HXravoXibFGcTJXcmyVsNoFdIqgrTZR/8J3C0iXxORrwF3AY+IyOkicnrF9qkGotJoWZ+4COW4iN2eHujujj4f1nbWqOc1bMnFnMKbeJBp/JpbOHTI+e7HF7g0pDvsAN/8Jjz3XLYOEuTyy6POt7c7RzE4v8/AgFMPAwPJUdmGYQwnrSJ4FLge2OQdNwBPAmO8o2XIK+1BmBPZ3yZz553Dr+nrc87k2bPjHdDTprm/viN2YMA5o7OitPEbpvEebmF3FnMJJ/EqIzZXePJJpwEnT3Yj8AMPZGrflzOqPOoezZ3rVgvlpZQNo+VJM20A3gb8ErgXWOwd96W5No+jSKahPBOhRZk10gTEzZsXXW94YFl+xwSe1Vl8Q59k6/AK73mP6m9+ozo4mMu9jDP95JUA0DCaFXKOI1gKfBFYgpsR+Eokwwr58inSDmXz57uH3+CTaN5r2uOe3oNfV9wOYNtvn249frl0sp6P8nO+wAXszb3DK7zxjXDqqfDxj0dOXyrdwSwq5qBaMQ6G0WjkvUPZSlW9UVUfV9UB/6hQxoYkmJVTxP3NO7CpPSKOq7Q8zsYeZapSjc8ompYNdDGPE9iHhRzEn7iOo9hEQIP94x/Q38+610/iotd9mcmyYtjOZuPHh7cdVV5KnHnNMIwMpJk2AH3AJcBxuAjjo4Gj01ybx1Er01BRVqBEJc3r6xsqX3//cPOPby6JMhuNHp1uOWlHR3az0Q48qhdwmuqYMcNOrqdD53OcHjji7tfua1Q8RXd3+nuV5jsryvdqGLWGnJePzgMW4ILJLvOOS9Ncm8dRC0VQtN2wSgPi+voqC77KcvT0uP7KvVZfekn1wgt1oGOH0Er3jHin6i9+oR0Ru6jlaeMv2vdqGLUkb0WwOE29ah21UARF3w2r0uCrpKM0+jpqRhE2+4gaYNvZqEfwS72Nd4U2sLx9ez2D84btopbnPS/692oY1SStIkjrI7hLRKbkZ5AqHkXfDStJvkrlHBx0yzJ9G35Y0r5SVON9JZN62rmBI3k3f2RvFjKXj7Oezs3nB5dxPmeygkl8j8+yMw/T2Zmvjb/o36thFIG0iuAAYJGILBWR+0RksYjcV03Bak1eW1CWQ2nitaBD1T/nJmbR8uUhp78GP21AlohLdBfFOedAV5d7fS978+/MZZfOARYf+f9cBjyP0bzCZ/kBS9mNazd8gEuPv5XeHs0lMKye36thNAxppg1AT9iR5to8jmb2EcT1m2W3rrC65Th8/XbLvS54v+bNczunBet0dnp11q7VM8dfovexe2hjf2cPPbnrJ3rlpWurdn8No9khTx9BvY9mXjVUbvK5MPlK5U+zN3KYr6ASX0PQ9p5kn3c+hk16CL/XGzk8tPKzMlEvfN3ZujVPlf2d2Koho1VJqwhSBZTVmyIFlOVNXFAVRJ9LE3AV1XYUo0YNT9mQlaBsSQFjpQFhu/AQn+e7fILL2JKhgqynk6s4ju93fIHTLt/LNqQxjBTkHVBmVIlyks+ltW+nqecrHN/ZW2mwWb
DPJPlLcyo9zK58ju8ziRWcybdZxuTXznWxgRlcwT0b30rvvx8MN9yQzqNtGEYipgjqzDnnQGfn0DJ/5UylkbNRSd3avG+9u9tF8QZTWqTdUS2MUtnC2hJxs4DeXrjttvB2XmQc53MmO/IYx/Bz/o93DDm//8Y/wZFHwq67ugx8/wrfRc0wjHSYIigApbmF/PeVpLOYPx9+9KPwc5s2udU8L78Mq1Y5842fwhlcH3Gprv1UF93d7oiSLSi//7l8U9HAQLx5q6cHBungao5hf/6PfbmLqziWjcF9lB97DE47DSZNgtNPh8cfj78phmGEk8aRUO+jSNlH86ZaAU/lOIqD/VaaGdQnGCGd5Yj6DJNYpheO/JLquHHDT7a1qR59tOrtt6tu2lTZDTSMJoCcA8qMKlGtgKdVq8q7zu83LKtnsNzPwlq6Kcwpp2yOiRgzBi6+uHxT/uzZm+MQfJ7tmsyES74Fy5e7xnfbbfPJTZvguuvgoIPgbW+DefNg/fryOjeMViKNtijnACYDtwEPAvcDp3rl44FbgIe9v+OS2rIZQXYqXf6ZtCdClNx55Dsq3XchdtYxOKh6001uH4SwxrbeWvUb31BdubKyGxqBLU01igz1jiMAtgH29l6PAR4CpgDfBs7yys8Czk1qq5kVQbUCnkaPLm8Q7utz1yeZbfJKcBfXR2aWLNGH3v0pXSsjhzc4cqTqJz+pumRJZTc2gAWrGUWn7opgWEdue8vDgKXANrpZWSxNuraZFYFqdZ4qy/URgOs/KUV0uYomOPMYGTJeg2u73Ps4apRqNyv1K/yX/pNtwjs47DDVX/861S5qcVhCO6PopFUENQkoE5Fe4HZgd2CZqo4NnHtBVcfFXd/MAWXVopw9in16elwOoTA/Q3e326s+a7BakP5+uOiiyncoK6U0QK2T9XyEq/lS1wW8Zf3C4RfsttvmXdS23DJzf3H3YN68fDcrMoxyKExAmYiMBq4FTlPVlzNcN1NEFojIgpUrV1ZPwCYlapezNCxbBs8/H37OLy9XCfT1OSUQ10a5bZc62DfQxZVMZ8/19/DhN/yZa/gQg8Gf/NKlzru93XZO+9x5Z6b+4gL2Zs4MTx4YlljQMOpOmmlDuQfQCfwOOD1QZqahGlCp2SYpz1FbW3ltl26mE9dHVhNZGgd2L4/p7I7Tdd0WW0V3Pnt2qv6SkgL6JiLzJRj1gnr7CAABrgAuLCk/j6HO4m8ntWWKIDuV+giitsvMckQ5lNM6mrMOlmEDblRfUya/7Ab8HXeMFuDoo1XXxmc/nTcv/vOrmi/BqB9FUAQHAArcByzyjmlAN3ArbvnorcD4pLZMEWSnEkWgWtmOZ77Tu1JFUs5gWep4TxqkdeNG1ZNOiheiu1v10Ucj+0yXZTVGBsOoEmkVQdV8BKp6h6qKqr5FVffyjptUdZWq9qnqLt7fCGu0UQlRNv60VBLQtmkTPPFE5QnsypFj+nTXd5IMr9n329vhkkvc2HzZZeGVV62CnXZyfoQbbxx2OiknVJrkgeZDMOpKGm1R78NmBNkp94k8KWAs7YxCNZupptwZQdLS27Ls84sWJQt21lmqg4Ov9e/fO1/m0g164mQwH4JRLai3aSjPwxSBI0u8Qdjgkibnj7+BfdjuYmmOKVOGyjFlytDz226bfge0rq7snzFsAE2jLELPv/iiPrPbgbFC3tn2Dh3L84kDeJwM5kMwqoUpgiajnKfG0sGnry9+4G1rG/qU2tWVXRH4kcmq0f1NmZJuxvHatpYR5DGAJt1X18cm/QazEgXei7+VNYDHNZsnlg6j9TBF0GSUM+iV/uOnMcskZR9Nc/gk1ckiTxiVOmHnzYueJUU5et/LTYlCn8glr82sor6L4CCclNcpD8z81JqYImgysg56SWvcow6/vXKVQBZFkEWeMCqZESTdn6Sln/tv97g+0751rPB37XqC6rp1iYNwmntZKWZ+ak3SKgJLQ90gZN22ctas8vYf9turJDI5LW
n6iIveTbuDW9iKnKT74/cb1Uf/ub3cMvcpxm+xlp/x0dA29n3opzBiBO+a0cu4NSuGnFuzxskA0Sub8lh15VOtdOdGk5BGW9T7sBlB9ql9OdlB0z6lxh1BZ3GUjyBNhtOkzxe8L+WsGkrqO2jaiesjGFB2Ct9PbPgQfj9s1hEVvJdWhjTYjKA1wUxDzUeWwSDqH3/06M02aRH3PstKFnDXb7lluoGlVBkEnclxY2Zezsyoz5G0girLAFl67du5K1EhzOIb2rP9plgZ80xRYT6C1sQUQYtT6T9+0vV5RMvGjZVZPmeccoybGaXxEaTpN6qNblaq7r13/Ac99FAdw8uxMuT1NF+LVUO2MqlYmCIwKv6nrPba90pXy6RRdnFyxq0a6u6ONwelMS+9di82btQH3ntabOV1dOqbuD/0+kZJUWGzjuJhisCoKvPmDc9AGoxD8AluXt/ePtTuncY2HkcaZVROVG9X1/BguuGxBfFHZAzENdckXnwsV6pIcn952/fND1Ec8ppZmSIwqkqaQTypTlplEkXaJ+WskcVRCfuSntBLlUns51i6VNePHBPbyA3b9atu2FCTJ+08+oi7H0Z68vy+TREYVSWNWSepTqVPkNV6Ak1SMFmC7XzFE6aMenpUR7Fab+Tw+EamTNFrfvB0VW3vRTD1qZqPQTXf37UpAqOqpHn6S6pTqe27UtNSFOWs4kmaHYSZmobW26Rf5NvJjd1xR2UfLoI8nuYrbcN8DI48fUJpFYEFlBllERUMFixPqpM1SK6Um27KVp6WpEC16dNhzhwX8CWSHBi3fj1s2DC0bM2a0uuE8zkTQTmIP0U3dsABrtMLLkj7cVKR5vtMotLAuLAgv2DgXatQ6f9FOZgiKACnnAIdHe7/u6PDvS86M2cmlyfVOecc6Owceq6zc3hkcBR5RcuWRh7D0IG+p8e9D25GH9z3YO7c4YojDYOD4df9mYMQlGMOeBJ22y384tNPd8IdeSSsXZuqv7g9DwYHo2VMS9pI7yjK/T6bbS+HSu9jWaSZNtT7aGbTULXMG9UmbOlle3u2VUNhGU4TnawBapF9NEs7WRP1jR4dv4R12229xjdsUP3Up+IbGzdO9ZFHyv6cUQ7y7u7y7kM5Nv6O7orQAAARh0lEQVRyZGhWc5KtGmoxRVCLzJPVII9BuNI28hgE8nY4Z/EfpEnwN2wQuOKK5Iavvz7z58xLEVRCOTLYktV4TBE0CHH/z0UmD4dWHm1U+uRUjWCtUpmSvuOkcT1Uud13X/I61jPPVB0cTPU5ixC0Vo4MRZC7yKRVBOYjqDN5OOnqQR4OrTzaKN2jOGjHr5YMYTbpYNmsWc6e68sU9V2KbPZJxLFmDRx/fInvaI89XAcvvQQHHxx+4Xnnuc733Zc9tgvfxNr/nGnuQ7V9WeV8F/VwrEKyXyLpfOH8gmm0Rb2PZp4RNLKPoBkSoWWVIax+Z+dwX0ewjajvuKMjeTaQ9ncx76eb9L86vprYwN4sCG0v6T7U4ndazu+hHr+hcqLVa30vfTDTUOMQ51AtMnk4tIoQQJRHVtewI2inLv2OR4/OrgT8a5Pkeg+/TWzoU/xomIxx96FWvqxyfg+1/g0l+SWSztfSL5hWEYirW2ymTp2qCxYsqLcYhkFbm/u3TYOIs95U2k4pYdeFtTeZZfwf72QS/4xsax7TOX7dpdDVFdunSDZ5mpmo787/vpPO1/JeishCVZ2aVM98BIaRgTx8IFnbCRLlbwhrbznbc0DPCnj1Va4f9bHQ645nPowYAZMnw/Llmfstui+rGiT5JZLOF/FemiIwWhbfoSey+Uhy3IUF+3R2uuuCdHUNDQAqdQ7uvHP5QWhhDurVq4cH570WhDRiBK/MmU9Xp3IyF4c3vGKFG6lEOExuGXYf0gQQ5kEjBIclBUImBYTV6l5mIo39qN5Hs/sIjNqTtN4/zk9TapPu7x+eSyiYhjrKOdjXF+9zGDs22p4c5qDu6nJr7qP2UAjW34
d7Ev0IX+Wr2n+y20UtadvRan0nRQwOSxMImeS3qJVfEPMRGEY0vb0wMBB9vr0dNm6srK2eHrd8tKMjPFWD30eSzThJ1rA+08q456RVLNrmfXDPPdGNvvvdbHXb9fyLrSJlzIOk+1gUGkVOSO8jMEVgtCRpnLVp/zUqdQ4mnc/DQZ3o4JRNfJv/4It8J7LtjbSzF4u4n92HyZgHSTIWhUaRE8xZbBixJDlrszjuqu0czOJYbmsLt69HteHPOKStjTM5H0E5iutC63YwyBL2QBE+RrjxvpJAqXKDw2rtV8gjiK1wvpA09qN6H+YjMPKmEh9BmrayBBBNmRJ+fsqU6PbDfASlR1KQU+ka9tKynXlIX+l6XWwnN2470yXFS/E5K72PeV1TKZX2WUuZsYAyw4gnLGNouY67Sp2DpcrAVwJx7QfLopzKYQFjUeP6lltGyPjKK6pHHBGvdd74Rt227alIJZPXfSylXknnKgliq6XMaRWB+QgMownIYrcu28at6jbEOeOMWFkO5Hbu4MBhl1aDRrLX+9RSZvMRGEYLkcVuXbaNW8RtiKMKt98eWe3PHIQinMH5gFY1UKpeSecqoYgymyIwjAJQqfMwy65WueyAdeCBTiE8/TRMmRJa5XzORGnjb5M+OHwPypzIazev4P2fMMEd1XLk1mUHsiTS2I/qfZiPwGhm8t4lLY3dOvdEbRs26NK+k+P9CFttpfrQQxV2NJxKP0uSI70ajtxaJcrDfASG0Rg0UoBSHP7nOI4ruZKEjSGuvRaOPromciWRJmCv0b4LH/MRGEaDUO6m7UXDl/cqPoagvJklrKczvPKHPrTZ51Bnr26a+9xo30VWqqYIRORSEXlWRJYEysaLyC0i8rD3d1y1+jeMRqGIzsNyKJX3Ad7MCNbz5skvQ19f+EUXXOAi66ZOhVWrqi9kCGnuc6N9F1mp5ozgcuC9JWVnAbeq6i7Ard57w2hpCuk8LIOoz/GV/x4Dv/+9e/L/+tfDL1640HloReLzHqUgq+M9TO4gYd9F4SKDKyWNI6HcA+gFlgTeLwW28V5vAyxN0445i41mpwg7teVB6s9x883xjmVQvfjisvovx/EelLu7OzqLayV91AOK4CwWkV7gf1V1d+/9i6o6NnD+BVVNNA+Zs9gwmpTly+GAA+KN8MceC5df7jbQSaAWjvdGcu43vLNYRGaKyAIRWbBy5cp6i2MYRjWYPNmNquvWwQknhNf52c9g5EjYbrtEr20tHO/N4twPUmtF8IyIbAPg/X02qqKqzlHVqao6deLEiTUT0DCMOtDVBVdc4Swtc+aE13nySffYLQK//W1olVo43pvFuR+k1orgV8AM7/UM4IYa928YRtH51KecQogzB7/vfU4hnH32kMQ9tXC8N4tzP0g1l49eBdwJ7CYiK0TkJOBbwGEi8jBwmPfeMAxjOPvsQ2+PMo7nuZP9wut84xtu6c7BB8NLLzF9uptQ+BOHnh73fnpCfFsWatFHrbHIYsMwCkswU6ewif/my3yJb8dfdN99sMce1ReuAWh4Z7FhGEbQ7q60cRbnIiifnBhjVX7LW9yj+hVXVF/AJsEUgWEYhSXKHv/uCz7opgqPPALd3eEXz5jhFMInPwkbNlRf2AbGFIFhGIUl0R6/007w3HMuzfVRR4U38pOfuFVJu+4KTz1VM9kbCfMRGIbRXFx4IXzhC/F1/vhHeNe7aiJOPTEfgWEYrclppzmz0V/+El3n4IPdFOPcc6u3j2YDYYrAMIzm5J3vdIP8M8/A7ruH1znrLLc06fDD4ZVXaitfgTBFYBhGc/P618PixbBxI3zmM+F1broJRo+GMWPgoYdqK18BMEVgGEZr0N4O3/++myVcdVV4ndWrYbfdnNnommtqK18dMUVgGEbrceyxTiHcf390VtOPfMQphNNOg8HB2spXY0wRGIbRukyZAq++Cv/6F7znPeF1Zs+Gjg7Ye2+3VLUJMUVgGIYxejT87nduF7Wo7HH33g
sTJ7pZwt1311a+KmOKwDAMw0cEvvIVZzb6/e+j6+23n6v7gx/UTrYqYorAMAwjjL4+pxCWL4cddgiv89nPOoVwzDHOxNSgmCIwDMOIY9IkeOwxt4vajBnhda6+GrbYArbeunj7VabAFIFhGEYaurrc3smqLn9RGM8842YPIi42oUEwRWAYhpGVE090CuHee6PrHH64UwizZhU+jYUpAsMwjHLZay83yL/wAuy/f3idb37TpbE48EB46aXaypcSUwSGYRiVMnYs3HGHW376la+E17njDldPBBYtqq18CZgiMAzDyAsRF4egCjfeGF3vrW91dS+/vGaixWGKwDAMoxq8//1OITz6qAtEC+MTn3AK4cQT67qLmikCwzCMarLjjvDss7B2LXz4w+F1LrvMrUraaSd48snayocpAsMwjNowcqSLN1CF7343vM5jj8F227lZwh/+UDPRTBEYhmHUms99zimEO++MrtPX5xRCDWYIpggMwzDqxX77OYXw7LOw557hdU48sepimCIwDMOoNxMnuiWlGzfC5z8/9Nz48VXv3hSBYRhGUWhvd/sfqLq02DfcEJ3OIkc6qt6DYRiGkZ2ojXKqgM0IDMMwWhxTBIZhGC2OKQLDMIwWxxSBYRhGi2OKwDAMo8UxRWAYhtHimCIwDMNocUQLvoUagIisBAaq0PQE4LkqtJsnJmN+NIKcJmN+NIKc1ZaxR1UjcmBvpiEUQbUQkQWqOrXecsRhMuZHI8hpMuZHI8hZFBnNNGQYhtHimCIwDMNocVpdEcyptwApMBnzoxHkNBnzoxHkLISMLe0jMAzDMGxGYBiG0fK0jCIQkSdEZLGILBKRBV7ZeBG5RUQe9v6Oq6N8u3my+cfLInKaiHxNRP4ZKJ9WB9kuFZFnRWRJoCz03onjuyLyiIjcJyJ711HG80TkH54cvxSRsV55r4isDdzTH9ZCxhg5I79jEfmydy+Xisi/1VHGnwfke0JEFnnldbmXIjJZRG4TkQdF5H4ROdUrL8zvMkbGwv0uUdWWOIAngAklZd8GzvJenwWcW285PVnagaeBHuBrwBfrLM9BwN7AkqR7B0wDfgMIsB9wdx1lfA/Q4b0+NyBjb7BeAe5l6HcMTAH+DowAdgAeBdrrIWPJ+e8AZ9fzXgLbAHt7r8cAD3n3qzC/yxgZC/e7bJkZQQRHAHO913OBI+soS5A+4FFVrUYQXWZU9Xbg+ZLiqHt3BHCFOu4CxorINvWQUVVvVtWN3tu7gEnVliOJiHsZxRHAz1R1nao+DjwCvL1qwnnEySgiAhwDXFVtOeJQ1adU9W/e638BDwLbUaDfZZSMRfxdtpIiUOBmEVkoIjO9sjeo6lPgvjTg9XWTbijHMvQf7bPeNPLSepqvSoi6d9sBywP1Vnhl9eZE3BOhzw4icq+I/ElEDqyXUAHCvuMi3ssDgWdU9eFAWV3vpYj0Am8F7qagv8sSGYMU4nfZSopgf1XdG3gf8BkROajeAoUhIl3AB4GrvaKLgZ2AvYCncNPyIiMhZXVdmiYis4CNwHyv6Clge1V9K3A6cKWIbFUv+Yj+jgt3L4HjGPqQUtd7KSKjgWuB01T15biqIWU1uZdRMhbpd9kyikBVn/T+Pgv8EjfFfsafHnp/n62fhK/xPuBvqvoMgKo+o6qDqroJ+DE1MA2kJOrerQAmB+pNAp6ssWyvISIzgPcD09UzxHqmllXe64U42/uu9ZIx5jsu2r3sAI4Gfu6X1fNeikgnboCdr6rXecWF+l1GyFi432VLKAIR2VJExvivcc6aJcCvgBletRnADfWRcAhDnrhK7JhH4eQuAlH37lfAx71VGvsBL/lT9VojIu8FvgR8UFXXBMoniki793pHYBfgsXrI6MkQ9R3/CjhWREaIyA44Of9aa/kCHAr8Q1VX+AX1upeer+InwIOq+j+BU4X5XUbJWMjfZT081LU+gB1xqy/+DtwPzPLKu4FbgYe9v+PrLOcoYBXwukDZT4HFwH
24H/M2dZDrKty0dQPuyeqkqHuHm4L/APc0sxiYWkcZH8HZhRd5xw+9uh/yfgd/B/4GfKDO9zLyOwZmefdyKfC+esnolV8OnFxSty73EjgAZ9q5L/D9TivS7zJGxsL9Li2y2DAMo8VpCdOQYRiGEY0pAsMwjBbHFIFhGEaLY4rAMAyjxTFFYBiG0eKYIjCaCi+DY1FiLQyjITBFYBgeXuRs4WkUOY3GwRSB0Yy0i8iPvRzwN4vIFiKyl4jcFcgB7+ep/6OIfFNE/gSc6kV3Xisi93jH/l69dwXyxN8rImNE5GARud1r7wER+aGItHn1jxO3/8USETnXKztGRP7He32qiDzmvd5JRO7wXu/jJRxbKCK/C6RLGCJnrW+o0dyYIjCakV2AH6jqm4EXcRGbVwBfUtW34CJLvxqoP1ZV36Wq3wFmAxeo6tu86y7x6nwR+Iyq7oXLwLnWK387cAawBy5x3NEisi0uz/whuERybxORI4HbvWvx/q4Ske1wEah/9vLSfA/4sKruA1wKnBMhp2Hkhk0xjWbkcVVd5L1eiBugx6rqn7yyuWzO7gqBJGq4fDpTXJoYALby8lT9BfgfEZkPXKeqK7w6f1VV/8n+KtygvgH4o6qu9MrnAwep6vUiMtprbzJwJW4TmAOB64DdgN2BW7y223GpHsLkNIzcMEVgNCPrAq8HgbEJ9V8JvG4D3qGqa0vqfEtEfo3LFXOXiBzqlZfmaFHCUx773Al8Apc76M+4fPTvwM0qtgfuV9V3pJDTMHLDTENGK/AS8EJgo48TgD9F1L0Z+Kz/RkT28v7upKqLVfVcYAHwRq/K20VkB8838FHgDtzmI+8SkQleNsnjAv3djjMz3Q7cC7wbWKeqL+GUw0QReYfXZ6eIvLnyj28Y8ZgiMFqFGcB5InIfzm7/nxH1Pg9M9ZzKDwAne+WneY7fv+P8A/6uUncC38Kljn4c+KW69MZfBm7DyySpqn465D/jzEK3q+ogLgvlHQCquh74MHCu188i4J25fHrDiMGyjxpGmYjIwbhN599fb1kMoxJsRmAYhtHi2IzAMAyjxbEZgWEYRotjisAwDKPFMUVgGIbR4pgiMAzDaHFMERiGYbQ4pggMwzBanP8Poe0s3CddJdAAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"'''Exercise: 3.7.8 Simple Linear Regression (page 121)'''\n",
"\n",
"fname = 'Auto.csv'\n",
"df = pd.read_csv(fname)\n",
"\n",
"# Convert horsepower to numbers; entries that cannot be parsed (e.g. '?') become NaN.\n",
"df['horsepower'] = pd.to_numeric(df['horsepower'], errors='coerce')\n",
"\n",
"# Drop the rows without a horsepower value. Only that column is checked, so NaN values\n",
"# in other columns do not cause rows to be dropped.\n",
"df = df.dropna(how='any', subset=['horsepower'])\n",
"\n",
"X = df[\"horsepower\"]  # independent variable (or predictor)\n",
"Y = df[\"mpg\"]  # dependent variable (which we want to fit / predict)\n",
"\n",
"# Hand plain NumPy arrays to scikit-learn. `.values` is used instead of the deprecated\n",
"# `as_matrix` (pandas' FutureWarning for `as_matrix` recommends `.values`).\n",
"X_np = X.values\n",
"Y_np = Y.values\n",
"\n",
"# scikit-learn needs a 2D predictor matrix, so turn the 1D array into a single column.\n",
"X_np2 = X_np[:, np.newaxis]\n",
"\n",
"regr = linear_model.LinearRegression()\n",
"regr.fit(X_np2, Y_np)  # perform the actual linear regression\n",
"\n",
"Y_hat = regr.predict(X_np2)  # predictions for the same rows the model was fitted on\n",
"\n",
"print('Coefficients', regr.coef_)\n",
"print('Intercept:', regr.intercept_)\n",
"print('Variance score: %.2f' % r2_score(Y_np, Y_hat))\n",
"\n",
"# Plot the data (blue) and the fitted line (red).\n",
"plt.scatter(X_np2, Y_np, color='blue')\n",
"plt.plot(X_np2, Y_hat, color='red', linewidth=3)\n",
"plt.xlabel('horsepower')\n",
"plt.ylabel('mpg')"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 1440x1440 with 64 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"'''Exercise: 3.7.9 Multiple linear Regression (page 122)'''\n",
"\n",
"fname = 'Auto.csv'\n",
"df = pd.read_csv(fname)\n",
"\n",
"col_to_use = ['mpg','cylinders','displacement','horsepower','weight','acceleration','year','origin']\n",
"df_new = df[col_to_use]\n",
"\n",
"# Numeric copy used only for plotting. Any column that was read as text (presumably\n",
"# horsepower, if the file holds placeholders such as '?') is converted so that it is drawn\n",
"# on a numeric axis; values that cannot be parsed become NaN. df and df_new are left as they are.\n",
"df_plot = df_new.apply(pd.to_numeric, errors='coerce')\n",
"\n",
"# Plot every pairwise combination of the variables as a scatter plot:\n",
"# one panel per (row variable, column variable) pair, so the grid follows the column list.\n",
"n_vars = len(col_to_use)\n",
"f, axarr = plt.subplots(n_vars, n_vars, figsize=(20, 20))\n",
"\n",
"for c_row, row in enumerate(df_plot):  # iterating a DataFrame yields its column names\n",
"    for c_col, col in enumerate(df_plot):\n",
"        ax = axarr[c_row, c_col]\n",
"        ax.scatter(df_plot[col], df_plot[row])  # x: column variable, y: row variable\n",
"        ax.set_xlabel(col)\n",
"        ax.set_ylabel(row)\n",
"\n",
"# Extra spacing so the axis labels of neighbouring panels do not overlap.\n",
"f.subplots_adjust(hspace=0.4, wspace=0.4)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>mpg</th>\n",
" <th>cylinders</th>\n",
" <th>displacement</th>\n",
" <th>weight</th>\n",
" <th>acceleration</th>\n",
" <th>year</th>\n",
" <th>origin</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>mpg</th>\n",
" <td>1.000000</td>\n",
" <td>-0.776260</td>\n",
" <td>-0.804443</td>\n",
" <td>-0.831739</td>\n",
" <td>0.422297</td>\n",
" <td>0.581469</td>\n",
" <td>0.563698</td>\n",
" </tr>\n",
" <tr>\n",
" <th>cylinders</th>\n",
" <td>-0.776260</td>\n",
" <td>1.000000</td>\n",
" <td>0.950920</td>\n",
" <td>0.897017</td>\n",
" <td>-0.504061</td>\n",
" <td>-0.346717</td>\n",
" <td>-0.564972</td>\n",
" </tr>\n",
" <tr>\n",
" <th>displacement</th>\n",
" <td>-0.804443</td>\n",
" <td>0.950920</td>\n",
" <td>1.000000</td>\n",
" <td>0.933104</td>\n",
" <td>-0.544162</td>\n",
" <td>-0.369804</td>\n",
" <td>-0.610664</td>\n",
" </tr>\n",
" <tr>\n",
" <th>weight</th>\n",
" <td>-0.831739</td>\n",
" <td>0.897017</td>\n",
" <td>0.933104</td>\n",
" <td>1.000000</td>\n",
" <td>-0.419502</td>\n",
" <td>-0.307900</td>\n",
" <td>-0.581265</td>\n",
" </tr>\n",
" <tr>\n",
" <th>acceleration</th>\n",
" <td>0.422297</td>\n",
" <td>-0.504061</td>\n",
" <td>-0.544162</td>\n",
" <td>-0.419502</td>\n",
" <td>1.000000</td>\n",
" <td>0.282901</td>\n",
" <td>0.210084</td>\n",
" </tr>\n",
" <tr>\n",
" <th>year</th>\n",
" <td>0.581469</td>\n",
" <td>-0.346717</td>\n",
" <td>-0.369804</td>\n",
" <td>-0.307900</td>\n",
" <td>0.282901</td>\n",
" <td>1.000000</td>\n",
" <td>0.184314</td>\n",
" </tr>\n",
" <tr>\n",
" <th>origin</th>\n",
" <td>0.563698</td>\n",
" <td>-0.564972</td>\n",
" <td>-0.610664</td>\n",
" <td>-0.581265</td>\n",
" <td>0.210084</td>\n",
" <td>0.184314</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" mpg cylinders displacement weight acceleration \\\n",
"mpg 1.000000 -0.776260 -0.804443 -0.831739 0.422297 \n",
"cylinders -0.776260 1.000000 0.950920 0.897017 -0.504061 \n",
"displacement -0.804443 0.950920 1.000000 0.933104 -0.544162 \n",
"weight -0.831739 0.897017 0.933104 1.000000 -0.419502 \n",
"acceleration 0.422297 -0.504061 -0.544162 -0.419502 1.000000 \n",
"year 0.581469 -0.346717 -0.369804 -0.307900 0.282901 \n",
"origin 0.563698 -0.564972 -0.610664 -0.581265 0.210084 \n",
"\n",
" year origin \n",
"mpg 0.581469 0.563698 \n",
"cylinders -0.346717 -0.564972 \n",
"displacement -0.369804 -0.610664 \n",
"weight -0.307900 -0.581265 \n",
"acceleration 0.282901 0.210084 \n",
"year 1.000000 0.184314 \n",
"origin 0.184314 1.000000 "
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Correlation matrix to check for possible collinearities.\n",
"# The numeric columns are selected explicitly instead of relying on corr() to skip\n",
"# text columns on its own, which newer pandas versions no longer do by default.\n",
"df.select_dtypes(include=[np.number]).corr()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Coefficients [-0.49337632 0.01989564 -0.01695114 -0.00647404 0.08057584 0.75077268\n",
" 1.4261405 ]\n",
"Intercept: -17.218434622\n",
"Variance score: 0.82\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/conda/lib/python3.6/site-packages/ipykernel_launcher.py:20: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.\n"
]
}
],
"source": [
"# Multiple linear regression fit\n",
"\n",
"col_to_use_fit = ['cylinders','displacement','horsepower','weight','acceleration','year','origin']\n",
"\n",
"# scikit-learn needs numeric input, so rows containing strings such as '?' have to go.\n",
"# Step 1: convert the predictor columns to numbers; entries that cannot be parsed become NaN.\n",
"df[col_to_use_fit] = df[col_to_use_fit].apply(pd.to_numeric, errors='coerce')\n",
"\n",
"# Step 2: drop the rows with a NaN in any predictor. Only the converted columns are checked,\n",
"# so NaN values in other columns do not cause rows to be dropped.\n",
"df = df.dropna(how='any', subset=col_to_use_fit)\n",
"\n",
"X = df[col_to_use_fit]  # independent variables (or predictors)\n",
"Y = df[\"mpg\"]  # dependent variable (which we want to fit / predict)\n",
"\n",
"# Hand plain NumPy arrays to scikit-learn. `.values` is used instead of the deprecated\n",
"# `as_matrix` (pandas' FutureWarning for `as_matrix` recommends `.values`).\n",
"X_np = X.values\n",
"# Build the response array from this cell's Y so that it has exactly the rows of X_np,\n",
"# rather than reusing a Y_np left behind by an earlier cell.\n",
"Y_np = Y.values\n",
"\n",
"regr = linear_model.LinearRegression()\n",
"regr.fit(X_np, Y_np)\n",
"\n",
"Y_hat = regr.predict(X_np)  # predictions for the same rows the model was fitted on\n",
"\n",
"print('Coefficients', regr.coef_)\n",
"print('Intercept:', regr.intercept_)\n",
"print('Variance score: %.2f' % r2_score(Y_np, Y_hat))"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Coefficients [ 6.98857616e+00 -4.78538689e-01 5.03433939e-01 4.13288566e-03\n",
" -5.85917321e+00 6.97430284e-01 -2.08955704e+01 -3.38326306e-03\n",
" 1.16133262e-02 3.57462990e-04 2.77871993e-01 -1.74125857e-01\n",
" 4.02168216e-01 -8.49062164e-05 2.47186800e-05 -3.47899523e-03\n",
" 5.93380246e-03 2.39811277e-02 -1.96842975e-05 -7.21273895e-03\n",
" -5.83750571e-03 2.23250718e-03 2.34619450e-04 -2.24523733e-04\n",
" -5.78847492e-04 5.56215079e-02 4.58316099e-01 1.39257020e-01]\n",
"Intercept: 35.4788874758\n",
"Variance score: 0.89\n"
]
}
],
"source": [
"# Extend the predictors with all second-order interaction terms (products of two distinct predictors),\n",
"# see https://stackoverflow.com/questions/45828964/how-to-add-interaction-term-in-python-sklearn\n",
"\n",
"# degree=2 is the PolynomialFeatures default and is only spelled out here for readability.\n",
"poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)\n",
"\n",
"# New feature space: [x1, x2, ..., xN, x1*x2, ..., x1*xN, x2*x3, ..., x2*xN, ..., x(N-1)*xN]\n",
"X_interact = poly.fit_transform(X_np)\n",
"\n",
"# fit() returns the estimator itself, so construction and fitting are chained.\n",
"regr_interact = linear_model.LinearRegression().fit(X_interact, Y_np)\n",
"\n",
"# Predictions for the same rows the model was fitted on.\n",
"Y_hat_interact = regr_interact.predict(X_interact)\n",
"\n",
"for label, value in (('Coefficients', regr_interact.coef_), ('Intercept:', regr_interact.intercept_)):\n",
"    print(label, value)\n",
"print('Variance score: %.2f' % r2_score(Y_np, Y_hat_interact))"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table class=\"simpletable\">\n",
"<caption>OLS Regression Results</caption>\n",
"<tr>\n",
" <th>Dep. Variable:</th> <td>mpg</td> <th> R-squared: </th> <td> 0.821</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Model:</th> <td>OLS</td> <th> Adj. R-squared: </th> <td> 0.818</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Method:</th> <td>Least Squares</td> <th> F-statistic: </th> <td> 252.4</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Date:</th> <td>Tue, 17 Jul 2018</td> <th> Prob (F-statistic):</th> <td>2.04e-139</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Time:</th> <td>08:25:14</td> <th> Log-Likelihood: </th> <td> -1023.5</td> \n",
"</tr>\n",
"<tr>\n",
" <th>No. Observations:</th> <td> 392</td> <th> AIC: </th> <td> 2063.</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Df Residuals:</th> <td> 384</td> <th> BIC: </th> <td> 2095.</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Df Model:</th> <td> 7</td> <th> </th> <td> </td> \n",
"</tr>\n",
"<tr>\n",
" <th>Covariance Type:</th> <td>nonrobust</td> <th> </th> <td> </td> \n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <td></td> <th>coef</th> <th>std err</th> <th>t</th> <th>P>|t|</th> <th>[0.025</th> <th>0.975]</th> \n",
"</tr>\n",
"<tr>\n",
" <th>const</th> <td> -17.2184</td> <td> 4.644</td> <td> -3.707</td> <td> 0.000</td> <td> -26.350</td> <td> -8.087</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders</th> <td> -0.4934</td> <td> 0.323</td> <td> -1.526</td> <td> 0.128</td> <td> -1.129</td> <td> 0.142</td>\n",
"</tr>\n",
"<tr>\n",
" <th>displacement</th> <td> 0.0199</td> <td> 0.008</td> <td> 2.647</td> <td> 0.008</td> <td> 0.005</td> <td> 0.035</td>\n",
"</tr>\n",
"<tr>\n",
" <th>horsepower</th> <td> -0.0170</td> <td> 0.014</td> <td> -1.230</td> <td> 0.220</td> <td> -0.044</td> <td> 0.010</td>\n",
"</tr>\n",
"<tr>\n",
" <th>weight</th> <td> -0.0065</td> <td> 0.001</td> <td> -9.929</td> <td> 0.000</td> <td> -0.008</td> <td> -0.005</td>\n",
"</tr>\n",
"<tr>\n",
" <th>acceleration</th> <td> 0.0806</td> <td> 0.099</td> <td> 0.815</td> <td> 0.415</td> <td> -0.114</td> <td> 0.275</td>\n",
"</tr>\n",
"<tr>\n",
" <th>year</th> <td> 0.7508</td> <td> 0.051</td> <td> 14.729</td> <td> 0.000</td> <td> 0.651</td> <td> 0.851</td>\n",
"</tr>\n",
"<tr>\n",
" <th>origin</th> <td> 1.4261</td> <td> 0.278</td> <td> 5.127</td> <td> 0.000</td> <td> 0.879</td> <td> 1.973</td>\n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <th>Omnibus:</th> <td>31.906</td> <th> Durbin-Watson: </th> <td> 1.309</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Prob(Omnibus):</th> <td> 0.000</td> <th> Jarque-Bera (JB): </th> <td> 53.100</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Skew:</th> <td> 0.529</td> <th> Prob(JB): </th> <td>2.95e-12</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Kurtosis:</th> <td> 4.460</td> <th> Cond. No. </th> <td>8.59e+04</td>\n",
"</tr>\n",
"</table><br/><br/>Warnings:<br/>[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.<br/>[2] The condition number is large, 8.59e+04. This might indicate that there are<br/>strong multicollinearity or other numerical problems."
],
"text/plain": [
"<class 'statsmodels.iolib.summary.Summary'>\n",
"\"\"\"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: mpg R-squared: 0.821\n",
"Model: OLS Adj. R-squared: 0.818\n",
"Method: Least Squares F-statistic: 252.4\n",
"Date: Tue, 17 Jul 2018 Prob (F-statistic): 2.04e-139\n",
"Time: 08:25:14 Log-Likelihood: -1023.5\n",
"No. Observations: 392 AIC: 2063.\n",
"Df Residuals: 384 BIC: 2095.\n",
"Df Model: 7 \n",
"Covariance Type: nonrobust \n",
"================================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"--------------------------------------------------------------------------------\n",
"const -17.2184 4.644 -3.707 0.000 -26.350 -8.087\n",
"cylinders -0.4934 0.323 -1.526 0.128 -1.129 0.142\n",
"displacement 0.0199 0.008 2.647 0.008 0.005 0.035\n",
"horsepower -0.0170 0.014 -1.230 0.220 -0.044 0.010\n",
"weight -0.0065 0.001 -9.929 0.000 -0.008 -0.005\n",
"acceleration 0.0806 0.099 0.815 0.415 -0.114 0.275\n",
"year 0.7508 0.051 14.729 0.000 0.651 0.851\n",
"origin 1.4261 0.278 5.127 0.000 0.879 1.973\n",
"==============================================================================\n",
"Omnibus: 31.906 Durbin-Watson: 1.309\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 53.100\n",
"Skew: 0.529 Prob(JB): 2.95e-12\n",
"Kurtosis: 4.460 Cond. No. 8.59e+04\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"[2] The condition number is large, 8.59e+04. This might indicate that there are\n",
"strong multicollinearity or other numerical problems.\n",
"\"\"\""
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The results above are hard to interpret, so go back to statsmodels and its summary table,\n",
"# which reports coefficients, standard errors, t-statistics and p-values in one place.\n",
"''' 3.7.9c: Multiple linear regression'''\n",
"\n",
"\n",
"fname = 'Auto.csv'\n",
"df = pd.read_csv(fname)\n",
"\n",
"col_to_use_fit = ['cylinders','displacement','horsepower','weight','acceleration','year','origin']\n",
"col_numeric = col_to_use_fit + ['mpg']  # predictors plus the response\n",
"\n",
"# Entries that are not numbers (e.g. the string '?') become NaN via errors='coerce'.\n",
"# The response is converted as well, matching 3.7.9d, so a malformed 'mpg' entry\n",
"# cannot reach the fit as a string.\n",
"df[col_numeric] = df[col_numeric].apply(pd.to_numeric, errors='coerce')\n",
"\n",
"# Drop incomplete rows. The subset is listed explicitly so that NaN values in columns\n",
"# that are not part of the model do not remove rows.\n",
"df = df.dropna(how='any', subset=col_numeric)\n",
"\n",
"\n",
"X = df[col_to_use_fit]\n",
"y = df['mpg']\n",
"X0 = sm.add_constant(X)  # sm.OLS fits no intercept (beta_0) unless a constant column is added\n",
"model = sm.OLS(y, X0).fit()  # ordinary least squares fit of mpg on all predictors\n",
"model.summary()\n"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 1440x432 with 14 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"''' 3.7.9d: Plotting'''\n",
"# For every predictor: fit a simple linear regression of mpg on that predictor alone,\n",
"# draw the data with the fitted line (top row) and the residuals (bottom row).\n",
"\n",
"fname = 'Auto.csv'\n",
"df = pd.read_csv(fname)\n",
"\n",
"col_to_use_fit = ['cylinders','displacement','horsepower','weight','acceleration','year','origin']\n",
"\n",
"# Convert once, before the loop: entries that are not numbers (e.g. '?') become NaN.\n",
"col_numeric = col_to_use_fit + ['mpg']\n",
"df[col_numeric] = df[col_numeric].apply(pd.to_numeric, errors='coerce')\n",
"\n",
"f, axarr = plt.subplots(2, len(col_to_use_fit), figsize=(20,6))\n",
"\n",
"for ind, param in enumerate(col_to_use_fit):\n",
"    # Drop only the rows lacking this predictor or the response, so NaN values\n",
"    # in the other columns do not shrink the sample used for this panel.\n",
"    df_used = df.dropna(how='any', subset=[param,'mpg'])\n",
"    y = df_used['mpg']\n",
"\n",
"    X = np.asarray(df_used[param])\n",
"    X0 = sm.add_constant(X)  # sm.OLS fits no intercept (beta_0) unless a constant column is added\n",
"    model = sm.OLS(y, X0).fit()  # simple linear regression of mpg on this predictor\n",
"    y_hat = model.fittedvalues\n",
"    resids = model.resid\n",
"\n",
"    # top row: observations and fitted line\n",
"    axarr[0,ind].scatter(df_used[param], y)\n",
"    axarr[0,ind].plot(df_used[param], y_hat, color='red')\n",
"    axarr[0,ind].set_xlabel(param)\n",
"    axarr[0,ind].set_ylabel('mpg')\n",
"\n",
"    # bottom row: residuals against the fitted values. OLS residuals are uncorrelated with\n",
"    # the fitted values but always positively correlated with the observed response, so\n",
"    # plotting them against observed mpg would show a trend even for a well-specified model.\n",
"    axarr[1,ind].scatter(y_hat, resids, color='blue')\n",
"    axarr[1,ind].set_xlabel('fitted mpg')\n",
"    axarr[1,ind].set_ylabel('residuals')\n",
"\n",
"f.subplots_adjust(hspace=0.4, wspace=0.4)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table class=\"simpletable\">\n",
"<caption>OLS Regression Results</caption>\n",
"<tr>\n",
" <th>Dep. Variable:</th> <td>mpg</td> <th> R-squared: </th> <td> 0.931</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Model:</th> <td>OLS</td> <th> Adj. R-squared: </th> <td> 0.906</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Method:</th> <td>Least Squares</td> <th> F-statistic: </th> <td> 38.51</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Date:</th> <td>Tue, 17 Jul 2018</td> <th> Prob (F-statistic):</th> <td>1.50e-123</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Time:</th> <td>08:25:16</td> <th> Log-Likelihood: </th> <td> -838.24</td> \n",
"</tr>\n",
"<tr>\n",
" <th>No. Observations:</th> <td> 392</td> <th> AIC: </th> <td> 1880.</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Df Residuals:</th> <td> 290</td> <th> BIC: </th> <td> 2286.</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Df Model:</th> <td> 101</td> <th> </th> <td> </td> \n",
"</tr>\n",
"<tr>\n",
" <th>Covariance Type:</th> <td>nonrobust</td> <th> </th> <td> </td> \n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <td></td> <th>coef</th> <th>std err</th> <th>t</th> <th>P>|t|</th> <th>[0.025</th> <th>0.975]</th> \n",
"</tr>\n",
"<tr>\n",
" <th>Intercept</th> <td> -0.0172</td> <td> 0.395</td> <td> -0.043</td> <td> 0.965</td> <td> -0.795</td> <td> 0.761</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders</th> <td> -0.9694</td> <td> 3.083</td> <td> -0.314</td> <td> 0.753</td> <td> -7.037</td> <td> 5.098</td>\n",
"</tr>\n",
"<tr>\n",
" <th>displacement</th> <td> 15.5886</td> <td> 26.002</td> <td> 0.600</td> <td> 0.549</td> <td> -35.589</td> <td> 66.766</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:displacement</th> <td> 9.9093</td> <td> 7.405</td> <td> 1.338</td> <td> 0.182</td> <td> -4.664</td> <td> 24.483</td>\n",
"</tr>\n",
"<tr>\n",
" <th>horsepower</th> <td> 3.5012</td> <td> 23.820</td> <td> 0.147</td> <td> 0.883</td> <td> -43.381</td> <td> 50.383</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:horsepower</th> <td> 0.3706</td> <td> 8.546</td> <td> 0.043</td> <td> 0.965</td> <td> -16.450</td> <td> 17.191</td>\n",
"</tr>\n",
"<tr>\n",
" <th>displacement:horsepower</th> <td> -0.2285</td> <td> 0.316</td> <td> -0.722</td> <td> 0.471</td> <td> -0.851</td> <td> 0.394</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:displacement:horsepower</th> <td> -0.1061</td> <td> 0.094</td> <td> -1.128</td> <td> 0.260</td> <td> -0.291</td> <td> 0.079</td>\n",
"</tr>\n",
"<tr>\n",
" <th>weight</th> <td> -0.5497</td> <td> 0.851</td> <td> -0.646</td> <td> 0.519</td> <td> -2.224</td> <td> 1.124</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:weight</th> <td> -0.3716</td> <td> 0.284</td> <td> -1.309</td> <td> 0.192</td> <td> -0.931</td> <td> 0.187</td>\n",
"</tr>\n",
"<tr>\n",
" <th>displacement:weight</th> <td> -0.0100</td> <td> 0.022</td> <td> -0.455</td> <td> 0.649</td> <td> -0.053</td> <td> 0.033</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:displacement:weight</th> <td> 0.0016</td> <td> 0.004</td> <td> 0.389</td> <td> 0.698</td> <td> -0.007</td> <td> 0.010</td>\n",
"</tr>\n",
"<tr>\n",
" <th>horsepower:weight</th> <td> 0.0024</td> <td> 0.011</td> <td> 0.224</td> <td> 0.823</td> <td> -0.019</td> <td> 0.024</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:horsepower:weight</th> <td> 0.0040</td> <td> 0.004</td> <td> 1.084</td> <td> 0.279</td> <td> -0.003</td> <td> 0.011</td>\n",
"</tr>\n",
"<tr>\n",
" <th>displacement:horsepower:weight</th> <td>-1.791e-05</td> <td> 0.000</td> <td> -0.082</td> <td> 0.935</td> <td> -0.000</td> <td> 0.000</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:displacement:horsepower:weight</th> <td> 2.426e-05</td> <td> 4.09e-05</td> <td> 0.593</td> <td> 0.553</td> <td>-5.62e-05</td> <td> 0.000</td>\n",
"</tr>\n",
"<tr>\n",
" <th>acceleration</th> <td> -24.8662</td> <td> 46.097</td> <td> -0.539</td> <td> 0.590</td> <td> -115.593</td> <td> 65.860</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:acceleration</th> <td> 2.7613</td> <td> 24.785</td> <td> 0.111</td> <td> 0.911</td> <td> -46.019</td> <td> 51.542</td>\n",
"</tr>\n",
"<tr>\n",
" <th>displacement:acceleration</th> <td> -1.0363</td> <td> 1.455</td> <td> -0.712</td> <td> 0.477</td> <td> -3.899</td> <td> 1.827</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:displacement:acceleration</th> <td> -0.7075</td> <td> 0.511</td> <td> -1.385</td> <td> 0.167</td> <td> -1.713</td> <td> 0.298</td>\n",
"</tr>\n",
"<tr>\n",
" <th>horsepower:acceleration</th> <td> 0.0148</td> <td> 1.697</td> <td> 0.009</td> <td> 0.993</td> <td> -3.325</td> <td> 3.355</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:horsepower:acceleration</th> <td> -0.1210</td> <td> 0.636</td> <td> -0.190</td> <td> 0.849</td> <td> -1.373</td> <td> 1.131</td>\n",
"</tr>\n",
"<tr>\n",
" <th>displacement:horsepower:acceleration</th> <td> 0.0271</td> <td> 0.024</td> <td> 1.153</td> <td> 0.250</td> <td> -0.019</td> <td> 0.073</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:displacement:horsepower:acceleration</th> <td> 0.0049</td> <td> 0.006</td> <td> 0.767</td> <td> 0.444</td> <td> -0.008</td> <td> 0.017</td>\n",
"</tr>\n",
"<tr>\n",
" <th>weight:acceleration</th> <td> 0.0331</td> <td> 0.062</td> <td> 0.534</td> <td> 0.594</td> <td> -0.089</td> <td> 0.155</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:weight:acceleration</th> <td> 0.0279</td> <td> 0.018</td> <td> 1.549</td> <td> 0.123</td> <td> -0.008</td> <td> 0.063</td>\n",
"</tr>\n",
"<tr>\n",
" <th>displacement:weight:acceleration</th> <td> -0.0004</td> <td> 0.001</td> <td> -0.275</td> <td> 0.784</td> <td> -0.003</td> <td> 0.002</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:displacement:weight:acceleration</th> <td> 0.0002</td> <td> 0.000</td> <td> 0.758</td> <td> 0.449</td> <td> -0.000</td> <td> 0.001</td>\n",
"</tr>\n",
"<tr>\n",
" <th>horsepower:weight:acceleration</th> <td> 7.027e-05</td> <td> 0.001</td> <td> 0.082</td> <td> 0.935</td> <td> -0.002</td> <td> 0.002</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:horsepower:weight:acceleration</th> <td> -0.0003</td> <td> 0.000</td> <td> -1.293</td> <td> 0.197</td> <td> -0.001</td> <td> 0.000</td>\n",
"</tr>\n",
"<tr>\n",
" <th>displacement:horsepower:weight:acceleration</th> <td> 5.366e-06</td> <td> 1.68e-05</td> <td> 0.319</td> <td> 0.750</td> <td>-2.77e-05</td> <td> 3.85e-05</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:displacement:horsepower:weight:acceleration</th> <td>-2.931e-06</td> <td> 3.04e-06</td> <td> -0.965</td> <td> 0.335</td> <td>-8.91e-06</td> <td> 3.05e-06</td>\n",
"</tr>\n",
"<tr>\n",
" <th>year</th> <td> -7.7724</td> <td> 5.452</td> <td> -1.426</td> <td> 0.155</td> <td> -18.503</td> <td> 2.958</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:year</th> <td> -2.2048</td> <td> 1.821</td> <td> -1.211</td> <td> 0.227</td> <td> -5.789</td> <td> 1.380</td>\n",
"</tr>\n",
"<tr>\n",
" <th>displacement:year</th> <td> -0.1627</td> <td> 0.344</td> <td> -0.473</td> <td> 0.636</td> <td> -0.839</td> <td> 0.514</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:displacement:year</th> <td> -0.1109</td> <td> 0.097</td> <td> -1.141</td> <td> 0.255</td> <td> -0.302</td> <td> 0.080</td>\n",
"</tr>\n",
"<tr>\n",
" <th>horsepower:year</th> <td> 0.0345</td> <td> 0.322</td> <td> 0.107</td> <td> 0.915</td> <td> -0.600</td> <td> 0.669</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:horsepower:year</th> <td> 0.0331</td> <td> 0.121</td> <td> 0.275</td> <td> 0.784</td> <td> -0.204</td> <td> 0.270</td>\n",
"</tr>\n",
"<tr>\n",
" <th>displacement:horsepower:year</th> <td> 0.0018</td> <td> 0.004</td> <td> 0.411</td> <td> 0.681</td> <td> -0.007</td> <td> 0.010</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:displacement:horsepower:year</th> <td> 0.0013</td> <td> 0.001</td> <td> 1.038</td> <td> 0.300</td> <td> -0.001</td> <td> 0.004</td>\n",
"</tr>\n",
"<tr>\n",
" <th>weight:year</th> <td> 0.0100</td> <td> 0.015</td> <td> 0.674</td> <td> 0.501</td> <td> -0.019</td> <td> 0.039</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:weight:year</th> <td> 0.0061</td> <td> 0.004</td> <td> 1.670</td> <td> 0.096</td> <td> -0.001</td> <td> 0.013</td>\n",
"</tr>\n",
"<tr>\n",
" <th>displacement:weight:year</th> <td> 7.834e-05</td> <td> 0.000</td> <td> 0.297</td> <td> 0.767</td> <td> -0.000</td> <td> 0.001</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:displacement:weight:year</th> <td>-2.064e-05</td> <td> 4.85e-05</td> <td> -0.425</td> <td> 0.671</td> <td> -0.000</td> <td> 7.48e-05</td>\n",
"</tr>\n",
"<tr>\n",
" <th>horsepower:weight:year</th> <td>-2.282e-05</td> <td> 0.000</td> <td> -0.142</td> <td> 0.887</td> <td> -0.000</td> <td> 0.000</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:horsepower:weight:year</th> <td>-8.244e-05</td> <td> 4.81e-05</td> <td> -1.716</td> <td> 0.087</td> <td> -0.000</td> <td> 1.21e-05</td>\n",
"</tr>\n",
"<tr>\n",
" <th>displacement:horsepower:weight:year</th> <td> 7.251e-07</td> <td> 2.64e-06</td> <td> 0.274</td> <td> 0.784</td> <td>-4.48e-06</td> <td> 5.93e-06</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:displacement:horsepower:weight:year</th> <td>-2.607e-07</td> <td> 4.95e-07</td> <td> -0.527</td> <td> 0.599</td> <td>-1.23e-06</td> <td> 7.13e-07</td>\n",
"</tr>\n",
"<tr>\n",
" <th>acceleration:year</th> <td> 0.8053</td> <td> 0.649</td> <td> 1.240</td> <td> 0.216</td> <td> -0.473</td> <td> 2.083</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:acceleration:year</th> <td> 0.1014</td> <td> 0.330</td> <td> 0.308</td> <td> 0.759</td> <td> -0.548</td> <td> 0.750</td>\n",
"</tr>\n",
"<tr>\n",
" <th>displacement:acceleration:year</th> <td> 0.0271</td> <td> 0.021</td> <td> 1.285</td> <td> 0.200</td> <td> -0.014</td> <td> 0.069</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:displacement:acceleration:year</th> <td> 0.0042</td> <td> 0.007</td> <td> 0.628</td> <td> 0.531</td> <td> -0.009</td> <td> 0.017</td>\n",
"</tr>\n",
"<tr>\n",
" <th>horsepower:acceleration:year</th> <td> -0.0057</td> <td> 0.024</td> <td> -0.236</td> <td> 0.813</td> <td> -0.053</td> <td> 0.041</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:horsepower:acceleration:year</th> <td> -0.0005</td> <td> 0.009</td> <td> -0.055</td> <td> 0.956</td> <td> -0.018</td> <td> 0.017</td>\n",
"</tr>\n",
"<tr>\n",
" <th>displacement:horsepower:acceleration:year</th> <td> -0.0004</td> <td> 0.000</td> <td> -1.253</td> <td> 0.211</td> <td> -0.001</td> <td> 0.000</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:displacement:horsepower:acceleration:year</th> <td>-2.812e-05</td> <td> 8.86e-05</td> <td> -0.318</td> <td> 0.751</td> <td> -0.000</td> <td> 0.000</td>\n",
"</tr>\n",
"<tr>\n",
" <th>weight:acceleration:year</th> <td> -0.0008</td> <td> 0.001</td> <td> -0.799</td> <td> 0.425</td> <td> -0.003</td> <td> 0.001</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:weight:acceleration:year</th> <td> -0.0004</td> <td> 0.000</td> <td> -1.703</td> <td> 0.090</td> <td> -0.001</td> <td> 6.23e-05</td>\n",
"</tr>\n",
"<tr>\n",
" <th>displacement:weight:acceleration:year</th> <td> 3.71e-06</td> <td> 1.6e-05</td> <td> 0.231</td> <td> 0.817</td> <td>-2.78e-05</td> <td> 3.53e-05</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:displacement:weight:acceleration:year</th> <td>-1.154e-06</td> <td> 2.54e-06</td> <td> -0.455</td> <td> 0.650</td> <td>-6.15e-06</td> <td> 3.84e-06</td>\n",
"</tr>\n",
"<tr>\n",
" <th>horsepower:weight:acceleration:year</th> <td> 1.038e-07</td> <td> 1.19e-05</td> <td> 0.009</td> <td> 0.993</td> <td>-2.33e-05</td> <td> 2.35e-05</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:horsepower:weight:acceleration:year</th> <td> 5.657e-06</td> <td> 3.23e-06</td> <td> 1.751</td> <td> 0.081</td> <td>-7.03e-07</td> <td> 1.2e-05</td>\n",
"</tr>\n",
"<tr>\n",
" <th>displacement:horsepower:weight:acceleration:year</th> <td>-6.701e-08</td> <td> 2.04e-07</td> <td> -0.328</td> <td> 0.743</td> <td>-4.69e-07</td> <td> 3.35e-07</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:displacement:horsepower:weight:acceleration:year</th> <td> 2.688e-08</td> <td> 3.69e-08</td> <td> 0.728</td> <td> 0.467</td> <td>-4.58e-08</td> <td> 9.96e-08</td>\n",
"</tr>\n",
"<tr>\n",
" <th>origin</th> <td> -1.6045</td> <td> 3.093</td> <td> -0.519</td> <td> 0.604</td> <td> -7.692</td> <td> 4.483</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:origin</th> <td> -0.7675</td> <td> 2.486</td> <td> -0.309</td> <td> 0.758</td> <td> -5.660</td> <td> 4.125</td>\n",
"</tr>\n",
"<tr>\n",
" <th>displacement:origin</th> <td> 10.2514</td> <td> 21.847</td> <td> 0.469</td> <td> 0.639</td> <td> -32.748</td> <td> 53.251</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:displacement:origin</th> <td> -11.6479</td> <td> 12.279</td> <td> -0.949</td> <td> 0.344</td> <td> -35.815</td> <td> 12.520</td>\n",
"</tr>\n",
"<tr>\n",
" <th>horsepower:origin</th> <td> 3.0028</td> <td> 21.382</td> <td> 0.140</td> <td> 0.888</td> <td> -39.082</td> <td> 45.087</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:horsepower:origin</th> <td> -1.6331</td> <td> 11.009</td> <td> -0.148</td> <td> 0.882</td> <td> -23.300</td> <td> 20.034</td>\n",
"</tr>\n",
"<tr>\n",
" <th>displacement:horsepower:origin</th> <td> -0.1664</td> <td> 0.266</td> <td> -0.626</td> <td> 0.532</td> <td> -0.690</td> <td> 0.357</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:displacement:horsepower:origin</th> <td> 0.1477</td> <td> 0.151</td> <td> 0.975</td> <td> 0.330</td> <td> -0.150</td> <td> 0.446</td>\n",
"</tr>\n",
"<tr>\n",
" <th>weight:origin</th> <td> -0.3467</td> <td> 0.727</td> <td> -0.477</td> <td> 0.634</td> <td> -1.778</td> <td> 1.085</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:weight:origin</th> <td> 0.4399</td> <td> 0.414</td> <td> 1.063</td> <td> 0.289</td> <td> -0.374</td> <td> 1.254</td>\n",
"</tr>\n",
"<tr>\n",
" <th>displacement:weight:origin</th> <td> 0.0092</td> <td> 0.020</td> <td> 0.461</td> <td> 0.645</td> <td> -0.030</td> <td> 0.048</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:displacement:weight:origin</th> <td> -0.0017</td> <td> 0.005</td> <td> -0.367</td> <td> 0.714</td> <td> -0.011</td> <td> 0.008</td>\n",
"</tr>\n",
"<tr>\n",
" <th>horsepower:weight:origin</th> <td> 0.0054</td> <td> 0.013</td> <td> 0.428</td> <td> 0.669</td> <td> -0.019</td> <td> 0.030</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:horsepower:weight:origin</th> <td> -0.0046</td> <td> 0.005</td> <td> -0.830</td> <td> 0.407</td> <td> -0.015</td> <td> 0.006</td>\n",
"</tr>\n",
"<tr>\n",
" <th>displacement:horsepower:weight:origin</th> <td> 6.377e-05</td> <td> 0.000</td> <td> 0.321</td> <td> 0.748</td> <td> -0.000</td> <td> 0.000</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:displacement:horsepower:weight:origin</th> <td>-2.917e-05</td> <td> 4.8e-05</td> <td> -0.607</td> <td> 0.544</td> <td> -0.000</td> <td> 6.53e-05</td>\n",
"</tr>\n",
"<tr>\n",
" <th>acceleration:origin</th> <td> -23.8434</td> <td> 45.005</td> <td> -0.530</td> <td> 0.597</td> <td> -112.422</td> <td> 64.735</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:acceleration:origin</th> <td> 6.8524</td> <td> 20.644</td> <td> 0.332</td> <td> 0.740</td> <td> -33.778</td> <td> 47.483</td>\n",
"</tr>\n",
"<tr>\n",
" <th>displacement:acceleration:origin</th> <td> -0.6654</td> <td> 1.190</td> <td> -0.559</td> <td> 0.576</td> <td> -3.007</td> <td> 1.676</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:displacement:acceleration:origin</th> <td> 0.7784</td> <td> 0.745</td> <td> 1.045</td> <td> 0.297</td> <td> -0.688</td> <td> 2.244</td>\n",
"</tr>\n",
"<tr>\n",
" <th>horsepower:acceleration:origin</th> <td> 0.0619</td> <td> 1.563</td> <td> 0.040</td> <td> 0.968</td> <td> -3.015</td> <td> 3.139</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:horsepower:acceleration:origin</th> <td> 0.0675</td> <td> 0.737</td> <td> 0.092</td> <td> 0.927</td> <td> -1.384</td> <td> 1.519</td>\n",
"</tr>\n",
"<tr>\n",
" <th>displacement:horsepower:acceleration:origin</th> <td> 0.0011</td> <td> 0.017</td> <td> 0.063</td> <td> 0.950</td> <td> -0.033</td> <td> 0.035</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:displacement:horsepower:acceleration:origin</th> <td> -0.0075</td> <td> 0.009</td> <td> -0.818</td> <td> 0.414</td> <td> -0.026</td> <td> 0.011</td>\n",
"</tr>\n",
"<tr>\n",
" <th>weight:acceleration:origin</th> <td> 0.0212</td> <td> 0.055</td> <td> 0.385</td> <td> 0.701</td> <td> -0.087</td> <td> 0.129</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:weight:acceleration:origin</th> <td> -0.0292</td> <td> 0.028</td> <td> -1.033</td> <td> 0.303</td> <td> -0.085</td> <td> 0.026</td>\n",
"</tr>\n",
"<tr>\n",
" <th>displacement:weight:acceleration:origin</th> <td> 0.0005</td> <td> 0.001</td> <td> 0.395</td> <td> 0.693</td> <td> -0.002</td> <td> 0.003</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:displacement:weight:acceleration:origin</th> <td> -0.0002</td> <td> 0.000</td> <td> -0.661</td> <td> 0.509</td> <td> -0.001</td> <td> 0.000</td>\n",
"</tr>\n",
"<tr>\n",
" <th>horsepower:weight:acceleration:origin</th> <td> -0.0006</td> <td> 0.001</td> <td> -0.636</td> <td> 0.526</td> <td> -0.002</td> <td> 0.001</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:horsepower:weight:acceleration:origin</th> <td> 0.0004</td> <td> 0.000</td> <td> 0.929</td> <td> 0.354</td> <td> -0.000</td> <td> 0.001</td>\n",
"</tr>\n",
"<tr>\n",
" <th>displacement:horsepower:weight:acceleration:origin</th> <td>-9.366e-06</td> <td> 1.57e-05</td> <td> -0.596</td> <td> 0.552</td> <td>-4.03e-05</td> <td> 2.16e-05</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:displacement:horsepower:weight:acceleration:origin</th> <td> 3.338e-06</td> <td> 3.28e-06</td> <td> 1.018</td> <td> 0.309</td> <td>-3.11e-06</td> <td> 9.79e-06</td>\n",
"</tr>\n",
"<tr>\n",
" <th>year:origin</th> <td> -6.1216</td> <td> 4.912</td> <td> -1.246</td> <td> 0.214</td> <td> -15.790</td> <td> 3.547</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:year:origin</th> <td> 4.3985</td> <td> 2.511</td> <td> 1.752</td> <td> 0.081</td> <td> -0.543</td> <td> 9.340</td>\n",
"</tr>\n",
"<tr>\n",
" <th>displacement:year:origin</th> <td> -0.1074</td> <td> 0.288</td> <td> -0.373</td> <td> 0.710</td> <td> -0.675</td> <td> 0.460</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:displacement:year:origin</th> <td> 0.1231</td> <td> 0.162</td> <td> 0.761</td> <td> 0.447</td> <td> -0.195</td> <td> 0.441</td>\n",
"</tr>\n",
"<tr>\n",
" <th>horsepower:year:origin</th> <td> 0.0187</td> <td> 0.289</td> <td> 0.065</td> <td> 0.948</td> <td> -0.551</td> <td> 0.588</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:horsepower:year:origin</th> <td> -0.0343</td> <td> 0.153</td> <td> -0.224</td> <td> 0.823</td> <td> -0.335</td> <td> 0.267</td>\n",
"</tr>\n",
"<tr>\n",
" <th>displacement:horsepower:year:origin</th> <td> 0.0028</td> <td> 0.004</td> <td> 0.793</td> <td> 0.429</td> <td> -0.004</td> <td> 0.010</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:displacement:horsepower:year:origin</th> <td> -0.0018</td> <td> 0.002</td> <td> -0.879</td> <td> 0.380</td> <td> -0.006</td> <td> 0.002</td>\n",
"</tr>\n",
"<tr>\n",
" <th>weight:year:origin</th> <td> 0.0076</td> <td> 0.011</td> <td> 0.698</td> <td> 0.485</td> <td> -0.014</td> <td> 0.029</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:weight:year:origin</th> <td> -0.0079</td> <td> 0.005</td> <td> -1.559</td> <td> 0.120</td> <td> -0.018</td> <td> 0.002</td>\n",
"</tr>\n",
"<tr>\n",
" <th>displacement:weight:year:origin</th> <td>-9.258e-05</td> <td> 0.000</td> <td> -0.398</td> <td> 0.691</td> <td> -0.001</td> <td> 0.000</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:displacement:weight:year:origin</th> <td> 2.574e-05</td> <td> 5.68e-05</td> <td> 0.454</td> <td> 0.650</td> <td> -8.6e-05</td> <td> 0.000</td>\n",
"</tr>\n",
"<tr>\n",
" <th>horsepower:weight:year:origin</th> <td> -0.0001</td> <td> 0.000</td> <td> -0.995</td> <td> 0.320</td> <td> -0.000</td> <td> 0.000</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:horsepower:weight:year:origin</th> <td> 9.734e-05</td> <td> 6.81e-05</td> <td> 1.429</td> <td> 0.154</td> <td>-3.68e-05</td> <td> 0.000</td>\n",
"</tr>\n",
"<tr>\n",
" <th>displacement:horsepower:weight:year:origin</th> <td>-1.062e-06</td> <td> 2.32e-06</td> <td> -0.457</td> <td> 0.648</td> <td>-5.64e-06</td> <td> 3.51e-06</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:displacement:horsepower:weight:year:origin</th> <td> 2.907e-07</td> <td> 5.94e-07</td> <td> 0.490</td> <td> 0.625</td> <td>-8.78e-07</td> <td> 1.46e-06</td>\n",
"</tr>\n",
"<tr>\n",
" <th>acceleration:year:origin</th> <td> 0.6903</td> <td> 0.627</td> <td> 1.101</td> <td> 0.272</td> <td> -0.544</td> <td> 1.925</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:acceleration:year:origin</th> <td> -0.3571</td> <td> 0.283</td> <td> -1.263</td> <td> 0.208</td> <td> -0.914</td> <td> 0.200</td>\n",
"</tr>\n",
"<tr>\n",
" <th>displacement:acceleration:year:origin</th> <td> -0.0086</td> <td> 0.018</td> <td> -0.493</td> <td> 0.622</td> <td> -0.043</td> <td> 0.026</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:displacement:acceleration:year:origin</th> <td> -0.0045</td> <td> 0.010</td> <td> -0.456</td> <td> 0.648</td> <td> -0.024</td> <td> 0.015</td>\n",
"</tr>\n",
"<tr>\n",
" <th>horsepower:acceleration:year:origin</th> <td> -0.0031</td> <td> 0.020</td> <td> -0.151</td> <td> 0.880</td> <td> -0.043</td> <td> 0.037</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:horsepower:acceleration:year:origin</th> <td> 0.0022</td> <td> 0.010</td> <td> 0.218</td> <td> 0.827</td> <td> -0.018</td> <td> 0.022</td>\n",
"</tr>\n",
"<tr>\n",
" <th>displacement:horsepower:acceleration:year:origin</th> <td> 6.583e-05</td> <td> 0.000</td> <td> 0.275</td> <td> 0.783</td> <td> -0.000</td> <td> 0.001</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:displacement:horsepower:acceleration:year:origin</th> <td> 5.775e-05</td> <td> 0.000</td> <td> 0.464</td> <td> 0.643</td> <td> -0.000</td> <td> 0.000</td>\n",
"</tr>\n",
"<tr>\n",
" <th>weight:acceleration:year:origin</th> <td> -0.0003</td> <td> 0.001</td> <td> -0.400</td> <td> 0.689</td> <td> -0.002</td> <td> 0.001</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:weight:acceleration:year:origin</th> <td> 0.0005</td> <td> 0.000</td> <td> 1.353</td> <td> 0.177</td> <td> -0.000</td> <td> 0.001</td>\n",
"</tr>\n",
"<tr>\n",
" <th>displacement:weight:acceleration:year:origin</th> <td> -3.57e-06</td> <td> 1.4e-05</td> <td> -0.254</td> <td> 0.799</td> <td>-3.12e-05</td> <td> 2.41e-05</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:displacement:weight:acceleration:year:origin</th> <td> 9.149e-07</td> <td> 3e-06</td> <td> 0.305</td> <td> 0.760</td> <td>-4.98e-06</td> <td> 6.81e-06</td>\n",
"</tr>\n",
"<tr>\n",
" <th>horsepower:weight:acceleration:year:origin</th> <td> 1.061e-05</td> <td> 1.08e-05</td> <td> 0.979</td> <td> 0.328</td> <td>-1.07e-05</td> <td> 3.2e-05</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:horsepower:weight:acceleration:year:origin</th> <td> -6.54e-06</td> <td> 4.75e-06</td> <td> -1.378</td> <td> 0.169</td> <td>-1.59e-05</td> <td> 2.8e-06</td>\n",
"</tr>\n",
"<tr>\n",
" <th>displacement:horsepower:weight:acceleration:year:origin</th> <td> 1.05e-07</td> <td> 1.88e-07</td> <td> 0.559</td> <td> 0.577</td> <td>-2.65e-07</td> <td> 4.75e-07</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:displacement:horsepower:weight:acceleration:year:origin</th> <td>-3.033e-08</td> <td> 4.04e-08</td> <td> -0.751</td> <td> 0.454</td> <td> -1.1e-07</td> <td> 4.92e-08</td>\n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <th>Omnibus:</th> <td>46.080</td> <th> Durbin-Watson: </th> <td> 1.751</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Prob(Omnibus):</th> <td> 0.000</td> <th> Jarque-Bera (JB): </th> <td> 130.524</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Skew:</th> <td> 0.539</td> <th> Prob(JB): </th> <td>4.54e-29</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Kurtosis:</th> <td> 5.613</td> <th> Cond. No. </th> <td>3.52e+16</td>\n",
"</tr>\n",
"</table><br/><br/>Warnings:<br/>[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.<br/>[2] The condition number is large, 3.52e+16. This might indicate that there are<br/>strong multicollinearity or other numerical problems."
],
"text/plain": [
"<class 'statsmodels.iolib.summary.Summary'>\n",
"\"\"\"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: mpg R-squared: 0.931\n",
"Model: OLS Adj. R-squared: 0.906\n",
"Method: Least Squares F-statistic: 38.51\n",
"Date: Tue, 17 Jul 2018 Prob (F-statistic): 1.50e-123\n",
"Time: 08:25:16 Log-Likelihood: -838.24\n",
"No. Observations: 392 AIC: 1880.\n",
"Df Residuals: 290 BIC: 2286.\n",
"Df Model: 101 \n",
"Covariance Type: nonrobust \n",
"=====================================================================================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"-------------------------------------------------------------------------------------------------------------------------------------\n",
"Intercept -0.0172 0.395 -0.043 0.965 -0.795 0.761\n",
"cylinders -0.9694 3.083 -0.314 0.753 -7.037 5.098\n",
"displacement 15.5886 26.002 0.600 0.549 -35.589 66.766\n",
"cylinders:displacement 9.9093 7.405 1.338 0.182 -4.664 24.483\n",
"horsepower 3.5012 23.820 0.147 0.883 -43.381 50.383\n",
"cylinders:horsepower 0.3706 8.546 0.043 0.965 -16.450 17.191\n",
"displacement:horsepower -0.2285 0.316 -0.722 0.471 -0.851 0.394\n",
"cylinders:displacement:horsepower -0.1061 0.094 -1.128 0.260 -0.291 0.079\n",
"weight -0.5497 0.851 -0.646 0.519 -2.224 1.124\n",
"cylinders:weight -0.3716 0.284 -1.309 0.192 -0.931 0.187\n",
"displacement:weight -0.0100 0.022 -0.455 0.649 -0.053 0.033\n",
"cylinders:displacement:weight 0.0016 0.004 0.389 0.698 -0.007 0.010\n",
"horsepower:weight 0.0024 0.011 0.224 0.823 -0.019 0.024\n",
"cylinders:horsepower:weight 0.0040 0.004 1.084 0.279 -0.003 0.011\n",
"displacement:horsepower:weight -1.791e-05 0.000 -0.082 0.935 -0.000 0.000\n",
"cylinders:displacement:horsepower:weight 2.426e-05 4.09e-05 0.593 0.553 -5.62e-05 0.000\n",
"acceleration -24.8662 46.097 -0.539 0.590 -115.593 65.860\n",
"cylinders:acceleration 2.7613 24.785 0.111 0.911 -46.019 51.542\n",
"displacement:acceleration -1.0363 1.455 -0.712 0.477 -3.899 1.827\n",
"cylinders:displacement:acceleration -0.7075 0.511 -1.385 0.167 -1.713 0.298\n",
"horsepower:acceleration 0.0148 1.697 0.009 0.993 -3.325 3.355\n",
"cylinders:horsepower:acceleration -0.1210 0.636 -0.190 0.849 -1.373 1.131\n",
"displacement:horsepower:acceleration 0.0271 0.024 1.153 0.250 -0.019 0.073\n",
"cylinders:displacement:horsepower:acceleration 0.0049 0.006 0.767 0.444 -0.008 0.017\n",
"weight:acceleration 0.0331 0.062 0.534 0.594 -0.089 0.155\n",
"cylinders:weight:acceleration 0.0279 0.018 1.549 0.123 -0.008 0.063\n",
"displacement:weight:acceleration -0.0004 0.001 -0.275 0.784 -0.003 0.002\n",
"cylinders:displacement:weight:acceleration 0.0002 0.000 0.758 0.449 -0.000 0.001\n",
"horsepower:weight:acceleration 7.027e-05 0.001 0.082 0.935 -0.002 0.002\n",
"cylinders:horsepower:weight:acceleration -0.0003 0.000 -1.293 0.197 -0.001 0.000\n",
"displacement:horsepower:weight:acceleration 5.366e-06 1.68e-05 0.319 0.750 -2.77e-05 3.85e-05\n",
"cylinders:displacement:horsepower:weight:acceleration -2.931e-06 3.04e-06 -0.965 0.335 -8.91e-06 3.05e-06\n",
"year -7.7724 5.452 -1.426 0.155 -18.503 2.958\n",
"cylinders:year -2.2048 1.821 -1.211 0.227 -5.789 1.380\n",
"displacement:year -0.1627 0.344 -0.473 0.636 -0.839 0.514\n",
"cylinders:displacement:year -0.1109 0.097 -1.141 0.255 -0.302 0.080\n",
"horsepower:year 0.0345 0.322 0.107 0.915 -0.600 0.669\n",
"cylinders:horsepower:year 0.0331 0.121 0.275 0.784 -0.204 0.270\n",
"displacement:horsepower:year 0.0018 0.004 0.411 0.681 -0.007 0.010\n",
"cylinders:displacement:horsepower:year 0.0013 0.001 1.038 0.300 -0.001 0.004\n",
"weight:year 0.0100 0.015 0.674 0.501 -0.019 0.039\n",
"cylinders:weight:year 0.0061 0.004 1.670 0.096 -0.001 0.013\n",
"displacement:weight:year 7.834e-05 0.000 0.297 0.767 -0.000 0.001\n",
"cylinders:displacement:weight:year -2.064e-05 4.85e-05 -0.425 0.671 -0.000 7.48e-05\n",
"horsepower:weight:year -2.282e-05 0.000 -0.142 0.887 -0.000 0.000\n",
"cylinders:horsepower:weight:year -8.244e-05 4.81e-05 -1.716 0.087 -0.000 1.21e-05\n",
"displacement:horsepower:weight:year 7.251e-07 2.64e-06 0.274 0.784 -4.48e-06 5.93e-06\n",
"cylinders:displacement:horsepower:weight:year -2.607e-07 4.95e-07 -0.527 0.599 -1.23e-06 7.13e-07\n",
"acceleration:year 0.8053 0.649 1.240 0.216 -0.473 2.083\n",
"cylinders:acceleration:year 0.1014 0.330 0.308 0.759 -0.548 0.750\n",
"displacement:acceleration:year 0.0271 0.021 1.285 0.200 -0.014 0.069\n",
"cylinders:displacement:acceleration:year 0.0042 0.007 0.628 0.531 -0.009 0.017\n",
"horsepower:acceleration:year -0.0057 0.024 -0.236 0.813 -0.053 0.041\n",
"cylinders:horsepower:acceleration:year -0.0005 0.009 -0.055 0.956 -0.018 0.017\n",
"displacement:horsepower:acceleration:year -0.0004 0.000 -1.253 0.211 -0.001 0.000\n",
"cylinders:displacement:horsepower:acceleration:year -2.812e-05 8.86e-05 -0.318 0.751 -0.000 0.000\n",
"weight:acceleration:year -0.0008 0.001 -0.799 0.425 -0.003 0.001\n",
"cylinders:weight:acceleration:year -0.0004 0.000 -1.703 0.090 -0.001 6.23e-05\n",
"displacement:weight:acceleration:year 3.71e-06 1.6e-05 0.231 0.817 -2.78e-05 3.53e-05\n",
"cylinders:displacement:weight:acceleration:year -1.154e-06 2.54e-06 -0.455 0.650 -6.15e-06 3.84e-06\n",
"horsepower:weight:acceleration:year 1.038e-07 1.19e-05 0.009 0.993 -2.33e-05 2.35e-05\n",
"cylinders:horsepower:weight:acceleration:year 5.657e-06 3.23e-06 1.751 0.081 -7.03e-07 1.2e-05\n",
"displacement:horsepower:weight:acceleration:year -6.701e-08 2.04e-07 -0.328 0.743 -4.69e-07 3.35e-07\n",
"cylinders:displacement:horsepower:weight:acceleration:year 2.688e-08 3.69e-08 0.728 0.467 -4.58e-08 9.96e-08\n",
"origin -1.6045 3.093 -0.519 0.604 -7.692 4.483\n",
"cylinders:origin -0.7675 2.486 -0.309 0.758 -5.660 4.125\n",
"displacement:origin 10.2514 21.847 0.469 0.639 -32.748 53.251\n",
"cylinders:displacement:origin -11.6479 12.279 -0.949 0.344 -35.815 12.520\n",
"horsepower:origin 3.0028 21.382 0.140 0.888 -39.082 45.087\n",
"cylinders:horsepower:origin -1.6331 11.009 -0.148 0.882 -23.300 20.034\n",
"displacement:horsepower:origin -0.1664 0.266 -0.626 0.532 -0.690 0.357\n",
"cylinders:displacement:horsepower:origin 0.1477 0.151 0.975 0.330 -0.150 0.446\n",
"weight:origin -0.3467 0.727 -0.477 0.634 -1.778 1.085\n",
"cylinders:weight:origin 0.4399 0.414 1.063 0.289 -0.374 1.254\n",
"displacement:weight:origin 0.0092 0.020 0.461 0.645 -0.030 0.048\n",
"cylinders:displacement:weight:origin -0.0017 0.005 -0.367 0.714 -0.011 0.008\n",
"horsepower:weight:origin 0.0054 0.013 0.428 0.669 -0.019 0.030\n",
"cylinders:horsepower:weight:origin -0.0046 0.005 -0.830 0.407 -0.015 0.006\n",
"displacement:horsepower:weight:origin 6.377e-05 0.000 0.321 0.748 -0.000 0.000\n",
"cylinders:displacement:horsepower:weight:origin -2.917e-05 4.8e-05 -0.607 0.544 -0.000 6.53e-05\n",
"acceleration:origin -23.8434 45.005 -0.530 0.597 -112.422 64.735\n",
"cylinders:acceleration:origin 6.8524 20.644 0.332 0.740 -33.778 47.483\n",
"displacement:acceleration:origin -0.6654 1.190 -0.559 0.576 -3.007 1.676\n",
"cylinders:displacement:acceleration:origin 0.7784 0.745 1.045 0.297 -0.688 2.244\n",
"horsepower:acceleration:origin 0.0619 1.563 0.040 0.968 -3.015 3.139\n",
"cylinders:horsepower:acceleration:origin 0.0675 0.737 0.092 0.927 -1.384 1.519\n",
"displacement:horsepower:acceleration:origin 0.0011 0.017 0.063 0.950 -0.033 0.035\n",
"cylinders:displacement:horsepower:acceleration:origin -0.0075 0.009 -0.818 0.414 -0.026 0.011\n",
"weight:acceleration:origin 0.0212 0.055 0.385 0.701 -0.087 0.129\n",
"cylinders:weight:acceleration:origin -0.0292 0.028 -1.033 0.303 -0.085 0.026\n",
"displacement:weight:acceleration:origin 0.0005 0.001 0.395 0.693 -0.002 0.003\n",
"cylinders:displacement:weight:acceleration:origin -0.0002 0.000 -0.661 0.509 -0.001 0.000\n",
"horsepower:weight:acceleration:origin -0.0006 0.001 -0.636 0.526 -0.002 0.001\n",
"cylinders:horsepower:weight:acceleration:origin 0.0004 0.000 0.929 0.354 -0.000 0.001\n",
"displacement:horsepower:weight:acceleration:origin -9.366e-06 1.57e-05 -0.596 0.552 -4.03e-05 2.16e-05\n",
"cylinders:displacement:horsepower:weight:acceleration:origin 3.338e-06 3.28e-06 1.018 0.309 -3.11e-06 9.79e-06\n",
"year:origin -6.1216 4.912 -1.246 0.214 -15.790 3.547\n",
"cylinders:year:origin 4.3985 2.511 1.752 0.081 -0.543 9.340\n",
"displacement:year:origin -0.1074 0.288 -0.373 0.710 -0.675 0.460\n",
"cylinders:displacement:year:origin 0.1231 0.162 0.761 0.447 -0.195 0.441\n",
"horsepower:year:origin 0.0187 0.289 0.065 0.948 -0.551 0.588\n",
"cylinders:horsepower:year:origin -0.0343 0.153 -0.224 0.823 -0.335 0.267\n",
"displacement:horsepower:year:origin 0.0028 0.004 0.793 0.429 -0.004 0.010\n",
"cylinders:displacement:horsepower:year:origin -0.0018 0.002 -0.879 0.380 -0.006 0.002\n",
"weight:year:origin 0.0076 0.011 0.698 0.485 -0.014 0.029\n",
"cylinders:weight:year:origin -0.0079 0.005 -1.559 0.120 -0.018 0.002\n",
"displacement:weight:year:origin -9.258e-05 0.000 -0.398 0.691 -0.001 0.000\n",
"cylinders:displacement:weight:year:origin 2.574e-05 5.68e-05 0.454 0.650 -8.6e-05 0.000\n",
"horsepower:weight:year:origin -0.0001 0.000 -0.995 0.320 -0.000 0.000\n",
"cylinders:horsepower:weight:year:origin 9.734e-05 6.81e-05 1.429 0.154 -3.68e-05 0.000\n",
"displacement:horsepower:weight:year:origin -1.062e-06 2.32e-06 -0.457 0.648 -5.64e-06 3.51e-06\n",
"cylinders:displacement:horsepower:weight:year:origin 2.907e-07 5.94e-07 0.490 0.625 -8.78e-07 1.46e-06\n",
"acceleration:year:origin 0.6903 0.627 1.101 0.272 -0.544 1.925\n",
"cylinders:acceleration:year:origin -0.3571 0.283 -1.263 0.208 -0.914 0.200\n",
"displacement:acceleration:year:origin -0.0086 0.018 -0.493 0.622 -0.043 0.026\n",
"cylinders:displacement:acceleration:year:origin -0.0045 0.010 -0.456 0.648 -0.024 0.015\n",
"horsepower:acceleration:year:origin -0.0031 0.020 -0.151 0.880 -0.043 0.037\n",
"cylinders:horsepower:acceleration:year:origin 0.0022 0.010 0.218 0.827 -0.018 0.022\n",
"displacement:horsepower:acceleration:year:origin 6.583e-05 0.000 0.275 0.783 -0.000 0.001\n",
"cylinders:displacement:horsepower:acceleration:year:origin 5.775e-05 0.000 0.464 0.643 -0.000 0.000\n",
"weight:acceleration:year:origin -0.0003 0.001 -0.400 0.689 -0.002 0.001\n",
"cylinders:weight:acceleration:year:origin 0.0005 0.000 1.353 0.177 -0.000 0.001\n",
"displacement:weight:acceleration:year:origin -3.57e-06 1.4e-05 -0.254 0.799 -3.12e-05 2.41e-05\n",
"cylinders:displacement:weight:acceleration:year:origin 9.149e-07 3e-06 0.305 0.760 -4.98e-06 6.81e-06\n",
"horsepower:weight:acceleration:year:origin 1.061e-05 1.08e-05 0.979 0.328 -1.07e-05 3.2e-05\n",
"cylinders:horsepower:weight:acceleration:year:origin -6.54e-06 4.75e-06 -1.378 0.169 -1.59e-05 2.8e-06\n",
"displacement:horsepower:weight:acceleration:year:origin 1.05e-07 1.88e-07 0.559 0.577 -2.65e-07 4.75e-07\n",
"cylinders:displacement:horsepower:weight:acceleration:year:origin -3.033e-08 4.04e-08 -0.751 0.454 -1.1e-07 4.92e-08\n",
"==============================================================================\n",
"Omnibus: 46.080 Durbin-Watson: 1.751\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 130.524\n",
"Skew: 0.539 Prob(JB): 4.54e-29\n",
"Kurtosis: 5.613 Cond. No. 3.52e+16\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"[2] The condition number is large, 3.52e+16. This might indicate that there are\n",
"strong multicollinearity or other numerical problems.\n",
"\"\"\""
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"''' 3.7.9e: Including interaction terms'''\n",
"\n",
"# Full factorial interaction of all seven predictors: in the formula syntax `a * b` expands to\n",
"# a + b + a:b, so chaining `*` across every predictor generates all higher-order interaction terms.\n",
"# Formula strings need statsmodels.formula.api (smf) rather than sm, and its ols() is lower case.\n",
"model_interac = smf.ols(\n",
"    formula='mpg ~ cylinders * displacement * horsepower * weight * acceleration * year * origin',\n",
"    data=df,\n",
").fit()\n",
"\n",
"# As the summary shows, including all possible (higher-order) interactions yields way too many terms.\n",
"model_interac.summary()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" number of coefficients: \n",
" 128\n"
]
}
],
"source": [
"# Count every fitted parameter of the full-interaction model (the Intercept row is part of params).\n",
"n_coefficients = len(model_interac.params)\n",
"print(\" number of coefficients: \n\", n_coefficients)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table class=\"simpletable\">\n",
"<caption>OLS Regression Results</caption>\n",
"<tr>\n",
" <th>Dep. Variable:</th> <td>mpg</td> <th> R-squared: </th> <td> 0.847</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Model:</th> <td>OLS</td> <th> Adj. R-squared: </th> <td> 0.843</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Method:</th> <td>Least Squares</td> <th> F-statistic: </th> <td> 264.1</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Date:</th> <td>Tue, 17 Jul 2018</td> <th> Prob (F-statistic):</th> <td>9.73e-151</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Time:</th> <td>08:25:16</td> <th> Log-Likelihood: </th> <td> -993.83</td> \n",
"</tr>\n",
"<tr>\n",
" <th>No. Observations:</th> <td> 392</td> <th> AIC: </th> <td> 2006.</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Df Residuals:</th> <td> 383</td> <th> BIC: </th> <td> 2041.</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Df Model:</th> <td> 8</td> <th> </th> <td> </td> \n",
"</tr>\n",
"<tr>\n",
" <th>Covariance Type:</th> <td>nonrobust</td> <th> </th> <td> </td> \n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <td></td> <th>coef</th> <th>std err</th> <th>t</th> <th>P>|t|</th> <th>[0.025</th> <th>0.975]</th> \n",
"</tr>\n",
"<tr>\n",
" <th>Intercept</th> <td> -2.7097</td> <td> 4.686</td> <td> -0.578</td> <td> 0.563</td> <td> -11.923</td> <td> 6.504</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders</th> <td> -2.6962</td> <td> 0.409</td> <td> -6.584</td> <td> 0.000</td> <td> -3.501</td> <td> -1.891</td>\n",
"</tr>\n",
"<tr>\n",
" <th>displacement</th> <td> -0.0775</td> <td> 0.014</td> <td> -5.474</td> <td> 0.000</td> <td> -0.105</td> <td> -0.050</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders:displacement</th> <td> 0.0136</td> <td> 0.002</td> <td> 7.907</td> <td> 0.000</td> <td> 0.010</td> <td> 0.017</td>\n",
"</tr>\n",
"<tr>\n",
" <th>horsepower</th> <td> -0.0476</td> <td> 0.013</td> <td> -3.559</td> <td> 0.000</td> <td> -0.074</td> <td> -0.021</td>\n",
"</tr>\n",
"<tr>\n",
" <th>weight</th> <td> -0.0052</td> <td> 0.001</td> <td> -8.370</td> <td> 0.000</td> <td> -0.006</td> <td> -0.004</td>\n",
"</tr>\n",
"<tr>\n",
" <th>acceleration</th> <td> 0.0598</td> <td> 0.092</td> <td> 0.651</td> <td> 0.515</td> <td> -0.121</td> <td> 0.240</td>\n",
"</tr>\n",
"<tr>\n",
" <th>year</th> <td> 0.7595</td> <td> 0.047</td> <td> 16.044</td> <td> 0.000</td> <td> 0.666</td> <td> 0.853</td>\n",
"</tr>\n",
"<tr>\n",
" <th>origin</th> <td> 0.7087</td> <td> 0.274</td> <td> 2.590</td> <td> 0.010</td> <td> 0.171</td> <td> 1.247</td>\n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <th>Omnibus:</th> <td>35.211</td> <th> Durbin-Watson: </th> <td> 1.456</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Prob(Omnibus):</th> <td> 0.000</td> <th> Jarque-Bera (JB): </th> <td> 88.581</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Skew:</th> <td> 0.432</td> <th> Prob(JB): </th> <td>5.82e-20</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Kurtosis:</th> <td> 5.162</td> <th> Cond. No. </th> <td>1.03e+05</td>\n",
"</tr>\n",
"</table><br/><br/>Warnings:<br/>[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.<br/>[2] The condition number is large, 1.03e+05. This might indicate that there are<br/>strong multicollinearity or other numerical problems."
],
"text/plain": [
"<class 'statsmodels.iolib.summary.Summary'>\n",
"\"\"\"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: mpg R-squared: 0.847\n",
"Model: OLS Adj. R-squared: 0.843\n",
"Method: Least Squares F-statistic: 264.1\n",
"Date: Tue, 17 Jul 2018 Prob (F-statistic): 9.73e-151\n",
"Time: 08:25:16 Log-Likelihood: -993.83\n",
"No. Observations: 392 AIC: 2006.\n",
"Df Residuals: 383 BIC: 2041.\n",
"Df Model: 8 \n",
"Covariance Type: nonrobust \n",
"==========================================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------------------\n",
"Intercept -2.7097 4.686 -0.578 0.563 -11.923 6.504\n",
"cylinders -2.6962 0.409 -6.584 0.000 -3.501 -1.891\n",
"displacement -0.0775 0.014 -5.474 0.000 -0.105 -0.050\n",
"cylinders:displacement 0.0136 0.002 7.907 0.000 0.010 0.017\n",
"horsepower -0.0476 0.013 -3.559 0.000 -0.074 -0.021\n",
"weight -0.0052 0.001 -8.370 0.000 -0.006 -0.004\n",
"acceleration 0.0598 0.092 0.651 0.515 -0.121 0.240\n",
"year 0.7595 0.047 16.044 0.000 0.666 0.853\n",
"origin 0.7087 0.274 2.590 0.010 0.171 1.247\n",
"==============================================================================\n",
"Omnibus: 35.211 Durbin-Watson: 1.456\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 88.581\n",
"Skew: 0.432 Prob(JB): 5.82e-20\n",
"Kurtosis: 5.162 Cond. No. 1.03e+05\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"[2] The condition number is large, 1.03e+05. This might indicate that there are\n",
"strong multicollinearity or other numerical problems.\n",
"\"\"\""
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"''' 3.7.9e: Including chosen interaction terms: cylinder and displacement'''\n",
"\n",
"# Only one interaction is kept: `cylinders * displacement` contributes both main effects plus the\n",
"# cylinders:displacement product term, while the remaining predictors enter additively.\n",
"# Formula strings need statsmodels.formula.api (smf) rather than sm, and its ols() is lower case.\n",
"model_interac2 = smf.ols(\n",
"    formula='mpg ~ cylinders * displacement + horsepower + weight + acceleration + year + origin',\n",
"    data=df,\n",
").fit()\n",
"model_interac2.summary()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table class=\"simpletable\">\n",
"<caption>OLS Regression Results</caption>\n",
"<tr>\n",
" <th>Dep. Variable:</th> <td>mpg</td> <th> R-squared: </th> <td> 0.826</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Model:</th> <td>OLS</td> <th> Adj. R-squared: </th> <td> 0.823</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Method:</th> <td>Least Squares</td> <th> F-statistic: </th> <td> 227.9</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Date:</th> <td>Tue, 17 Jul 2018</td> <th> Prob (F-statistic):</th> <td>1.60e-140</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Time:</th> <td>08:25:16</td> <th> Log-Likelihood: </th> <td> -1018.0</td> \n",
"</tr>\n",
"<tr>\n",
" <th>No. Observations:</th> <td> 392</td> <th> AIC: </th> <td> 2054.</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Df Residuals:</th> <td> 383</td> <th> BIC: </th> <td> 2090.</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Df Model:</th> <td> 8</td> <th> </th> <td> </td> \n",
"</tr>\n",
"<tr>\n",
" <th>Covariance Type:</th> <td>nonrobust</td> <th> </th> <td> </td> \n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <td></td> <th>coef</th> <th>std err</th> <th>t</th> <th>P>|t|</th> <th>[0.025</th> <th>0.975]</th> \n",
"</tr>\n",
"<tr>\n",
" <th>Intercept</th> <td> 8.4915</td> <td> 9.044</td> <td> 0.939</td> <td> 0.348</td> <td> -9.290</td> <td> 26.273</td>\n",
"</tr>\n",
"<tr>\n",
" <th>cylinders</th> <td> -0.5042</td> <td> 0.319</td> <td> -1.579</td> <td> 0.115</td> <td> -1.132</td> <td> 0.123</td>\n",
"</tr>\n",
"<tr>\n",
" <th>displacement</th> <td> 0.0157</td> <td> 0.008</td> <td> 2.081</td> <td> 0.038</td> <td> 0.001</td> <td> 0.030</td>\n",
"</tr>\n",
"<tr>\n",
" <th>horsepower</th> <td> -0.0140</td> <td> 0.014</td> <td> -1.025</td> <td> 0.306</td> <td> -0.041</td> <td> 0.013</td>\n",
"</tr>\n",
"<tr>\n",
" <th>weight</th> <td> -0.0064</td> <td> 0.001</td> <td> -9.851</td> <td> 0.000</td> <td> -0.008</td> <td> -0.005</td>\n",
"</tr>\n",
"<tr>\n",
" <th>acceleration</th> <td> 0.0918</td> <td> 0.098</td> <td> 0.941</td> <td> 0.348</td> <td> -0.100</td> <td> 0.284</td>\n",
"</tr>\n",
"<tr>\n",
" <th>year</th> <td> 0.4189</td> <td> 0.113</td> <td> 3.723</td> <td> 0.000</td> <td> 0.198</td> <td> 0.640</td>\n",
"</tr>\n",
"<tr>\n",
" <th>origin</th> <td> -14.0456</td> <td> 4.699</td> <td> -2.989</td> <td> 0.003</td> <td> -23.284</td> <td> -4.807</td>\n",
"</tr>\n",
"<tr>\n",
" <th>year:origin</th> <td> 0.1989</td> <td> 0.060</td> <td> 3.298</td> <td> 0.001</td> <td> 0.080</td> <td> 0.317</td>\n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <th>Omnibus:</th> <td>31.636</td> <th> Durbin-Watson: </th> <td> 1.346</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Prob(Omnibus):</th> <td> 0.000</td> <th> Jarque-Bera (JB): </th> <td> 50.301</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Skew:</th> <td> 0.542</td> <th> Prob(JB): </th> <td>1.19e-11</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Kurtosis:</th> <td> 4.381</td> <th> Cond. No. </th> <td>1.87e+05</td>\n",
"</tr>\n",
"</table><br/><br/>Warnings:<br/>[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.<br/>[2] The condition number is large, 1.87e+05. This might indicate that there are<br/>strong multicollinearity or other numerical problems."
],
"text/plain": [
"<class 'statsmodels.iolib.summary.Summary'>\n",
"\"\"\"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: mpg R-squared: 0.826\n",
"Model: OLS Adj. R-squared: 0.823\n",
"Method: Least Squares F-statistic: 227.9\n",
"Date: Tue, 17 Jul 2018 Prob (F-statistic): 1.60e-140\n",
"Time: 08:25:16 Log-Likelihood: -1018.0\n",
"No. Observations: 392 AIC: 2054.\n",
"Df Residuals: 383 BIC: 2090.\n",
"Df Model: 8 \n",
"Covariance Type: nonrobust \n",
"================================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"--------------------------------------------------------------------------------\n",
"Intercept 8.4915 9.044 0.939 0.348 -9.290 26.273\n",
"cylinders -0.5042 0.319 -1.579 0.115 -1.132 0.123\n",
"displacement 0.0157 0.008 2.081 0.038 0.001 0.030\n",
"horsepower -0.0140 0.014 -1.025 0.306 -0.041 0.013\n",
"weight -0.0064 0.001 -9.851 0.000 -0.008 -0.005\n",
"acceleration 0.0918 0.098 0.941 0.348 -0.100 0.284\n",
"year 0.4189 0.113 3.723 0.000 0.198 0.640\n",
"origin -14.0456 4.699 -2.989 0.003 -23.284 -4.807\n",
"year:origin 0.1989 0.060 3.298 0.001 0.080 0.317\n",
"==============================================================================\n",
"Omnibus: 31.636 Durbin-Watson: 1.346\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 50.301\n",
"Skew: 0.542 Prob(JB): 1.19e-11\n",
"Kurtosis: 4.381 Cond. No. 1.87e+05\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"[2] The condition number is large, 1.87e+05. This might indicate that there are\n",
"strong multicollinearity or other numerical problems.\n",
"\"\"\""
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"''' 3.7.9e: Including chosen interaction term: year and origin'''\n",
"\n",
"# Additive model with `year * origin` (year + origin + year:origin) as the only interaction.\n",
"# Stored under its own name so the cylinders:displacement fit held in model_interac2 is not\n",
"# overwritten and both candidate models stay available for comparison.\n",
"# Formula strings need statsmodels.formula.api (smf) rather than sm, and its ols() is lower case.\n",
"model_interac3 = smf.ols(\n",
"    formula='mpg ~ cylinders + displacement + horsepower + weight + acceleration + year * origin',\n",
"    data=df,\n",
").fit()\n",
"model_interac3.summary()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table class=\"simpletable\">\n",
"<caption>OLS Regression Results</caption>\n",
"<tr>\n",
" <th>Dep. Variable:</th> <td>mpg</td> <th> R-squared: </th> <td> 0.689</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Model:</th> <td>OLS</td> <th> Adj. R-squared: </th> <td> 0.687</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Method:</th> <td>Least Squares</td> <th> F-statistic: </th> <td> 435.6</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Date:</th> <td>Tue, 17 Jul 2018</td> <th> Prob (F-statistic):</th> <td>1.55e-100</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Time:</th> <td>08:25:16</td> <th> Log-Likelihood: </th> <td> -1148.0</td> \n",
"</tr>\n",
"<tr>\n",
" <th>No. Observations:</th> <td> 397</td> <th> AIC: </th> <td> 2302.</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Df Residuals:</th> <td> 394</td> <th> BIC: </th> <td> 2314.</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Df Model:</th> <td> 2</td> <th> </th> <td> </td> \n",
"</tr>\n",
"<tr>\n",
" <th>Covariance Type:</th> <td>nonrobust</td> <th> </th> <td> </td> \n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <td></td> <th>coef</th> <th>std err</th> <th>t</th> <th>P>|t|</th> <th>[0.025</th> <th>0.975]</th> \n",
"</tr>\n",
"<tr>\n",
" <th>Intercept</th> <td> 42.2035</td> <td> 1.074</td> <td> 39.301</td> <td> 0.000</td> <td> 40.092</td> <td> 44.315</td>\n",
"</tr>\n",
"<tr>\n",
" <th>displacement</th> <td> -0.1393</td> <td> 0.011</td> <td> -12.539</td> <td> 0.000</td> <td> -0.161</td> <td> -0.117</td>\n",
"</tr>\n",
"<tr>\n",
" <th>I(displacement ** 2)</th> <td> 0.0002</td> <td> 2.36e-05</td> <td> 7.242</td> <td> 0.000</td> <td> 0.000</td> <td> 0.000</td>\n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <th>Omnibus:</th> <td>42.434</td> <th> Durbin-Watson: </th> <td> 0.936</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Prob(Omnibus):</th> <td> 0.000</td> <th> Jarque-Bera (JB): </th> <td> 99.950</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Skew:</th> <td> 0.548</td> <th> Prob(JB): </th> <td>1.98e-22</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Kurtosis:</th> <td> 5.200</td> <th> Cond. No. </th> <td>3.36e+05</td>\n",
"</tr>\n",
"</table><br/><br/>Warnings:<br/>[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.<br/>[2] The condition number is large, 3.36e+05. This might indicate that there are<br/>strong multicollinearity or other numerical problems."
],
"text/plain": [
"<class 'statsmodels.iolib.summary.Summary'>\n",
"\"\"\"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: mpg R-squared: 0.689\n",
"Model: OLS Adj. R-squared: 0.687\n",
"Method: Least Squares F-statistic: 435.6\n",
"Date: Tue, 17 Jul 2018 Prob (F-statistic): 1.55e-100\n",
"Time: 08:25:16 Log-Likelihood: -1148.0\n",
"No. Observations: 397 AIC: 2302.\n",
"Df Residuals: 394 BIC: 2314.\n",
"Df Model: 2 \n",
"Covariance Type: nonrobust \n",
"========================================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"----------------------------------------------------------------------------------------\n",
"Intercept 42.2035 1.074 39.301 0.000 40.092 44.315\n",
"displacement -0.1393 0.011 -12.539 0.000 -0.161 -0.117\n",
"I(displacement ** 2) 0.0002 2.36e-05 7.242 0.000 0.000 0.000\n",
"==============================================================================\n",
"Omnibus: 42.434 Durbin-Watson: 0.936\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 99.950\n",
"Skew: 0.548 Prob(JB): 1.98e-22\n",
"Kurtosis: 5.200 Cond. No. 3.36e+05\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"[2] The condition number is large, 3.36e+05. This might indicate that there are\n",
"strong multicollinearity or other numerical problems.\n",
"\"\"\""
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"''' 3.7.9f: Including nonlinear terms'''\n",
"\n",
"# Quadratic fit of mpg on displacement. Wrapping the square in I(...) makes the formula parser\n",
"# evaluate displacement**2 as ordinary arithmetic instead of as a formula operator.\n",
"# NOTE(review): this fit reports 397 observations while the multi-predictor fits report 392,\n",
"# presumably because rows with missing values in other columns are dropped there; confirm\n",
"# before comparing R-squared across these models.\n",
"model_nonlin = smf.ols(\n",
"    formula='mpg ~ displacement + I(displacement**2)',\n",
"    data=df,\n",
").fit()\n",
"model_nonlin.summary()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0,0.5,'mpg')"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Plot outputs: observed mpg (blue) against the quadratic model's fitted values (red).\n",
"y_hat = model_nonlin.fittedvalues\n",
"# fittedvalues only covers the rows actually used in the fit, so pick the matching x values by\n",
"# index; this keeps x and y the same length even if rows were dropped for missing values.\n",
"x_fit = df.loc[y_hat.index, 'displacement']\n",
"plt.scatter(df['displacement'], df['mpg'], color='blue', label='observed')\n",
"plt.scatter(x_fit, y_hat, color='red', label='fitted (quadratic)')\n",
"plt.xlabel('displacement')\n",
"plt.ylabel('mpg')\n",
"plt.legend()\n",
"plt.show()  # render the figure without echoing the Text(...) repr of the last call"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}