gap07 %>% ggplot( aes(x = gdpPercap, y = lifeExp, col = continent)) + geom_point()
Goal is to predict/affect value of \(Y\), based on knowing/manipulating value of \(X\)
\[= \{(y_1,x_1),\ldots,(y_n,x_n) \}\]
gap07Europe %>% ggplot( aes(x = gdpPercap, y = lifeExp )) + geom_point(size = 2) + geom_smooth( method = "lm")
lm()
fits linear model
lm_out = gap07Europe %>% lm( lifeExp ~ gdpPercap, data = . ) summary(lm_out) ## ## Call: ## lm(formula = lifeExp ~ gdpPercap, data = .) ## ## Residuals: ## Min 1Q Median 3Q Max ## -2.79839 -1.30472 0.00807 1.33443 2.87766 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 7.227e+01 6.942e-01 104.113 < 2e-16 *** ## gdpPercap 2.146e-04 2.514e-05 8.537 2.8e-09 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 1.598 on 28 degrees of freedom ## Multiple R-squared: 0.7225, Adjusted R-squared: 0.7125 ## F-statistic: 72.88 on 1 and 28 DF, p-value: 2.795e-09
broom
package converts model info into tidy data-frames
tidy()
for parameters valuesglance()
for model statisticsaugment()
for fitted/residual values to original datalibrary(broom) tidy(lm_out) ## # A tibble: 2 x 5 ## term estimate std.error statistic p.value ## <chr> <dbl> <dbl> <dbl> <dbl> ## 1 (Intercept) 72.3 0.694 104. 8.50e-38 ## 2 gdpPercap 0.000215 0.0000251 8.54 2.80e- 9 augment(lm_out) ## # A tibble: 30 x 9 ## lifeExp gdpPercap .fitted .se.fit .resid .hat .sigma .cooksd ## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 76.4 5937. 73.5 0.562 2.88 0.124 1.52 2.62e-1 ## 2 79.8 36126. 80.0 0.403 -0.196 0.0637 1.63 5.47e-4 ## 3 79.4 33693. 79.5 0.364 -0.0616 0.0518 1.63 4.29e-5 ## 4 74.9 7446. 73.9 0.530 0.983 0.110 1.61 2.63e-2 ## 5 73.0 10681. 74.6 0.464 -1.56 0.0845 1.60 4.80e-2 ## 6 75.7 14619. 75.4 0.392 0.339 0.0603 1.63 1.54e-3 ## 7 76.5 22833. 77.2 0.297 -0.686 0.0346 1.62 3.42e-3 ## 8 78.3 35278. 79.8 0.389 -1.51 0.0592 1.60 2.99e-2 ## 9 79.3 33207. 79.4 0.357 -0.0854 0.0498 1.63 7.88e-5 ## 10 80.7 30470. 78.8 0.322 1.85 0.0406 1.59 2.94e-2 ## # ... with 20 more rows, and 1 more variable: .std.resid <dbl>
add_predictions()
in modelr
packagelibrary(modelr) gap_pred = tibble( gdpPercap = (1:4)*10000 ) %>% add_predictions( lm_out , var = "lifeExp") gap_pred ## # A tibble: 4 x 2 ## gdpPercap lifeExp ## <dbl> <dbl> ## 1 10000 74.4 ## 2 20000 76.6 ## 3 30000 78.7 ## 4 40000 80.9
\[ R^2=1 - \frac{MSE(\textrm{model})}{MSE(\textrm{no rel.})} \]