Set the working directory to the “Chunk2” folder in the short course material.

Load the R-formatted version of the AudiA4 search results from cars.com.

# List the files present in the current working directory.
dir(path = ".")
## [1] "AudiA4.rda"         "inClass.html"       "inClass.pdf"       
## [4] "inClass.Rmd"        "inClassChunk2.html" "inClassChunk2.Rmd" 
## [7] "WorldBankCO2.rda"
# Restore the object(s) saved in the .rda file into the workspace.
load(file = "AudiA4.rda")
# Show which objects (data sets) the workspace now contains.
ls()
## [1] "AudiA4"

This data set is stored as a data frame, a structure that allows each column to be numeric, character, or a more exotic type.

# Preview the first six rows of the data frame.
head(AudiA4, n = 6L)
##   year price mileage distance engine                color
## 1 2015 42992    4908      353   2.0T                 Gray
## 2 2015 42991    5716      353   2.0T               Silver
## 3 2014 42000    5640      491   2.0T  Ice Silver Metallic
## 4 2014 42000    7161      491   2.0T Dakota Gray Metallic
## 5 2015 40995    1415      489   2.0T      Brilliant Black
## 6 2014 39500   10362       40   2.0T                 Gray
# Work in thousands: dividing mileage and price by 1000 keeps the numbers
# short to type and easy to read.
year <- AudiA4[["year"]]
price <- AudiA4[["price"]] / 1000
mileage <- AudiA4[["mileage"]] / 1000
# Logical flag: TRUE for cars whose year is before 2010.
oldCars <- year < 2010
# Adding 1 to the logical flag gives colour index 2 for the older cars and
# colour index 1 for the rest; pch = 16 draws filled circles.
plot(mileage, price, col = oldCars + 1, pch = 16)

Fitting a line to these data

# Least-squares fit of price on mileage (both already divided by 1000).
fit0 <- lm(formula = price ~ mileage)
# Print the coefficient table, residual summary, and R-squared for the fit.
summary(fit0)
## 
## Call:
## lm(formula = price ~ mileage)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.4540  -2.6258   0.1576   3.1038  16.4742 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 34.183934   0.396794   86.15   <2e-16 ***
## mileage     -0.212263   0.005223  -40.64   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.079 on 361 degrees of freedom
## Multiple R-squared:  0.8206, Adjusted R-squared:  0.8201 
## F-statistic:  1652 on 1 and 361 DF,  p-value: < 2.2e-16

Add the line to the scatterplot

# Redraw the scatterplot, then overlay the fitted line price = a + b * mileage.
plot(mileage, price)
# First coefficient of the fit: the intercept, i.e. the "a".
intercept <- coef(fit0)[1]
# Second coefficient of the fit: the slope on mileage, i.e. the "b".
slope <- coef(fit0)[2]
abline(a = intercept, b = slope, lwd = 3, col = "orange3")