# 2.1 # (a) # The data is separated with tabulator (you can verify this e.g. with Notepad) smoking <- read.table("tobacco.txt",header=T,sep="\t") model <- lm(ILL~CONSUMPTION,data=smoking) countries <- c("Iceland","Norway","Sweden","Canada","Denmark", "Austria","USA","Netherlands","Switzerland","Finland","England") # pch sets the marker plot(smoking$CONSUMPTION,smoking$ILL, ylab="cases in 1950", xlab="CONSUMPTION in 1930", pch=16, main="CONSUMPTION/ILL per 100 000 individuals") abline(model,col="red") # The arguments below: x-cord, y-cordd, labels = the shown labels, # cex = size of the labels, pos=position (1,2,3 or 4) # if the labels are unclear, sunbstitute with labels=1:11 text(smoking$CONSUMPTION, smoking$ILL, labels=countries, cex= 0.8,pos=3) # pch sets the marker plot(smoking$CONSUMPTION,smoking$ILL, ylab="cases in 1950", xlab="CONSUMPTION in 1930", pch=16, main="CONSUMPTION/ILL per 100 000 individuals") abline(model,col="red", lwd=0.8) # identify provides a convenient way to identify data points by clicking. # Press esc when you are done identify(smoking$CONSUMPTION,smoking$ILL,labels=countries) # (b) FIT <- model$fit RES <- model$res # (c) plot(smoking$ILL,FIT, ylab="Fits",xlab="Sick",pch=16) # A way to show only the label of USA (observation number 7) text(smoking$ILL,FIT, labels = ifelse(rownames(smoking)=="7", "USA", NA),pos=2) plot(FIT,RES, xlab="Fits",ylab="Residuals",pch=16) abline(a=0,b=0,col="grey",lwd=1) text(FIT,RES, labels = ifelse(rownames(smoking)=="7", countries, NA),pos=3) # (d) The outlierness of USA is visible especially in (FIT,RES) plot # (e) cooksd <- cooks.distance(model) # parameter xaxt ="n" leaves x-axis empty x <-plot(cooksd,xaxt="n",xlab=" ",ylab="Cook's distances") # parameter side = 1 refers to the x-axis, # parameter at gives the points on the x-axis # las=2 rotates the text 90 degrees axis(side=1,at=1:11, labels=countries,las=2) # (f) # Remove the line 7, i.e. USA smoking2 <- smoking[-7,] model2 <- lm(ILL~CONSUMPTION,data=smoking2) summary(model2) # Compare to Homework 1. # 2 # a # run if you have not installed car before # install.packages("car") library(car) hald <- read.table("hald.txt",header=T,sep="\t") fullmodel=lm(HEAT~CHEM1+CHEM2+CHEM3+CHEM4,data=hald) summary(fullmodel) vif(fullmodel) model2 <- lm(CHEM2 ~ CHEM1+CHEM3+CHEM4,data=hald) summary(model2) 1/(1-summary(model2)$r.squared) model3 <- lm(CHEM4 ~ CHEM1 + CHEM2 + CHEM3, data=hald) summary(model3) 1/(1-summary(model3)$r.squared) # b # Note that, we want to minimize the value in the AIC column step(fullmodel) model4 <- lm(HEAT ~ CHEM1 + CHEM2 +CHEM4 , data=hald) summary(model4) b = seq(from=-4, to=4, length.out=9) hist(fullmodel$residuals,col="grey",xlab=" ",main="Residuals of the full model",breaks=b) #