# Package providing corrgram() and its panel functions.
# install.packages("corrgram")
library(corrgram)

# Read the decathlon data; the first column of the file supplies the row names.
data <- read.table("data/decathlon.txt", header = TRUE, row.names = 1)

# Drop columns 1, 12 and 13 and keep the rest for the analysis.
# NOTE(review): presumably these are non-event columns (totals, labels, ...)
# -- confirm against the data file.
DEC <- data[, -c(1, 12, 13)]
# View(DEC)

# Correlogram of the retained variables with the default panels.
corrgram(DEC)

# Correlations of every variable with the 7th column of DEC.
cor(DEC)[, 7]

# Try tuning: pie panels, min/max on the diagonal and a blue-to-red palette.
corrgram(
  DEC,
  panel = panel.pie,
  diag.panel = panel.minmax,
  col.regions = colorRampPalette(c(
    "blue4", "blue3", "blue2", "blue1", "blue",
    "red", "red1", "red2", "red3", "red4"
  ))
)
# Principal component analysis of DEC based on the correlation matrix.
DEC.PCA <- princomp(DEC, cor = TRUE)

# (a) Summary of the fitted PCA (same approach as last week).
summary(DEC.PCA)

# (b) Choose 4 components, since it is hard to find interpretations for more.
#     --> they explain 69% of the variation, which is usually very good for
#         real data.
round(loadings(DEC.PCA)[, 1:4], digits = 2)

# Note that the signs of the loadings can always be reversed.
#
# Comp 1:
#   high negative loadings: shot put, discus throw and high jump
#   high positive loadings: R1500 and R400m
#   --> Strength again
#
# Comp 2:
#   high negative loadings: R100, Hurdles, Long Jump
#   high positive loadings: R1500m
#   --> "Explosive" speed, with "endurance" on the other side of the scale
#
# Comp 3: Special techniques. Something to do with the supporting leg?
#
# Comp 4: Acrobatics?

# Scores on the first two principal components.
plot(x = DEC.PCA$scores[, 1], y = DEC.PCA$scores[, 2])
# Add one artificial observation to see how a gross outlier shows up in the
# PCA: its first `s` variables are set to 1200 and the remaining ones to 800.
s <- 3

# Assign by row name instead of a fixed row index: this appends the row after
# the last observation whatever the number of rows is, and simply overwrites
# it if this section is run again. The number of columns is taken from DEC
# rather than hard-coded.
DEC["outlier", ] <- c(rep(1200, s), rep(800, ncol(DEC) - s))

# Scatterplot matrix of the data including the artificial observation.
plot(DEC)

# Refit the PCA (correlation matrix) on the contaminated data.
DEC.PCA <- princomp(DEC, cor = TRUE)
scores <- DEC.PCA$scores

# The first direction carries the largest variation, so the outlier can
# easily be detected in the plot of the first two components.
plot(scores[, 1], scores[, 2], pch = 16)

# Scores on the third and fourth components.
plot(scores[, 3], scores[, 4], pch = 16)