car <- read.table("data/car.txt",header=T,sep="\t")
# Convert X and Y to matrices for future operations
# (note that e.g. matrix multiplication not possible for data.frame objects)
X <- as.matrix(car[,c(6,5)]) # X = (Price,Value)
Y <- as.matrix(car[,c(3,4,7:10)]) # Y = (Economy,Service, Desing,Sport,Safety,Easy.h)
XY <- cbind(X,Y)
# (a)
# Compute sample canonical vectors with corrected scaling
R <- cov(XY)
R11 <- R[1:2,1:2] # cov(X)
R22 <- R[3:8,3:8] # cov(Y)
R21 <- R[3:8,1:2] # cov(Y,X)
R12 <- R[1:2,3:8] # cov(X,Y)
R11.inv <- solve(R11)
R22.inv <- solve(R22)
#Non-zero eigenvalues of M1 and M2 are the same
M1 <- R11.inv %*% R12 %*% R22.inv %*% R21
M2 <- R22.inv %*% R21 %*% R11.inv %*% R12
va1 <- eigen(M1)$vectors[,1]
va2 <- eigen(M1)$vectors[,2]
vb1 <- eigen(M2)$vectors[,1]
vb2 <- eigen(M2)$vectors[,2]
#Remove the "ghost" imaginary parts
vb1=as.numeric(vb1)
vb2=as.numeric(vb2)
# Correct scaling. We scale with the standard deviation of the scores
a1 <- va1/sqrt(va1%*%R11%*%va1)
a2 <- va2/sqrt(va2%*%R11%*%va2)
b1 <- vb1/sqrt(vb1%*%R22%*%vb1)
b2 <- vb2/sqrt(vb2%*%R22%*%vb2)
# Another approach (maybe more intuitive) to get the scaling right
eta1 = X %*% va1 #Scores on the first canonical correlation variable related to X
a1 = va1/sqrt(var(eta1))
var(eta1)-va1%*%R11%*%va1
# The condition for correct scaling is that the following are 1
a1 %*% R11 %*% a1 #Note that here R automatically transposes the first vector
a2 %*% R11 %*% a2
b1 %*% R22 %*% b1
b2 %*% R22 %*% b2
# Note that the following are 0
a1 %*% R11 %*% a2
b1 %*% R22 %*% b2
# (b)
# Score vectors:
eta1 <- X%*%a1
eta2 <- X%*%a2
fii1 <- Y%*%b1
fii2 <- Y%*%b2
# sample canonical vectors scaled such that the score vectors have variance 1,
# this scaling makes them unique up to sign
var(eta1)
var(eta2)
var(fii1)
var(fii2)
# Matrix of the canonical correlations
round(cor(cbind(eta1,eta2,fii1,fii2)),2)
#canonical correlations
# r1 = 0.98
# r2 = |-0.91| = 0.91
# The relationship between both pairs seems to be quite strong.
# Note that the canonical correlations are also the square roots of the non-zero
# eigenvalues of M1,M2
round(sqrt(eigen(M1)$values),2)
# (c)
# Value = "Loss of value" = "How fast the value goes down"
# Note that u1,v1 are unique up to sign!
# a1 <- 0.32Price - 0.62Value
# b1 <- 0.43Economy - 0.21 Service +0Desing - 0.47Sport - 0.22Safety
# -0.4 Easy.h
# Recall: 1 = very good, 6 = very bad
# Here the interpretations are a bit tedious, since high values correspond to very bad and
# low values correspond to very good.
# Variables Price and Value have opposite signs.
# --> Variables Economy, Service, Desing, Sport, Safety and Easy Handling have the
#opposite relation to them
plot(eta1,fii1,xlab="'Value index' of the car",
ylab="'Quality' of the car",pch="")
text(eta1,fii1,labels=paste(car$Type,car$Model))
# In the plot, x-axis:
# The most left point would be a car with Value=6 (very bad) and Price = 1 (very good)
# --> On the left we have cheap cars that lose value fast (e.g. Wartburg, Trabant)
# The most right point would be a car with Value=1 (very good) and Price = 6 (very bad)
# --> On the right we have cars that are expensive but lose value slowly (e.g. BMW, Jaguar, Ferrari)
# We interpet u1 as the value index of the car, such that high scores on the x-axis indicate that
# the car is a "worthy" investment. Note also, that Value has almost twice as much weight in the scores
# here (when compared to Price).
# 6 = very bad, 1 = very good
# v1 <- 0.43Economy - 0.21 Service +0Desing - 0.47Sport - 0.22Safety - 0.4 Easy.h
#y-axis:
# Here, Economy, Sport and Easy handling have the largest weight in this component,
# desing has negligible effect on the scores.
# The uppermost points have very bad Economy and very good Service, Sport, Safety and Easy.h
# --> The uppermost cars use a lot of fuel but the other qualities are good
# (e.g. BMW, Ferrari)
# The bottommost points have very good economy but the other qualities are very bad
# --> The bottommost cars are economical but other qualities are bad.
# v1 can be interpreted as a quality index of the car, such that you get a sporty car that is easy
# to drive in the expense of economy.
# a2 <- - 1.41Price - 1.42 Value
# b2 <- 0.46Economy +0.70Service - 0.06Desing + 0.00Sport - 0.30Safety +1.01Easy.h
# Interpret the second component similarly, might be harder (but not impossible)
# since e.g. Ferrari and Wartburg have
# similar profiles.
plot(eta2,fii2,xlab="u2",
ylab="v2",pch="")
text(eta2,fii2,labels=car$Type)