car <- read.table("CAR.txt",header=T,sep="\t") # Convert X and Y to matrices for future operations # (note that e.g. matrix multiplication not possible for data.frame objects) X <- as.matrix(car[,c(6,5)]) # X = (Price,Value) Y <- as.matrix(car[,c(3,4,7:10)]) # Y = (Economy,Service, Desing,Sport,Safety,Easy.h) XY <- cbind(X,Y) # (a) # Compute sample canonical vectors with corrected scaling R <- cov(XY) R11 <- R[1:2,1:2] # cov(X) R22 <- R[3:8,3:8] # cov(Y) R21 <- R[3:8,1:2] # cov(Y,X) R12 <- R[1:2,3:8] # cov(X,Y) R11.inv <- solve(R11) R22.inv <- solve(R22) #Non-zero eigenvalues of M1 and M2 are the same M1 <- R11.inv %*% R12 %*% R22.inv %*% R21 M2 <- R22.inv %*% R21 %*% R11.inv %*% R12 va1 <- eigen(M1)\$vectors[,1] va2 <- eigen(M1)\$vectors[,2] vb1 <- eigen(M2)\$vectors[,1] vb2 <- eigen(M2)\$vectors[,2] #Remove the "ghost" imaginary parts vb1=as.numeric(vb1) vb2=as.numeric(vb2) # Correct scaling. We scale with the standard deviation of the scores a1 <- va1/sqrt(va1%*%R11%*%va1) a2 <- va2/sqrt(va2%*%R11%*%va2) b1 <- vb1/sqrt(vb1%*%R22%*%vb1) b2 <- vb2/sqrt(vb2%*%R22%*%vb2) # Another approach (maybe more intuitive) to get the scaling right eta1 = X %*% va1 #Scores on the first canonical correlation variable related to X a1 = va1/sqrt(var(eta1)) var(eta1)-va1%*%R11%*%va1 # The condition for correct scaling is that the following are 1 a1 %*% R11 %*% a1 #Note that here R automatically transposes the first vector a2 %*% R11 %*% a2 b1 %*% R22 %*% b1 b2 %*% R22 %*% b2 # Note that the following are 0 a1 %*% R11 %*% a2 b1 %*% R22 %*% b2 # (b) # Score vectors: eta1 <- X%*%a1 eta2 <- X%*%a2 fii1 <- Y%*%b1 fii2 <- Y%*%b2 # sample canonical vectors scaled such that the score vectors have variance 1, # this scaling makes them unique up to sign var(eta1) var(eta2) var(fii1) var(fii2) # Matrix of the canonical correlations round(cor(cbind(eta1,eta2,fii1,fii2)),2) #canonical correlations # r1 = 0.98 # r2 = |-0.91| = 0.91 # The relationship between both pairs seems to be quite strong. # Note that the canonical correlations are also the square roots of the non-zero # eigenvalues of M1,M2 round(sqrt(eigen(M1)\$values),2) # (c) # Value = "Loss of value" = "How fast the value goes down" # Note that u1,v1 are unique up to sign! # u1 <- 0.32Price - 0.62Value # v1 <- 0.43Economy - 0.21 Service +0Desing - 0.47Sport - 0.22Safety # -0.4 Easy.h # Recall: 1 = very good, 6 = very bad # Here the interpretations are a bit tedious, since high values correspond to very bad and # low values correspond to very good. # Variables Price and Value have opposite signs. # --> Variables Economy, Service, Desing, Sport, Safety and Easy Handling have the #opposite relation to them plot(eta1,fii1,xlab="'Value index' of the car", ylab="'Quality' of the car",pch="") text(eta1,fii1,labels=paste(car\$Type,car\$Model)) # In the plot, x-axis: # The most left point would be a car with Value=6 (very bad) and Price = 1 (very good) # --> On the left we have cheap cars that lose value fast (e.g. Wartburg, Trabant) # The most right point would be a car with Value=1 (very good) and Price = 6 (very bad) # --> On the right we have cars that are expensive but lose value slowly (e.g. BMW, Jaguar, Ferrari) # We interpet u1 as the value index of the car, such that high scores on the x-axis indicate that # the car is a "worthy" investment. Note also, that Value has almost twice as much weight in the scores # here (when compared to Price). # 6 = very bad, 1 = very good # v1 <- 0.43Economy - 0.21 Service +0Desing - 0.47Sport - 0.22Safety - 0.4 Easy.h #y-axis: # Here, Economy, Sport and Easy handling have the largest weight in this component, # desing has negligible effect on the scores. # The uppermost points have very bad Economy and very good Service, Sport, Safety and Easy.h # --> The uppermost cars use a lot of fuel but the other qualities are good # (e.g. BMW, Ferrari) # The bottommost points have very good economy but the other qualities are very bad # --> The bottommost cars are economical but other qualities are bad. # v1 can be interpreted as a quality index of the car, such that you get a sporty car that is easy # to drive in the expense of economy. # u2 <- - 1.41Price - 1.42 Value # v2 <- 0.46Economy +0.70Service - 0.06Desing + 0.00Sport - 0.30Safety +1.01Easy.h # Interpret the second component similarly, might be harder (but not impossible) # since e.g. Ferrari and Wartburg have # similar profiles. plot(eta2,fii2,xlab="u2", ylab="v2",pch="") text(eta2,fii2,labels=car\$Type)