tea <- read.table("TEA.txt",header=T,sep="\t") dim(tea) View(tea) library(ca) library(ggplot2) help(mjca) tea.mca <- mjca(tea,lambda="indicator",reti = T) # Setting lambda=indicator, the mca is performed like it is presented # in the lecture slides # you can use "nd=" here as in the last week # Many of the objects here are exactly the same as last week, # check the file R6.2.R from MyCourses names(tea.mca) tea.mca\$factors tea.mca\$levels.n sum(tea.mca\$levels.n) tea.mca\$sv^2 # Note that only 29% of the variation is explained by the first 2 # components. However, we still proceed to analyze the first two components. (tea.mca\$sv[1]^2 + tea.mca\$sv[2]^2) / sum(tea.mca\$sv^2) summary(tea.mca) plot(tea.mca, arrows=c(T,T)) #Points allows us to add the observations to the plot points(tea.mca\$rowcoord) #Use text() like in previous exercises if you want to label the points # Remember the angles are important: # Between different categories: # less than 90 degrees = attaction # more than 90 degrees = repulsion # 90 degrees = independent # Among the same category: # less than 90 degrees = similar profile # more than 90 degrees = profile differs # Mass: mass (weight) of the point # QLT: quality of representation of the variable:category # INR: The inertia of the point # k=1: Principal coordinates of the first dimension # COR: The relative contribution of the principal axis to the point inertia # CTR: The absolute contribution of the point to the inertia of the axis S <- summary(tea.mca) barplot(data.frame(S\$scree)[,3], ylim = c(0,20), names.arg = paste('PC',1:11), main = 'Screeplot', las = 2, xlab = 'Component', ylab = '% of variation explained');box() S\$scree data.frame(S\$scree)[,3] plot(tea.mca, arrows = c(T,T)) plot(ca(tea.mca\$indmat), arrows = c(F,T), map = 'symmetric', what = c('none','all')) # Obtain the principal coordinates scaled with the corresponding eigen value: tea.covariates <- sweep(tea.mca\$colcoord[,1:2],2, tea.mca\$sv[1:2],'*') # Make a normalization function, i.e., to scale values from 0 to 1 (this is for visulization purposes): normalize <- function(x) { (x - min(x)) / (max(x) - min(x)) } # Save the mjca summary object: a <- summary(tea.mca) # Generate the scatter plot. Note how the point size is now scaled according to quality of representation: plot(tea.covariates, xlim = c(-2.5,1), ylim = c(-1.5,2.5), pch = 16, col = 2, cex = normalize(a\$columns\$` qlt`) + 1) # Add arrows. Slight transparency is added to increase visibility. arrows(rep(0,17), rep(0,17), tea.covariates[,1], tea.covariates[,2], length = 0.05, col = rgb(1,0,0,0.25)) # "Cross-hair" is added, i.e., dotted lines crossing x and y axis at 0. abline(h = 0, v = 0, lty = 3) # Add variable:category names to the plot. text(tea.covariates, tea.mca\$levelnames, pos = 2, cex = 0.75) n_pl <- unlist(apply(tea,2,table)) x_ipl<-tea.mca\$indmat n <- nrow(tea) P <- ncol(tea) K <- length(n_pl) T_mat <- (x_ipl - matrix(rep(n_pl,n), ncol = 17, byrow = T)/n) / matrix(rep(sqrt(P*n_pl),n), ncol = 17, byrow = T) V_mat <- t(T_mat) %*% T_mat # Trace(V) = K/P - 1 = sum(T^2) sum(diag(V_mat))