tea <- read.table("TEA.txt",header=T,sep="\t")
dim(tea)
View(tea)
library(ca)
library(ggplot2)
help(mjca)
tea.mca <- mjca(tea,lambda="indicator",reti = T)
# Setting lambda=indicator, the mca is performed like it is presented
# in the lecture slides
# you can use "nd=" here as in the last week
# Many of the objects here are exactly the same as last week,
# check the file R6.2.R from MyCourses
names(tea.mca)
tea.mca$factors
tea.mca$levels.n
sum(tea.mca$levels.n)
tea.mca$sv^2
# Note that only 29% of the variation is explained by the first 2
# components. However, we still proceed to analyze the first two components.
(tea.mca$sv[1]^2 + tea.mca$sv[2]^2) / sum(tea.mca$sv^2)
summary(tea.mca)
plot(tea.mca, arrows=c(T,T))
#Points allows us to add the observations to the plot
points(tea.mca$rowcoord) #Use text() like in previous exercises if you want to label the points
# Remember the angles are important:
# Between different categories:
# less than 90 degrees = attaction
# more than 90 degrees = repulsion
# 90 degrees = independent
# Among the same category:
# less than 90 degrees = similar profile
# more than 90 degrees = profile differs
# Mass: mass (weight) of the point
# QLT: quality of representation of the variable:category
# INR: The inertia of the point
# k=1: Principal coordinates of the first dimension
# COR: The relative contribution of the principal axis to the point inertia
# CTR: The absolute contribution of the point to the inertia of the axis
S <- summary(tea.mca)
barplot(data.frame(S$scree)[,3], ylim = c(0,20), names.arg = paste('PC',1:11), main = 'Screeplot', las = 2, xlab = 'Component', ylab = '% of variation explained');box()
S$scree
data.frame(S$scree)[,3]
plot(tea.mca, arrows = c(T,T))
plot(ca(tea.mca$indmat),
arrows = c(F,T), map = 'symmetric', what = c('none','all'))
# Obtain the principal coordinates scaled with the corresponding eigen value:
tea.covariates <- sweep(tea.mca$colcoord[,1:2],2, tea.mca$sv[1:2],'*')
# Make a normalization function, i.e., to scale values from 0 to 1 (this is for visulization purposes):
normalize <- function(x) {
(x - min(x)) / (max(x) - min(x))
}
# Save the mjca summary object:
a <- summary(tea.mca)
# Generate the scatter plot. Note how the point size is now scaled according to quality of representation:
plot(tea.covariates, xlim = c(-2.5,1), ylim = c(-1.5,2.5), pch = 16, col = 2,
cex = normalize(a$columns$` qlt`) + 1)
# Add arrows. Slight transparency is added to increase visibility.
arrows(rep(0,17), rep(0,17), tea.covariates[,1], tea.covariates[,2],
length = 0.05, col = rgb(1,0,0,0.25))
# "Cross-hair" is added, i.e., dotted lines crossing x and y axis at 0.
abline(h = 0, v = 0, lty = 3)
# Add variable:category names to the plot.
text(tea.covariates, tea.mca$levelnames, pos = 2, cex = 0.75)
n_pl <- unlist(apply(tea,2,table))
x_ipl<-tea.mca$indmat
n <- nrow(tea)
P <- ncol(tea)
K <- length(n_pl)
T_mat <- (x_ipl - matrix(rep(n_pl,n), ncol = 17, byrow = T)/n) /
matrix(rep(sqrt(P*n_pl),n), ncol = 17, byrow = T)
V_mat <- t(T_mat) %*% T_mat
# Trace(V) = K/P - 1 = sum(T^2)
sum(diag(V_mat))