# Exploration of the science-doctorates contingency table: relative
# frequencies, row/column profiles, margins, expected counts under
# independence, and a heatmap of the association ratios (observed / expected).

library(ggplot2)
library(reshape)

# The input path is relative to the working directory. Run the script from the
# project root rather than calling setwd() here.
data <- read.table("data/sciencedoctorates.txt", header = TRUE, row.names = 1)
dim(data)

# Row 13 and column 9 of the raw table hold the totals; drop them so that only
# the observed counts remain.
total_row <- 13
total_col <- 9
S <- data[-total_row, -total_col]
if (interactive()) {
  View(S)
}
class(S)

# prop.table() / margin.table() want a matrix, not a data frame.
D <- as.matrix(S)

prop.table(D)     # table of relative frequencies
prop.table(D, 1)  # row profiles
prop.table(D, 2)  # column profiles

# Sanity check: every profile must sum to 1.
rowSums(prop.table(D, 1))
colSums(prop.table(D, 2))

# Margins recomputed from D, each followed by the totals stored in the raw
# table so the two can be compared by eye.
margin.table(D, 1)
data[, total_col]
margin.table(D, 2)
data[total_row, ]
margin.table(D)
sum(D)

# Expected counts under independence: (row total * column total) / grand total.
v1 <- margin.table(D, 1)
v2 <- margin.table(D, 2)
V1 <- matrix(v1, ncol = 1)
V2 <- matrix(v2, nrow = 1)
E <- (V1 %*% V2) / sum(D)

# Association ratios.
#   D = original data (number of observations)
#   E = expected number of observations under independence
# Values near 1: the year and the science are independent.
# Values < 1:    the science is less frequent in that specific year.
# Values > 1:    the science is more frequent in that specific year.
AR.matrix <- D / E
if (interactive()) {
  View(round(AR.matrix, 2))
}

# A nice way to represent the AR matrix: a heatmap.
# melt() turns the matrix into long format; the plot below uses its X1
# (row label), X2 (column label) and value columns.
melted <- melt(AR.matrix)
if (interactive()) {
  View(melted)
}
range(melted$value)

# Keep the original colour range but widen it when the data fall outside, so
# that no tile is censored to the NA colour.
fill_limits <- range(c(0.5, 1.55, melted$value), finite = TRUE)

# The diverging scale is centred on 1 (independence), so white tiles mean
# "as expected", blue "less frequent" and red "more frequent".
ar_heatmap <- ggplot(melted, aes(x = X1, y = X2, fill = value)) +
  geom_tile() +
  scale_fill_gradient2(
    low = "blue", high = "red", mid = "white",
    midpoint = 1,
    limits = fill_limits,
    name = "AR value"
  )

# Explicit print so the plot is also drawn when the script is source()d.
print(ar_heatmap)