###########################
# MS-C1620
# Statistical inference
# Lecture 10
# ANOVA


library(ggplot2)

# We use the significance level 0.05 throughout the script



########################################
# Analysis problems to solve with ANOVA 


# Chicken weights under different types of feed
?chickwts
head(chickwts)
boxplot(weight ~ feed, data = chickwts)


# Plant yield under different conditions
?PlantGrowth
head(PlantGrowth)
boxplot(weight ~ group, data = PlantGrowth)


# Insect spray effectiveness
?InsectSprays
head(InsectSprays)
boxplot(count ~ spray, data = InsectSprays)





########################################
# Variation between groups vs. within groups


##### Typical case of large SSG (between-group variance) and small SSE (within-group variance)
# == The group expected values are different
n <- 50
set.seed(12345)    # for same results on every run
x <- rnorm(n, -5, 1)
y <- rnorm(n, 0, 1)
z <- rnorm(n, 5, 1)
boxplot(x, y, z)

# SSG
gm <- mean(c(x, y, z))
ssg <- n*(mean(x) - gm)^2 + n*(mean(y) - gm)^2 + n*(mean(z) - gm)^2
gm

# SSE
sse <- sum((x - mean(x))^2) + sum((y - mean(y))^2) + sum((z - mean(z))^2)

ssg
sse
ssg/sse




##### Typical case of small SSG (between-group variance) and large SSE (within-group variance)
# == The group expected values are the same
n <- 50
set.seed(12345)    # for same results on every run
x <- rnorm(n, 0, 1)
y <- rnorm(n, 0, 1)
z <- rnorm(n, 0, 1)
boxplot(x, y, z)

# SSG
gm <- mean(c(x, y, z))
ssg <- n*(mean(x) - gm)^2 + n*(mean(y) - gm)^2 + n*(mean(z) - gm)^2
gm

# SSE
sse <- sum((x - mean(x))^2) + sum((y - mean(y))^2) + sum((z - mean(z))^2)

ssg
sse
ssg/sse


##### Borderline case
n <- 50
set.seed(12345)    # for same results on every run
x <- rnorm(n, 0, 1)
y <- rnorm(n, 1, 1)
z <- rnorm(n, -1, 1)
boxplot(x, y, z)

# SSG
gm <- mean(c(x, y, z))
ssg <- n*(mean(x) - gm)^2 + n*(mean(y) - gm)^2 + n*(mean(z) - gm)^2

# SSE
sse <- sum((x - mean(x))^2) + sum((y - mean(y))^2) + sum((z - mean(z))^2)

ssg
sse
ssg/sse

# Is SSG/SSE "large enough" for us to conclude that it is not plausible
# that the groups have equal expected values
# -> this is what ANOVA will tell us.





######################################
# ANOVA on the data sets


##### Chicken weights
chicken_aov <- aov(weight ~ feed, data = chickwts)

chicken_aov
# Now, SSG = 231129.2 and SSE = 195556.0

summary(chicken_aov)
# F-statistic value = 15.37 and p-value = 6e-10 for
# H0: the expected values of the groups are the same

# We conclude that not all feeds are equally effective in increasing the chickens' weights




##### Plant growth
plant_aov <- aov(weight ~ group, data = PlantGrowth)

plant_aov
summary(plant_aov)
# p-value = 0.0159 < 0.05 => reject H0

# We conclude that not all three conditions are equally effective in increasing plant yield




##### Insect sprays
insect_aov <- aov(count ~ spray, data = InsectSprays)

insect_aov
summary(insect_aov)
# p-value = <2e-16 => reject H0

# We conclude that not all six sprays are equally effective in repelling insects






######################################################
# Pair-wise testing



######
# Illustration of inflated Type I error rate under multiple testing


# We conduct t-tests with the null hypothesis H0: mu = 0 for multiple data sets of size 20
# where H0 is true.

# We use significance level 0.05 -> probability of a Type I error in an individual test = 0.05



t_test <- function(){
  x <- rnorm(20, 0, 1)
  res <- t.test(x, alternative = "two.sided", mu = 0)
  c(res$statistic, res$p.value)
}

t19 <- function(x) dt(x, df = 19)
curve(t19, from = -5, to = 5, lwd = 2, lty = 3)
abline(h = 0, lwd = 1)
q_lower <- qt(0.025, df = 19)
segments(q_lower, 0, q_lower, t19(q_lower), col = 2, lwd = 2)
segments(-q_lower, 0, -q_lower, t19(-q_lower), col = 2, lwd = 2)


# We conduct a single test
res_1 <- t_test()
abline(v = res_1[1])
# Probability of wrong conclusion is 0.05




# We conduct 100 tests
res_100 <- replicate(100, t_test())
abline(v = res_100[1, ])
# Probability of AT LEAST ONE wrong conclusion is >> 0.05 




# Bonferroni correction: we decrease the significance level by dividing it
# by the number of made tests
# (increase the required amount of evidence)
curve(t19, from = -5, to = 5, lwd = 2, lty = 3)
abline(h = 0, lwd = 1)
q_lower <- qt(0.025/100, df = 19)
segments(q_lower, 0, q_lower, t19(q_lower) + 0.1, col = 2, lwd = 2)
segments(-q_lower, 0, -q_lower, t19(-q_lower) + 0.1, col = 2, lwd = 2)

abline(v = res_100[1, ])
# Probability of AT LEAST ONE wrong conclusion is now <= 0.05 


# The Bonferroni correction is equivalent to multiplying each individual p-value by
# the number of tests (and using the original significance level)
# (this interpretation loses the interpretation of p-values of probabilities and thinks of
# them simply as the amount of found evidence)


# Original p-values and the number of significant ones among them
round(res_100[2, ], 3)
sum(res_100[2, ] < 0.05)


# Bonferroni-corrected p-values
round(res_100[2, ]*100, 3)
sum(res_100[2, ]*100 < 0.05)




################################################
# Pair-wise testing on the example data sets


##### Chicken weights
boxplot(weight ~ feed, data = chickwts)

# Original p-values
pairwise.t.test(chickwts$weight, chickwts$feed, p.adjust.method = "none")
# 12 out of 15 pairs differ significantly


# Bonferroni-corrected p-values
pairwise.t.test(chickwts$weight, chickwts$feed, p.adjust.method = "bonferroni")
# 8 out of 15 pairs differ significantly (4 of the 12 didn't carry enough evidence
# that we could consider them different in the light of the multiple testing problem.)



##### Plant yield under different conditions
boxplot(weight ~ group, data = PlantGrowth)


# Original p-values
pairwise.t.test(PlantGrowth$weight, PlantGrowth$group, p.adjust.method = "none")


# Bonferroni-corrected p-values
pairwise.t.test(PlantGrowth$weight, PlantGrowth$group, p.adjust.method = "bonferroni")
# Only differing pair is (trt2, trt1)



##### Insect spray effectiveness
boxplot(count ~ spray, data = InsectSprays)


# Original p-values
pairwise.t.test(InsectSprays$count, InsectSprays$spray, p.adjust.method = "none")
# 9 out of 15 pairs differ


# Bonferroni-corrected p-values
pairwise.t.test(InsectSprays$count, InsectSprays$spray, p.adjust.method = "bonferroni")
# 9 out of 15 pairs differ





############################################################
# Checking assumptions of ANOVA


##### Chicken weights
boxplot(weight ~ feed, data = chickwts)

ggplot(chickwts, aes(x = weight)) +
  geom_histogram(bins = 5) +
  facet_wrap(~ feed, nrow = 3)

# Small group size make normality checking difficult
# -> could be that the individual groups are normal?


# Variance assumption
bartlett.test(weight ~ feed, data = chickwts)
# No evidence against the null hypothesis of equal variances




##### Plant yield under different conditions
boxplot(weight ~ group, data = PlantGrowth)

ggplot(PlantGrowth, aes(x = weight)) +
  geom_histogram(bins = 5) +
  facet_wrap(~ group, nrow = 3)

# Same problems as for the chicken data with regards to checking normality


# Variance assumption
bartlett.test(weight ~ group, data = PlantGrowth)
# No evidence against the null hypothesis of equal variances



##### Insect spray effectiveness
boxplot(count ~ spray, data = InsectSprays)

ggplot(InsectSprays, aes(x = count)) +
  geom_histogram(bins = 5) +
  facet_wrap(~ spray, nrow = 3)

# Same problems as for the chicken data with regards to checking normality


# Variance assumption
bartlett.test(count ~ spray, data = InsectSprays)
# Strong evidence against H0 -> the variances are not equal



##################
# Optional "correction" for the insect spray data:
# Taking a square root of the counts "stabilizes" the variances

InsectSprays$count2 <- sqrt(InsectSprays$count)

boxplot(count2 ~ spray, data = InsectSprays)
ggplot(InsectSprays, aes(x = count2)) +
  geom_histogram(bins = 5) +
  facet_wrap(~ spray, nrow = 3)
bartlett.test(count2 ~ spray, data = InsectSprays)
insect_aov <- aov(count2 ~ spray, data = InsectSprays)
summary(insect_aov)
pairwise.t.test(InsectSprays$count2, InsectSprays$spray, p.adjust.method = "bonferroni")
# 10 out of 15 pairs differ






#####################################################
# Connection between linear regression and ANOVA

# We use the plant data
?PlantGrowth
boxplot(weight ~ group, data = PlantGrowth)

plant_aov <- aov(weight ~ group, data = PlantGrowth)
plant_aov
summary(plant_aov)

plant_reg <- lm(weight ~ group, data = PlantGrowth)
plant_reg
summary(plant_reg)

# The ANOVA p-value is equal to the F-statistic p-value of linear regression


# or the chicken weights data
head(chickwts)
boxplot(weight ~ feed, data = chickwts)

chick_aov <- aov(weight ~ feed, data = chickwts)
chick_aov
summary(chick_aov)

chick_reg <- lm(weight ~ feed, data = chickwts)
chick_reg
summary(chick_reg)

# The ANOVA p-value is equal to the F-statistic p-value of linear regression



# ANOVA-like effects can be added to linear regression to obtain "ANCOVA", analysis
# of covariance, where both continuous and grouping variables are allowed.


library(ggplot2)

# Car data
head(mpg)


# We predict the fuel efficiency (highway miles/gallon) of cars
# Based on their drive (front, rear, 4wd) and engine displacement 
mpg_lm <- lm(hwy ~ displ + drv, data = mpg)
summary(mpg_lm)
# The baseline class of "drv" is 4wd (as it doesn't have it's own coefficient)
# The coefficients of the other groups describe expected devations from this class


# Note: the F-statistic p-value no longer corresponds to the one produced by ANOVA
# as it tests whether all coefficients are zero (and there is also the continuous
# variable in the model).





######################################################
# Non-parametric alternative to ANOVA


##### Plant growth

# Assumption on equal group shape seems plausible(?)
ggplot(PlantGrowth, aes(x = weight)) +
  geom_histogram(bins = 5) +
  facet_wrap(~ group, nrow = 3)


# Differences exist:
kruskal.test(weight ~ group, data = PlantGrowth)


# Same conclusion for pair-wise testing as for ANOVA
pairwise.wilcox.test(PlantGrowth$weight, PlantGrowth$group, p.adjust.method = "bonferroni")






##### Chicken feeds

# Assumption on equal group shape seems plausible(?)
ggplot(chickwts, aes(x = weight)) +
  geom_histogram(bins = 5) +
  facet_wrap(~ feed, nrow = 3)


# Differences exist:
kruskal.test(weight ~ feed, data = chickwts)


# Same conclusion for pair-wise testing as for ANOVA
pairwise.wilcox.test(chickwts$weight, chickwts$feed, p.adjust.method = "bonferroni")






##### Insect sprays
# (it does not matter whether we use the square root transformation as it is an
# increasing function and as such preserves order of ranks!)

# Assumption on equal group shape seems plausible(?)
ggplot(InsectSprays, aes(x = count2)) +
  geom_histogram(bins = 5) +
  facet_wrap(~ spray, nrow = 3)


# Differences exist:
kruskal.test(count2 ~ spray, data = InsectSprays)


# Same conclusion for pair-wise testing as for ANOVA
pairwise.wilcox.test(InsectSprays$count2, InsectSprays$spray, p.adjust.method = "bonferroni")