########################### # MS-C1620 # Statistical inference # Lecture 10 # ANOVA library(ggplot2) # We use the significance level 0.05 throughout the script ######################################## # Analysis problems to solve with ANOVA # Chicken weights under different types of feed ?chickwts head(chickwts) boxplot(weight ~ feed, data = chickwts) # Plant yield under different conditions ?PlantGrowth head(PlantGrowth) boxplot(weight ~ group, data = PlantGrowth) # Insect spray effectiveness ?InsectSprays head(InsectSprays) boxplot(count ~ spray, data = InsectSprays) ######################################## # Variation between groups vs. within groups ##### Typical case of large SSG (between-group variance) and small SSE (within-group variance) # == The group expected values are different n <- 50 set.seed(12345) # for same results on every run x <- rnorm(n, -5, 1) y <- rnorm(n, 0, 1) z <- rnorm(n, 5, 1) boxplot(x, y, z) # SSG gm <- mean(c(x, y, z)) ssg <- n*(mean(x) - gm)^2 + n*(mean(y) - gm)^2 + n*(mean(z) - gm)^2 gm # SSE sse <- sum((x - mean(x))^2) + sum((y - mean(y))^2) + sum((z - mean(z))^2) ssg sse ssg/sse ##### Typical case of small SSG (between-group variance) and large SSE (within-group variance) # == The group expected values are the same n <- 50 set.seed(12345) # for same results on every run x <- rnorm(n, 0, 1) y <- rnorm(n, 0, 1) z <- rnorm(n, 0, 1) boxplot(x, y, z) # SSG gm <- mean(c(x, y, z)) ssg <- n*(mean(x) - gm)^2 + n*(mean(y) - gm)^2 + n*(mean(z) - gm)^2 gm # SSE sse <- sum((x - mean(x))^2) + sum((y - mean(y))^2) + sum((z - mean(z))^2) ssg sse ssg/sse ##### Borderline case n <- 50 set.seed(12345) # for same results on every run x <- rnorm(n, 0, 1) y <- rnorm(n, 1, 1) z <- rnorm(n, -1, 1) boxplot(x, y, z) # SSG gm <- mean(c(x, y, z)) ssg <- n*(mean(x) - gm)^2 + n*(mean(y) - gm)^2 + n*(mean(z) - gm)^2 # SSE sse <- sum((x - mean(x))^2) + sum((y - mean(y))^2) + sum((z - mean(z))^2) ssg sse ssg/sse # Is SSG/SSE "large enough" for us to conclude that it is not plausible # that the groups have equal expected values # -> this is what ANOVA will tell us. ###################################### # ANOVA on the data sets ##### Chicken weights chicken_aov <- aov(weight ~ feed, data = chickwts) chicken_aov # Now, SSG = 231129.2 and SSE = 195556.0 summary(chicken_aov) # F-statistic value = 15.37 and p-value = 6e-10 for # H0: the expected values of the groups are the same # We conclude that not all feeds are equally effective in increasing the chickens' weights ##### Plant growth plant_aov <- aov(weight ~ group, data = PlantGrowth) plant_aov summary(plant_aov) # p-value = 0.0159 < 0.05 => reject H0 # We conclude that not all three conditions are equally effective in increasing plant yield ##### Insect sprays insect_aov <- aov(count ~ spray, data = InsectSprays) insect_aov summary(insect_aov) # p-value = <2e-16 => reject H0 # We conclude that not all six sprays are equally effective in repelling insects ###################################################### # Pair-wise testing ###### # Illustration of inflated Type I error rate under multiple testing # We conduct t-tests with the null hypothesis H0: mu = 0 for multiple data sets of size 20 # where H0 is true. # We use significance level 0.05 -> probability of a Type I error in an individual test = 0.05 t_test <- function(){ x <- rnorm(20, 0, 1) res <- t.test(x, alternative = "two.sided", mu = 0) c(res$statistic, res$p.value) } t19 <- function(x) dt(x, df = 19) curve(t19, from = -5, to = 5, lwd = 2, lty = 3) abline(h = 0, lwd = 1) q_lower <- qt(0.025, df = 19) segments(q_lower, 0, q_lower, t19(q_lower), col = 2, lwd = 2) segments(-q_lower, 0, -q_lower, t19(-q_lower), col = 2, lwd = 2) # We conduct a single test res_1 <- t_test() abline(v = res_1[1]) # Probability of wrong conclusion is 0.05 # We conduct 100 tests res_100 <- replicate(100, t_test()) abline(v = res_100[1, ]) # Probability of AT LEAST ONE wrong conclusion is >> 0.05 # Bonferroni correction: we decrease the significance level by dividing it # by the number of made tests # (increase the required amount of evidence) curve(t19, from = -5, to = 5, lwd = 2, lty = 3) abline(h = 0, lwd = 1) q_lower <- qt(0.025/100, df = 19) segments(q_lower, 0, q_lower, t19(q_lower) + 0.1, col = 2, lwd = 2) segments(-q_lower, 0, -q_lower, t19(-q_lower) + 0.1, col = 2, lwd = 2) abline(v = res_100[1, ]) # Probability of AT LEAST ONE wrong conclusion is now <= 0.05 # The Bonferroni correction is equivalent to multiplying each individual p-value by # the number of tests (and using the original significance level) # (this interpretation loses the interpretation of p-values of probabilities and thinks of # them simply as the amount of found evidence) # Original p-values and the number of significant ones among them round(res_100[2, ], 3) sum(res_100[2, ] < 0.05) # Bonferroni-corrected p-values round(res_100[2, ]*100, 3) sum(res_100[2, ]*100 < 0.05) ################################################ # Pair-wise testing on the example data sets ##### Chicken weights boxplot(weight ~ feed, data = chickwts) # Original p-values pairwise.t.test(chickwts$weight, chickwts$feed, p.adjust.method = "none") # 12 out of 15 pairs differ significantly # Bonferroni-corrected p-values pairwise.t.test(chickwts$weight, chickwts$feed, p.adjust.method = "bonferroni") # 8 out of 15 pairs differ significantly (4 of the 12 didn't carry enough evidence # that we could consider them different in the light of the multiple testing problem.) ##### Plant yield under different conditions boxplot(weight ~ group, data = PlantGrowth) # Original p-values pairwise.t.test(PlantGrowth$weight, PlantGrowth$group, p.adjust.method = "none") # Bonferroni-corrected p-values pairwise.t.test(PlantGrowth$weight, PlantGrowth$group, p.adjust.method = "bonferroni") # Only differing pair is (trt2, trt1) ##### Insect spray effectiveness boxplot(count ~ spray, data = InsectSprays) # Original p-values pairwise.t.test(InsectSprays$count, InsectSprays$spray, p.adjust.method = "none") # 9 out of 15 pairs differ # Bonferroni-corrected p-values pairwise.t.test(InsectSprays$count, InsectSprays$spray, p.adjust.method = "bonferroni") # 9 out of 15 pairs differ ############################################################ # Checking assumptions of ANOVA ##### Chicken weights boxplot(weight ~ feed, data = chickwts) ggplot(chickwts, aes(x = weight)) + geom_histogram(bins = 5) + facet_wrap(~ feed, nrow = 3) # Small group size make normality checking difficult # -> could be that the individual groups are normal? # Variance assumption bartlett.test(weight ~ feed, data = chickwts) # No evidence against the null hypothesis of equal variances ##### Plant yield under different conditions boxplot(weight ~ group, data = PlantGrowth) ggplot(PlantGrowth, aes(x = weight)) + geom_histogram(bins = 5) + facet_wrap(~ group, nrow = 3) # Same problems as for the chicken data with regards to checking normality # Variance assumption bartlett.test(weight ~ group, data = PlantGrowth) # No evidence against the null hypothesis of equal variances ##### Insect spray effectiveness boxplot(count ~ spray, data = InsectSprays) ggplot(InsectSprays, aes(x = count)) + geom_histogram(bins = 5) + facet_wrap(~ spray, nrow = 3) # Same problems as for the chicken data with regards to checking normality # Variance assumption bartlett.test(count ~ spray, data = InsectSprays) # Strong evidence against H0 -> the variances are not equal ################## # Optional "correction" for the insect spray data: # Taking a square root of the counts "stabilizes" the variances InsectSprays$count2 <- sqrt(InsectSprays$count) boxplot(count2 ~ spray, data = InsectSprays) ggplot(InsectSprays, aes(x = count2)) + geom_histogram(bins = 5) + facet_wrap(~ spray, nrow = 3) bartlett.test(count2 ~ spray, data = InsectSprays) insect_aov <- aov(count2 ~ spray, data = InsectSprays) summary(insect_aov) pairwise.t.test(InsectSprays$count2, InsectSprays$spray, p.adjust.method = "bonferroni") # 10 out of 15 pairs differ ##################################################### # Connection between linear regression and ANOVA # We use the plant data ?PlantGrowth boxplot(weight ~ group, data = PlantGrowth) plant_aov <- aov(weight ~ group, data = PlantGrowth) plant_aov summary(plant_aov) plant_reg <- lm(weight ~ group, data = PlantGrowth) plant_reg summary(plant_reg) # The ANOVA p-value is equal to the F-statistic p-value of linear regression # or the chicken weights data head(chickwts) boxplot(weight ~ feed, data = chickwts) chick_aov <- aov(weight ~ feed, data = chickwts) chick_aov summary(chick_aov) chick_reg <- lm(weight ~ feed, data = chickwts) chick_reg summary(chick_reg) # The ANOVA p-value is equal to the F-statistic p-value of linear regression # ANOVA-like effects can be added to linear regression to obtain "ANCOVA", analysis # of covariance, where both continuous and grouping variables are allowed. library(ggplot2) # Car data head(mpg) # We predict the fuel efficiency (highway miles/gallon) of cars # Based on their drive (front, rear, 4wd) and engine displacement mpg_lm <- lm(hwy ~ displ + drv, data = mpg) summary(mpg_lm) # The baseline class of "drv" is 4wd (as it doesn't have it's own coefficient) # The coefficients of the other groups describe expected devations from this class # Note: the F-statistic p-value no longer corresponds to the one produced by ANOVA # as it tests whether all coefficients are zero (and there is also the continuous # variable in the model). ###################################################### # Non-parametric alternative to ANOVA ##### Plant growth # Assumption on equal group shape seems plausible(?) ggplot(PlantGrowth, aes(x = weight)) + geom_histogram(bins = 5) + facet_wrap(~ group, nrow = 3) # Differences exist: kruskal.test(weight ~ group, data = PlantGrowth) # Same conclusion for pair-wise testing as for ANOVA pairwise.wilcox.test(PlantGrowth$weight, PlantGrowth$group, p.adjust.method = "bonferroni") ##### Chicken feeds # Assumption on equal group shape seems plausible(?) ggplot(chickwts, aes(x = weight)) + geom_histogram(bins = 5) + facet_wrap(~ feed, nrow = 3) # Differences exist: kruskal.test(weight ~ feed, data = chickwts) # Same conclusion for pair-wise testing as for ANOVA pairwise.wilcox.test(chickwts$weight, chickwts$feed, p.adjust.method = "bonferroni") ##### Insect sprays # (it does not matter whether we use the square root transformation as it is an # increasing function and as such preserves order of ranks!) # Assumption on equal group shape seems plausible(?) ggplot(InsectSprays, aes(x = count2)) + geom_histogram(bins = 5) + facet_wrap(~ spray, nrow = 3) # Differences exist: kruskal.test(count2 ~ spray, data = InsectSprays) # Same conclusion for pair-wise testing as for ANOVA pairwise.wilcox.test(InsectSprays$count2, InsectSprays$spray, p.adjust.method = "bonferroni")