###########################
# MS-C1620
# Statistical inference
# Lecture 3
library(scales)
library(EnvStats)
# The significance level is always taken to be equal to 0.05 below.
####################################################################################
# One-sample sign test
faithful
# Claim: the location of the eruption times equals 3 minutes 35 seconds
plot(faithful)
hist(faithful$eruptions, breaks = 25)
# Is the median of the eruption times equal to 3 minutes 35 seconds (3.583 min)?
# H0: med == 3.583
# H1: med != 3.583
n_obs <- length(faithful$eruptions)
# The test statistic is the number of "red" points, i.e. observations above 3.583.
plot(faithful$eruptions, rep(1, n_obs),
     col = as.factor(faithful$eruptions > 3.583))
abline(v = 3.583)
n_above <- sum(faithful$eruptions > 3.583)
# Under H0 the count is Bin(n, 1/2), so we expect about n_obs/2 = 136
# observations above 3.583.
grid <- (n_obs/2 - 50):(n_obs/2 + 50)
plot(grid, dbinom(grid, n_obs, 0.5), type = "b")
abline(h = 0)
abline(v = 0.5 * n_obs, lwd = 2)
# The corresponding observed count is 164.
# The p-value is the probability mass outside the green lines.
abline(v = 0.5 * n_obs + abs(n_above - 0.5 * n_obs), col = 3, lwd = 2) # observed
abline(v = 0.5 * n_obs - abs(n_above - 0.5 * n_obs), col = 3, lwd = 2)
# The one-sample sign test is exactly an "Exact Binomial Test" on the sign count.
# Significance level 5%
binom.test(n_above, n_obs)
# Extremely small p-value -> reject H0
# The same test using a normal approximation to Bin(n, 1/2):
z_stat <- (n_above - n_obs/2)/sqrt(n_obs/4)
curve(dnorm, from = -5, to = 5)
abline(v = 0, lwd = 2)
abline(h = 0)
# The p-value is the probability mass outside the green lines.
abline(v = -abs(z_stat), col = 3, lwd = 2) # observed
abline(v = abs(z_stat), col = 3, lwd = 2)
2*(1 - pnorm(abs(z_stat)))
# Reject H0
####################################################################################
# One-sample sign test vs. one-sample t-test
# The t-test for the same data, at significance level 5%:
t.test(faithful$eruptions, mu = 3.583)
# Both tests target the "location" 3.583 -- why do the conclusions differ?
# The histogram below (strongly bimodal, far from normal) suggests an answer.
hist(faithful$eruptions, breaks = 25)
####################################################################################
# Power comparison
# We have a sample of size 20 from a N(mu, 1)-distribution
# H0: mu == 0
# H1: mu != 0
# Tests: one-sample t-test vs. one-sample sign test, both on significance level 5%
#
# Type I error rates:
# we generate data where H0 is true and check whether the test "sees" this.
# The null distribution of the t-statistic with the 5% critical points:
t_density <- function(x) dt(x, df = 19)
curve(t_density, from = -5, to = 5)
abline(h = 0)
abline(v = qt(0.025, df = 19), col = 2, lwd = 2)
abline(v = qt(0.975, df = 19), col = 2, lwd = 2)
# Observed value for a single sample drawn under H0
x <- rnorm(20, 0, sqrt(1))
abline(v = t.test(x)$statistic, lty = 2)
# Repeat multiple times
replicate(20, abline(v = t.test(rnorm(20, 0, sqrt(1)))$statistic, lty = 2))
# Simulated rejection probabilities under H0 (both should be close to 0.05)
mean(replicate(10000, t.test(rnorm(20, 0, sqrt(1)))$p.value < 0.05))
mean(replicate(10000, binom.test(sum(rnorm(20, 0, sqrt(1)) > 0), 20)$p.value < 0.05))
# Type II error when the true value is actually mu = 1
# The null distribution
curve(t_density, from = -4, to = 6)
abline(h = 0)
abline(v = qt(0.025, df = 19), col = 2, lwd = 2)
abline(v = qt(0.975, df = 19), col = 2, lwd = 2)
# Observed value for a single sample drawn with mu = 1
x <- rnorm(20, 1, sqrt(1))
abline(v = t.test(x)$statistic, lty = 2)
# Repeat multiple times
replicate(20, abline(v = t.test(rnorm(20, 1, sqrt(1)))$statistic, lty = 2))
# Simulated probabilities of NOT rejecting when mu = 1 (type II error)
mean(replicate(10000, t.test(rnorm(20, 1, sqrt(1)))$p.value >= 0.05))
mean(replicate(10000, binom.test(sum(rnorm(20, 1, sqrt(1)) > 0), 20)$p.value >= 0.05))
# Sign test has larger type II error for normally distributed data!
####################################################################################
# Discrete data and the one-sample sign test
# We test whether the population median equals 5 for the following simulated sample
set.seed(012019)
x <- sample(0:10, 30, replace = TRUE)
# With discrete data some observations can equal the hypothesized median exactly.
# Conservative approach: of the two possible sign counts (> or >=) we use the
# one that gives the LARGER p-value, i.e. the zero differences are placed on
# the side that "balances" the signs.
# The two counts may differ:
sum(x - 5 > 0)
sum(x - 5 >= 0)
# 17 negative, 2 zeroes, 11 positive -> the zeroes join the positive side
# in the conservative approach.
binom.test(sum(x - 5 > 0), 30)$p.value
binom.test(sum(x - 5 >= 0), 30)$p.value # Conservative choice
####################################################################################
# Paired sign test
# Weights of ten mice before the treatment
before <- c(200.1, 190.9, 192.7, 213, 241.4, 196.9, 172.2, 185.5, 205.2, 193.7)
# Weights of the same mice after the treatment
after <- c(392.9, 393.2, 345.1, 393, 434, 427.9, 422, 383.9, 392.3, 352.2)
# Does the treatment affect the weight?
x <- cbind(before, after)
matplot(t(x), type = "l")
abline(v = c(1, 2))
# Work with the within-pair differences
d <- after - before
plot(d, rep(1, 10))
# Hard to say whether the differences are normally distributed -> sign test
# H0: med_d == 0, i.e. the treatment had no effect
# H1: med_d != 0, i.e. the treatment had an effect
binom.test(sum(d > 0), 10)
# -> The treatment had an effect
####################################################################################
# One-sample signed rank test
# Assumes that the distribution is symmetric
nottem
# Average January temperatures at Nottingham, 1920-1939
# (nottem is a monthly series starting January 1920, so every 12th value is a January)
nottem_jan <- nottem[seq(from = 1, length.out = 20, by = 12)]
# The sample looks symmetric:
hist(nottem_jan)
boxplot(nottem_jan)
# Is the (population) January average equal to 40 degrees Fahrenheit?
# H0: med == 40
# H1: med != 40
# Significance level 5%
# Ranks of the absolute deviations from the hypothesized median 40
nottem_r <- rank(abs(nottem_jan - 40))
n_jan <- length(nottem_jan)
nottem_jan
nottem_r
plot(nottem_jan, rep(1, n_jan))
abline(v = 40)
text(nottem_jan, rep(0.9, n_jan), labels = as.character(nottem_r))
# One-sample signed rank test
wilcox.test(nottem_jan, mu = 40)
# No evidence against the null
####################################################################################
# Power comparison continued
# Type II error of the one-sample signed rank test in the same setting as before.
# Simulated null distribution of the signed rank statistic for H0: med = 0
nulls <- replicate(1000, wilcox.test(rnorm(20, 0, sqrt(1)))$statistic)
hist(nulls, breaks = 30, xlim = c(0, 250))
crit <- quantile(nulls, c(0.025, 0.975))
abline(v = crit[1], lwd = 2, col = 2)
abline(v = crit[2], lwd = 2, col = 2)
# Test statistic values for samples drawn with med = 1
abline(v = replicate(20, wilcox.test(rnorm(20, 1, sqrt(1)))$statistic), lty = 2)
mean(replicate(10000, wilcox.test(rnorm(20, 1, sqrt(1)))$p.value >= 0.05))
# Better type II error rate than the sign test thanks to the extra rank information!
####################################################################################
# Two-sample rank test
# Data which show the effect of two soporific drugs
# (increase in hours of sleep compared to control) on 10 patients.
sleep
# Do the drugs differ in their effect? Significance level 5%.
# H0: med1 == med2
# H1: med1 != med2
# Are the two distributions the same up to a shift?
group1 <- sleep[sleep$group == 1, 1]
group2 <- sleep[sleep$group == 2, 1]
hist(group1)
hist(group2)
plot(group1, rep(1, 10), xlim = c(-2, 6), cex = 1.5)
abline(h = 1.0)
points(group2, rep(0.8, 10), col = 2, cex = 1.5)
abline(h = 0.8, col = 2)
# Maybe...
wilcox.test(group1, group2)
# p-value 0.069 - not enough evidence against H0
# Recall that the t-test gave a p-value of 0.079:
t.test(group1, group2)
####################################################################################
# Two-sample rank test with ranked data
# The 10 best selling albums of all time with the corresponding nationalities (US, non-US) are:
# (https://www.pastemagazine.com/articles/2018/08/the-best-selling-albums-of-all-time.html)
# 10. Alanis Morissette: Jagged Little Pill (non-US)
# 9. Adele: 21 (non-US)
# 8. AC/DC: Back in Black (non-US)
# 7. Fleetwood Mac: Rumours (non-US)
# 6. Whitney Houston (Various Artists): The Bodyguard Soundtrack (US)
# 5. Led Zeppelin: Led Zeppelin IV (non-US)
# 4. Shania Twain: Come on Over (non-US)
# 3. Eagles: Hotel California (US)
# 2. Eagles: Their Greatest Hits 1971-1975 (US)
# 1. Michael Jackson: Thriller (US)
# Is US music more popular (in the sense of more albums sold)? Significance level 5%.
# H0: med_US == med_nonUS
# H1: med_US != med_nonUS
# The medians can be thought of as the medians of album sales in the two regions.
albums <- data.frame(rank = 1:10,
                     origin = c("US", "US", "US", "non-US", "non-US",
                                "US", "non-US", "non-US", "non-US", "non-US"))
us_ranks <- albums$rank[albums$origin == "US"]
nonus_ranks <- albums$rank[albums$origin == "non-US"]
us_ranks
nonus_ranks
wilcox.test(us_ranks, nonus_ranks)
# US albums are more popular (p-value = 0.0381)