##### LECTURE 3 #####

#### LOAD PACKAGES ####

library(broom)

# Install ggplot2 only when it is missing. An unconditional install.packages()
# re-downloads the package on every run and fails when no network or CRAN
# mirror is available.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)

#### PREPARE DATA ####

# Daily display-ad impressions for one week, one vector per ad.
impressions.ad1 <- c(2371, 2400, 2132, 2603, 2360, 2388, 2346)
impressions.ad2 <- c(2980, 2856, 2933, 3180, 2856, 2856, 2877)

# Stack the two vectors into the matrix 'impressions' (one row per ad) and
# label its columns with the weekdays.
impressions <- rbind(impressions.ad1, impressions.ad2)
colnames(impressions) <- c(
  "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"
)

# Same layout for ad clicks; the weekday labels are taken from 'impressions'.
clicks.ad1 <- c(86, 87, 77, 94, 85, 86, 85)
clicks.ad2 <- c(90, 87, 89, 96, 87, 89, 88)
clicks <- rbind(clicks.ad1, clicks.ad2)
colnames(clicks) <- colnames(impressions)

# Impressions minus clicks = how many people saw each ad without clicking it.
no.clicks <- impressions - clicks
rownames(no.clicks) <- c("noclick.ad1", "noclick.ad2")

# Weekly totals per ad: one row for clicks, one for no-clicks, and one column
# per ad. The row labels come from the argument names given to rbind().
ad.data <- rbind(clicks = rowSums(clicks), noclicks = rowSums(no.clicks))
colnames(ad.data) <- c("ad1", "ad2")

# Convert the matrix to a data frame and display it.
ad.data <- as.data.frame(ad.data)

ad.data

#### DOES ONE DISPLAY AD PERFORM BETTER THAN THE OTHER? ####

#Calculate the conversion rate of each ad: its clicks divided by its column
#total in 'ad.data' (clicks + noclicks)
ad.data["clicks", ] / colSums(ad.data)


#### RUN A CHI-SQUARE TEST ####

#Run a chi-square test on the clicks / noclicks table and keep the result
chi.test <- chisq.test(x = ad.data)
chi.test


#What else can the function do? Let's examine the documentation again

help("chisq.test")

#In the help documentation (bottom right of your screen in RStudio), scroll down to 'Value' to see what additional output the 'chisq.test' function can give you
#For example, extracting 'p.value' from the test result stored in 'chi.test' above gives us the p-value:
chi.test[["p.value"]]


#### CTA experiment ####


# NOTE(review): 'cta_experiment' is not created anywhere in this script. It is
# presumably loaded beforehand (e.g. imported through RStudio) and needs the
# columns 'revenue', 'cta' and 'promo' used below -- confirm.

# Run a t-test for the effect of CTA on revenue.
# The formula interface of t.test() expects 'cta' to have exactly two groups.
cta.t.test <- t.test(revenue ~ cta, data = cta_experiment)
cta.t.test

# Run a two-way ANOVA to test for the presence of a moderator (CTA x promo).
# In a model formula 'a * b' already expands to 'a + b + a:b', so the main
# effects do not have to be listed separately.
cta.aov <- aov(
  revenue ~ as.factor(cta) * as.factor(promo),
  data = cta_experiment
)
tidy(cta.aov)

# Pairwise comparison of all experiment cells
TukeyHSD(cta.aov)


# It may be more intuitive to examine this visually, now that we know which
# differences are significant. Explicit labels replace the raw
# 'as.factor(...)' expressions ggplot2 would otherwise show as titles.

# Mean revenue per CTA condition
ggplot(data = cta_experiment, aes(x = as.factor(cta), y = revenue)) +
  stat_summary(fun = mean, geom = "bar", fill = "blue") +
  labs(x = "CTA", y = "Mean revenue")

# Mean revenue per CTA condition, split by promo (side-by-side bars)
ggplot(
  data = cta_experiment,
  aes(x = as.factor(cta), y = revenue, fill = as.factor(promo))
) +
  stat_summary(fun = mean, geom = "bar", position = "dodge") +
  labs(x = "CTA", y = "Mean revenue", fill = "Promo")