##### LECTURE 3 #####

#### LOAD PACKAGES ####
library(broom)

# Install ggplot2 only when it is missing, so that re-running the script does
# not download and reinstall the package every time.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)

#### PREPARE DATA ####

# Create vectors to represent display ad impressions during one week.
impressions.ad1 <- c(2371, 2400, 2132, 2603, 2360, 2388, 2346)
impressions.ad2 <- c(2980, 2856, 2933, 3180, 2856, 2856, 2877)

# Combine the vectors into a matrix called 'impressions' (one row per ad) and
# name the columns by weekday.
impressions <- rbind(impressions.ad1, impressions.ad2)
colnames(impressions) <- c(
  "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"
)

# Do the same for ad clicks.
clicks.ad1 <- c(86, 87, 77, 94, 85, 86, 85)
clicks.ad2 <- c(90, 87, 89, 96, 87, 89, 88)
clicks <- rbind(clicks.ad1, clicks.ad2)
# Reuse the weekday labels so both matrices are guaranteed to line up.
colnames(clicks) <- colnames(impressions)

# By subtracting 'clicks' from 'impressions', create a third matrix
# 'no.clicks' showing how many people saw the ads but did not click on them.
no.clicks <- impressions - clicks
rownames(no.clicks) <- c("noclick.ad1", "noclick.ad2")

# Sum up the rows to get total clicks and no-clicks for the week, and combine
# them into a 2 x 2 matrix (rows: outcome, columns: ad).
ad.data <- rbind(rowSums(clicks), rowSums(no.clicks))

# Rename the columns and rows.
colnames(ad.data) <- c("ad1", "ad2")
rownames(ad.data) <- c("clicks", "noclicks")

# Convert the matrix to a data frame.
ad.data <- as.data.frame(ad.data)
ad.data

#### DOES ONE DISPLAY AD PERFORM BETTER THAN THE OTHER? ####

# Calculate the conversion rate: clicks divided by total impressions, per ad.
ad.data["clicks", ] / colSums(ad.data)

#### RUN A CHI-SQUARE TEST ####
chi.test <- chisq.test(ad.data)
chi.test

# What else can the function do? Let's examine the documentation again.
?chisq.test

# In the help documentation, scroll down to 'Value' to see what additional
# output the 'chisq.test' function can give you.
# For example, extracting '$p.value' from our test results (the 'chi.test'
# object created above) gives us the p-value:
chi.test$p.value

#### CTA experiment ####

# NOTE(review): 'cta_experiment' is not created anywhere in this script as
# shown; it is presumably imported beforehand (e.g. via the IDE) -- confirm.
# The code below reads its 'revenue', 'cta' and 'promo' columns.
if (!exists("cta_experiment")) {
  stop(
    "'cta_experiment' is not loaded; import the CTA experiment data first.",
    call. = FALSE
  )
}

# Run a t-test to test for fixed effects (effect of CTA on revenue).
cta.t.test <- t.test(revenue ~ cta, data = cta_experiment)
cta.t.test

# Run an ANOVA to test for presence of a moderator.
# 'a * b' in a formula already expands to 'a + b + a:b', so the main effects
# do not need to be listed separately.
cta.aov <- aov(
  revenue ~ as.factor(cta) * as.factor(promo),
  data = cta_experiment
)
tidy(cta.aov)

# Pairwise comparison of all experiment cells.
TukeyHSD(cta.aov)

# It may be more intuitive to examine this visually, now that we know which
# differences are significant.

# Mean revenue per CTA condition.
ggplot(data = cta_experiment, aes(x = as.factor(cta), y = revenue)) +
  stat_summary(fun = mean, geom = "bar", fill = "blue")

# Mean revenue per CTA condition, split by promo condition.
ggplot(
  data = cta_experiment,
  aes(x = as.factor(cta), y = revenue, fill = as.factor(promo))
) +
  stat_summary(fun = mean, geom = "bar", position = "dodge")