# Marketing-mix regression walkthrough (models lm1-lm4) ----
# NOTE(review): this script assumes a data frame named `watch_data` is already
# loaded in the workspace; it is not read in anywhere in this section - confirm.

# Load and examine 'watch_data' dataset
# A large `scipen` penalty disables annoying scientific notation and gives you
# more reasonable looking numbers in printed output.
options(scipen = 999)
head(watch_data)
?options

# Example of a regression model
# Let's say we wanted to estimate the effect of outdoor advertising on unit
# sales.
?lm
lm1 <- lm(TotalSales ~ Outdoor, data = watch_data)
summary(lm1)
anova(lm1)

# Perhaps the company advertises its more expensive products more?
# Let's check the correlation between 'Outdoor' and 'PricePerUnit'.
?cor
cor(watch_data$Outdoor, watch_data$PricePerUnit)

# Let's check if unit price affects sales.
lm(TotalSales ~ PricePerUnit, data = watch_data)

# It's possible that the coefficient on Outdoor advertising is biased.
# What kind of bias would we expect?
lm2 <- lm(TotalSales ~ Outdoor + PricePerUnit, data = watch_data)
summary(lm2)
summary(lm1) # compare to previous model to check if adjusted R2 has improved

# We would also expect the company to run promotions simultaneously across
# various channels.
# Run a regression where you add marketing expenditure on other channels:
# 'Print' and 'Broad'.
lm3 <- lm(
  TotalSales ~ Outdoor + Print + Broad + PricePerUnit,
  data = watch_data
)
summary(lm3)
summary(lm2)

# What about price? Are we underestimating the negative effect of price by not
# accounting for its association with premium quality products?
# We can consider a dichotomous variable 'Premium' that is 1 if a product is a
# premium product, and 0 when the product is a standard product.
lm4 <- lm(
  TotalSales ~ Outdoor + Print + Broad + PricePerUnit + Premium,
  data = watch_data
)
summary(lm4)
summary(lm3)
# What happens to the coefficient on Price? Why is this?
# Notice also the other variables - what happened to their coefficients?

#### Moderation in marketing mix models ####
# But what if the effect of broadcast advertising is different for different
# product types: premium vs standard products?
# Moderation model: same predictors as lm4 plus a Broad-by-Premium interaction.
# `Broad * Premium` expands to Broad + Premium + Broad:Premium; both main
# effects are already listed, so the only new term is Broad:Premium.
lm5 <- lm(
  TotalSales ~ Outdoor + Print + Broad + PricePerUnit + Premium +
    Broad * Premium,
  data = watch_data
)
# Each call must be its own statement; several expressions on one line without
# a separator do not parse in R.
summary(lm5)
summary(lm4) # compare to the model without the interaction term