# Examine the 'watch_data' dataset.
# NOTE(review): nothing in this script creates 'watch_data'; it is assumed to
# already exist in the session -- confirm how it is meant to be loaded.

# A large 'scipen' penalty makes R prefer fixed notation over scientific
# notation when printing numbers.
options(scipen = 999)

# Preview the first rows of the data.
head(x = watch_data)

# Open the help page for options() (it documents 'scipen').
help("options")
# Example regression model:
# estimate the effect of outdoor advertising on unit sales.
help("lm")
lm1 <- lm(formula = TotalSales ~ Outdoor, data = watch_data)

# Coefficient table and fit statistics for the outdoor-only model
summary(lm1)

# Analysis-of-variance table for the same fit
anova(lm1)


# Perhaps the company advertises its more expensive products more?
# Check the correlation between 'Outdoor' and 'PricePerUnit'.
help("cor")
cor(x = watch_data[["Outdoor"]], y = watch_data[["PricePerUnit"]])

# Check whether unit price affects sales.
# summary() is used (as for every other model in this script) because printing
# the bare fit shows only the point estimates; judging whether price has an
# effect needs the standard errors and p-values as well.
summary(lm(TotalSales ~ PricePerUnit, data = watch_data))

# It's possible that the coefficient on Outdoor advertising is biased.
# What kind of bias would we expect?


# Outdoor-advertising model, now controlling for unit price
lm2 <- lm(formula = TotalSales ~ Outdoor + PricePerUnit, data = watch_data)

summary(lm2)
# Compare with the previous model to check whether adjusted R-squared improved
summary(lm1)

# The company is also likely to run promotions on several channels at once,
# so add marketing expenditure on the other channels: 'Print' and 'Broad'.
lm3 <- lm(
  formula = TotalSales ~ Outdoor + Print + Broad + PricePerUnit,
  data = watch_data
)

summary(lm3)
summary(lm2) # previous model, for comparison

# What about price? Are we underestimating its negative effect by ignoring the
# association between price and premium-quality products?
# 'Premium' is a dichotomous variable: 1 for a premium product, 0 for a
# standard product.
lm4 <- lm(
  formula = TotalSales ~ Outdoor + Print + Broad + PricePerUnit + Premium,
  data = watch_data
)

summary(lm4)
summary(lm3) # previous model, for comparison

# What happens to the coefficient on Price? Why is this?


# Notice also the other variables - what happened to their coefficients?


#### Moderation in marketing mix models ####
# But what if the effect of broadcast advertising differs by product type
# (premium vs standard products)?
# 'Broad' and 'Premium' already enter as main effects, so only their
# interaction is added with ':'. ('Broad * Premium' would expand to
# Broad + Premium + Broad:Premium and list both main effects a second time;
# the fitted model is the same.)
lm5 <- lm(
  formula = TotalSales ~ Outdoor + Print + Broad + PricePerUnit + Premium +
    Broad:Premium,
  data = watch_data
)

summary(lm5)
summary(lm4) # previous model, for comparison