# ECON-L1300 # March 2021 / iivo.vehvilainen@aalto.fi # # Mostly copied from Conlon & Gortmaker # see the excellent website: https://pyblp.readthedocs.io # preliminaries ---- # basic R library(data.table) library(ggplot2) library(reshape) library(RColorBrewer) # for iv and logit regression library(AER) library(mlogit) # excel input library(openxlsx) # output library(stargazer) # Python library(reticulate) # for display in class options(digits=4) # helper function to convert R integers to Python integers fi = function(i) return(as.integer(i)) # this needs to point to a valid installation of Python # this is for a HomeBrew install on MacOS use_python("/usr/local/bin/python3") # setup Python libraries (these need to be installed for the Python above) pyblp = import("pyblp") np = import("numpy") pd = import("pandas") # set pyblp options pyblp$options$digits = fi(2) pyblp$options$verbose = TRUE print(pyblp$'__version__') # set working directory setwd("/Users/iivo/Dropbox (Aalto)/Teaching/misc/IO exercises/cereal case") # Lecture 9 Mergers ----------------------------------------------------------------- # Setting Up and Solving the Problem Without Demographics ---- # load data product_data = as.data.table(pd$read_csv(pyblp$data$NEVO_PRODUCTS_LOCATION)) head(product_data[, 1:11]) # the following data is used in demand estimation # market_ids # product_ids # shares # prices # characteristics (sugar, mushy) # demand_instruments0..19 # # for costs and markups, also # firm_ids #stargazer(product_data[1:20,1:10], summary=F, rownames = F, font.size = "tiny") # X1: matrix for the linear demand model (theta1) # - here product_ids are used as fixed effects X1_formulation = pyblp$Formulation('0 + prices', absorb='C(product_ids)') # X2: matrix for the nonlinear demand model (theta2) X2_formulation = pyblp$Formulation('1 + prices + sugar + mushy') product_formulations = c(X1_formulation, X2_formulation) product_formulations # method to integrate over heterogeneous consumer choices mc_integration = pyblp$Integration('monte_carlo', size=fi(50), specification_options=py_dict('seed', fi(0))) mc_integration # compare with Gaussian-Hermite product rule pr_integration = pyblp$Integration('product', size=fi(5)) pr_integration # formulate problem # T number of markets # N number of products across all markets # F number of firms # I number of agents # K1 number of linear demand characteristics # K2 number of nonlinear demand characteristics # D number of demographic variables # MD demand instruments # ED demand side fixed effects mc_problem = pyblp$Problem(product_formulations, product_data, integration=mc_integration) mc_problem pr_problem = pyblp$Problem(product_formulations, product_data, integration=pr_integration) pr_problem # select optimization method # bfgs without bounds used to match nevo bfgs = pyblp$Optimization('bfgs', py_dict('gtol', 1e-4)) bfgs # solve using mc and with unrestricted covariance matrix for random tastes results1 = mc_problem$solve(sigma=np$ones(fi(c(4, 4))), optimization=bfgs) results1 # solve using gaussian product rule and with unrestricted covariance matrix for random tastes results2 = pr_problem$solve(sigma=np$ones(fi(c(4, 4))), optimization=bfgs) results2 # solve using mc and with diagonal covariance matrix for random tastes results3 = mc_problem$solve(sigma=np$eye(fi(4)), optimization=bfgs) results3 # Adding Demographics to the Problem ---- # the following data is used in demand estimation # weights give the weight given to each observation in the integration # nodes are the points in which the unobservables are evaluated # other columns give the observables # load data agent_data = as.data.table(pd$read_csv(pyblp$data$NEVO_AGENTS_LOCATION)) head(agent_data) #stargazer(agent_data[1:20,], summary=F, rownames = F, font.size = "tiny", digits=2) # add observable demographics agent_formulation = pyblp$Formulation('0 + income + income_squared + age + child') agent_formulation # reformulate problem nevo_problem = pyblp$Problem( product_formulations, product_data, agent_formulation, agent_data ) nevo_problem # initialize covariance matrix for random tastes initial_sigma = np$diag(c(0.3302, 2.4526, 0.0163, 0.2441)) # initialize covariance matrix for demographics initial_pi = np$array(matrix(c( c( 5.4819, 0.0, 0.2037, 0.0 ), c(15.8935, -1.2000, 0.0, 2.6342 ), c(-0.2506, 0.0, 0.0511, 0.0 ), c( 1.2650, 0.0, -0.8091, 0.0 ) ), ncol=4, byrow=T)) # bfgs without bounds used to match nevo tighter_bfgs = pyblp$Optimization('bfgs', py_dict('gtol', 1e-5)) # reformulate problem nevo_results = nevo_problem$solve( initial_sigma, initial_pi, optimization=tighter_bfgs, method='1s' ) # see the results nevo_results # Mergers ---- # Marginal Costs and Markups ---- # calculate (constant) marginal costs by using firm_ids costs = nevo_results$compute_costs() p.mc = ggplot(data.table(costs), aes(x=V1))+geom_histogram(bins=50, fill="white", color="black", alpha=0.5)+ggtitle("Marginal Costs") p.mc # pdf(file="p_nevo2001_mc.pdf", width=3.94, height=2.62, pointsize=9) # print(p.mc+theme_bw(base_family="serif", base_size = 8)) # dev.off() # calculate markups markups = nevo_results$compute_markups() p.marks = ggplot(data.table(markups), aes(x=V1))+geom_histogram(bins=50, fill="white", color="black", alpha=0.5)+ggtitle("Markups") p.marks # pdf(file="p_nevo2001_marks.pdf", width=3.94, height=2.62, pointsize=9) # print(p.marks+theme_bw(base_family="serif", base_size = 8)) # dev.off() # calculate hhi hhi = nevo_results$compute_hhi() # calculate profits profits = nevo_results$compute_profits(costs=costs) # calculate surpluses cs = nevo_results$compute_consumer_surpluses() # merger simulation product_data[, merger_ids:=firm_ids] product_data[merger_ids==2, merger_ids:=1] # calculate new prices (assumes costs don't change) changed_prices = nevo_results$compute_prices( firm_ids=product_data[, merger_ids], costs=costs ) # calculate new market shares (assumes costs don't change) changed_shares = nevo_results$compute_shares(changed_prices) # calculate new hhi, markups, profits and cs with changed prices changed_hhi = nevo_results$compute_hhi(firm_ids=product_data[, merger_ids], shares=changed_shares) changed_markups = nevo_results$compute_markups(changed_prices, costs) changed_profits = nevo_results$compute_profits(changed_prices, changed_shares, costs) changed_cs = nevo_results$compute_consumer_surpluses(changed_prices) dt.pic = as.data.table(melt(cbind(changed_hhi,hhi))) dt.pic$name = c('New HHI', 'Old HHI')[dt.pic$X2] ggplot(dt.pic, aes(x=value, color=name))+geom_density(fill="white", alpha=0.5)+ggtitle("HHI Changes") #ggplot(data.table(changed_hhi-hhi), aes(x=V1))+geom_histogram(bins=50, fill="white", color="black", alpha=0.5)+ggtitle("HHI Changes") dt.pic = as.data.table(melt(cbind(changed_markups,markups))) dt.pic$name = c('New markups', 'Old markups')[dt.pic$X2] ggplot(dt.pic, aes(x=value, color=name))+geom_density(fill="white", alpha=0.5)+ggtitle("Markup Changes") #ggplot(data.table(changed_markups-markups), aes(x=V1))+geom_histogram(bins=50, fill="white", color="black", alpha=0.5)+ggtitle("Markup Changes") dt.pic = as.data.table(melt(cbind(changed_profits,profits))) dt.pic$name = c('New profits', 'Old profits')[dt.pic$X2] ggplot(dt.pic, aes(x=value, color=name))+geom_density(fill="white", alpha=0.5)+ggtitle("Profit Changes") + xlim(c(0,0.01)) #ggplot(data.table(changed_profits-profits), aes(x=V1))+geom_histogram(bins=50, fill="white", color="black", alpha=0.5)+ggtitle("Profit Changes") dt.pic = as.data.table(melt(cbind(changed_cs,cs))) dt.pic$name = c('New CS', 'Old CS')[dt.pic$X2] ggplot(dt.pic, aes(x=value, color=name))+geom_density(fill="white", alpha=0.5)+ggtitle("Consumer Surplus Changes")# + xlim(c(0,0.01)) #ggplot(data.table(changed_cs-cs), aes(x=V1))+geom_histogram(bins=50, fill="white", color="black", alpha=0.5)+ggtitle("Consumer Surplus Changes") # Lecture 10 Market power ----------------------------------------------------------------- # Logit demand (manually) ---- c.charnames = names(agent_data)[5:12] # combine mead dt.logit = product_data[agent_data[, lapply(.SD, median), .SDcols=c.charnames, by=market_ids], on=c("market_ids")] dt.logit = dt.logit[dt.logit[, .(outside.shares=1-sum(shares)), by=market_ids], on="market_ids"] dt.logit[, y:=log(shares)-log(outside.shares)] dt.logit[, city_ids := as.factor(city_ids)] c.instruments = names(dt.logit)[grep("demand_", names(dt.logit))] l.logitfm = list() l.logitfm[[1]] = as.formula(paste("y ~ 0 + prices + sugar + mushy | sugar + mushy + factor(brand_ids)")) l.logitfm[[2]] = as.formula(paste("y ~ 0 + prices + product_ids | product_ids + ", paste(c.instruments, collapse="+"), sep="")) l.logitfm[[3]] = as.formula(paste("y ~ 0 + prices + income + age + child + product_ids | income + age + child + product_ids +", paste(c.instruments, collapse="+"), sep="")) l.logitfm[[4]] = as.formula(paste("y ~ 0 + prices + product_ids + city_ids | product_ids + city_ids +", paste(c.instruments, collapse="+"), sep="")) l.logit = list() l.logit[[1]] = lm(y ~ 0 + prices + sugar + mushy, data = dt.logit) l.logit[[2]] = lm(y ~ 0 + prices + product_ids, data = dt.logit) l.logit[[3]] = lm(y ~ 0 + prices + income + age + child + product_ids, data = dt.logit) l.logit[[4]] = ivreg(l.logitfm[[1]], data = dt.logit) l.logit[[5]] = ivreg(l.logitfm[[2]], data = dt.logit) l.logit[[6]] = ivreg(l.logitfm[[3]], data = dt.logit) l.logit[[7]] = ivreg(l.logitfm[[4]], data = dt.logit) c.omitfe = c(paste("product_ids", unique(dt.logit$product_ids), sep=""), paste("city_ids", unique(dt.logit$city_ids), sep="")) stargazer(l.logit, title = "Nevo 2001: Table V", omit=c.omitfe, omit.stat = c("F", "ser"), report=c("vsc"), type="text", single.row = T) #stargazer(l.logit, title = "Nevo 2001: Table V", omit=c.omitfe, omit.stat = c("F", "ser"), report=c("vcs"), type="latex", single.row = F, no.space = T) # Elasticities and Diversion Ratios ---- elasticities = nevo_results$compute_elasticities() diversions = nevo_results$compute_diversion_ratios() # median elasticities dt.ela = data.table(product_data[, .(market_ids, product_ids)], elasticities)[, lapply(.SD, function(x) sprintf("%.3f", median(x),3)), .SDcols=3:26, by=product_ids] setnames(dt.ela, c("product_ids", unique(dt.ela$product_ids))) stargazer(dt.ela, summary=F, rownames = F, type="text") #stargazer(dt.ela, summary=F, rownames = F, type="latex", font.size="tiny", column.sep.width = "-5pt") # median diversions, diversion to outside good on the diagonal dt.dive = data.table(product_data[, .(market_ids, product_ids)], diversions)[, lapply(.SD, function(x) sprintf("%.3f", median(x),3)), .SDcols=3:26, by=product_ids] setnames(dt.dive, c("product_ids", unique(dt.dive$product_ids))) stargazer(dt.dive, summary=F, rownames = F, type="text") #stargazer(dt.dive, summary=F, rownames = F, type="latex", font.size="tiny", column.sep.width = "-5pt") # single market example single_market = product_data[market_ids== 'C01Q1', .I] dt.pic = melt(elasticities[single_market, single_market]) dt.pic$market1 = as.factor((dt.pic$X2-1)) dt.pic$market2 = as.factor((dt.pic$X1-1)) p.ela = ggplot(dt.pic, aes(market1,market2,fill=value))+geom_tile()+scale_y_discrete(limits=rev)+scale_fill_distiller(palette = "YlGnBu") p.ela # pdf(file="p_nevo2001_ela.pdf", width=3.94, height=2.62, pointsize=9) # print(p.ela+theme_bw(base_family="serif", base_size = 8)) # dev.off() # single market example dt.pic = melt(diversions[single_market, single_market]) dt.pic$market1 = as.factor((dt.pic$X2-1)) dt.pic$market2 = as.factor((dt.pic$X1-1)) p.dive = ggplot(dt.pic, aes(market1,market2,fill=value))+geom_tile()+scale_y_discrete(limits=rev)+scale_fill_distiller(palette = "YlGnBu") p.dive # pdf(file="p_nevo2001_dive.pdf", width=3.94, height=2.62, pointsize=9) # print(p.dive+theme_bw(base_family="serif", base_size = 8)) # dev.off() # histogram of elasticities means = nevo_results$extract_diagonal_means(elasticities) aggregates = nevo_results$compute_aggregate_elasticities(factor=0.1) dt.pic = as.data.table(melt(cbind(means,aggregates))) dt.pic$name = c('Mean Own Elasticities', 'Aggregate Elasticities')[dt.pic$X2] p.hista = ggplot(dt.pic, aes(x=value, color=name))+geom_histogram(bins=50, fill="white", alpha=0.5) p.hista # pdf(file="p_nevo2001_hista.pdf", width=3.94, height=2.62, pointsize=9) # print(p.hista+theme_bw(base_family="serif", base_size = 8)) # dev.off() # Full model ---- nevo_results # set up different industry structures product_data[, single_product_ids:=brand_ids] product_data[, joint_ownership_ids:=1] # calculate new prices (assumes costs don't change) single_product = nevo_results$compute_prices(firm_ids=product_data[, single_product_ids], costs=costs) joint_ownership = nevo_results$compute_prices(firm_ids=product_data[, joint_ownership_ids], costs=costs) # calculate new market shares (assumes costs don't change) single_product_markups = nevo_results$compute_markups(single_product, costs) joint_ownership_markups = nevo_results$compute_markups(joint_ownership, costs) # collect and plot dt.data = data.table(t=1:length(markups), cbind(single_product_markups, joint_ownership_markups, markups)) setnames(dt.data, c('t', 'Single', 'Joint', 'Current')) ggplot(melt(dt.data, id="t"), aes(x=value, color=variable))+geom_density(fill="white", alpha=0.5)+ggtitle("Markup Changes") dt.data[, lapply(.SD, mean), .SDcols=c('Single', 'Current', 'Joint')] # Bootstrapping Results ---- # parametric bootstrapping bootstrapped_results = nevo_results$bootstrap(draws=fi(100), seed=fi(0)) # this slow bootstrapped_results bounds = np$percentile( bootstrapped_results$extract_diagonal_means( bootstrapped_results$compute_elasticities() ), q=c(fi(10), fi(90)), axis=fi(0) ) dt.table = data.table(index=as.character(mc_problem$unique_market_ids), "Lower bound"=bounds[1,,1], "Mean Own Elasticity"=as.numeric(means), 'Upper Bound'=bounds[2,,1]) head(dt.table) # Optimal Instruments ---- instrument_results = nevo_results$compute_optimal_instruments(method='approximate') instrument_results updated_problem = instrument_results$to_problem() updated_problem updated_results = updated_problem$solve( nevo_results$sigma, nevo_results$pi, optimization=pyblp$Optimization('bfgs', py_dict('gtol', 1e-5)), method='1s' ) updated_results