import numpy as np
import pandas as pd
import numpy.matlib
import matplotlib as plt
import seaborn as sns
from scipy.optimize import fsolve
import pyblp
from ipywidgets import IntProgress
from IPython.display import display
import time
T = 1000 # number of geographical markets
J = 4 # number of firms
n_sims = 5000 # number of different types of consumers
# Model parameters (consumer utility side)
beta1 = 2.0 # quality
beta2 = 1/2 # length
# Random coefficient: one taste draw on "roof" per simulated consumer.
beta3 = np.random.normal(4, 4, size=(n_sims, 1)) # Shape (5000, 1)
alpha = -2.0 # price
# Marginal-cost parameters
gamma1 = 1 # observable cost shifter
gamma2 = 1/5 # length
gamma3 = 1 # roof
gamma4 = 1/5; # unobservable quality
# Exogenous product characteristics, drawn once (no RNG seed is set, so every
# run produces a different simulated dataset).
# NOTE(review): numpy.matlib is deprecated; np.tile is the modern equivalent.
X = np.absolute(np.random.standard_normal(size=(T, J)), dtype=np.float64) # Shape (1000, 4)
W = np.absolute(np.random.standard_normal(size=(T, J)), dtype=np.float64) # Shape (1000, 4)
# Product length: the same 4 firm-level draws repeated across all markets.
l = np.matlib.repmat(np.random.uniform(5, 10, size=(1, 4)), 1000, 1).astype('float64') # Shape (1000, 4)
xi = np.random.normal(0, 2, size=(T, J)).astype('float64') # Shape (1000, 4), unobserved quality
# Firms 1 and 2 (columns 0-1) offer a roof; firms 3 and 4 do not.
roof = np.matlib.repmat([1,1,0,0], 1000, 1).astype('float64') # Shape (1000, 4)
mc = gamma1*W + gamma2*l + gamma3*roof + gamma4*xi; # Shape (1000, 4), marginal costs
p0 = np.random.uniform(5, 10, size=(1, J)).astype('float64') # Shape (1,4), initial guess for fsolve
# Pre-allocated containers for equilibrium prices and shares.
P_eq = np.zeros((T, J)).astype('float64') # Shape (1000, 4)
ms_share_eq = np.zeros((T, J)).astype('float64') # Shape (1000, 4)
P_eq2 = np.zeros((T, J)).astype('float64') # Shape (1000, 4)
ms_share_eq2 = np.zeros((T, J)).astype('float64') # Shape (1000, 4)
own_derivative = np.zeros((T, J)).astype('float64') # Shape (1000, 4)
def ms_share(beta1, beta2, beta3, alpha, X, xi, roof, l, p):
    """Simulated mean market shares for a single market (random-coefficient logit).

    Parameters
    ----------
    beta1, beta2, alpha : float
        Taste parameters on quality, length, and price.
    beta3 : ndarray, shape (Si, 1)
        Simulated random coefficients on ``roof``, one per consumer draw.
    X, xi, roof, l, p : ndarray, shape (J,)
        Product characteristics, demand shocks, roof dummies, lengths, prices.

    Returns
    -------
    ndarray, shape (J,)
        Market shares averaged over the Si simulated consumers.

    Fixes vs. the original: the number of products is no longer hard-coded to 4
    (was ``np.ones((1,4))``), and the deprecated ``numpy.matlib.repmat`` calls
    are replaced by plain broadcasting.
    """
    # Mean utility, identical across simulated consumers.  Shape (J,)
    delta = beta1 * X + beta2 * l + alpha * p + xi
    # Consumer-specific utility: (Si, 1) * (J,) broadcasts to (Si, J).
    u = np.exp(delta + beta3 * roof)
    # Include the outside option in the denominator.  Shape (Si, 1)
    denominator = 1.0 + u.sum(axis=1, keepdims=True)
    # Average the individual choice probabilities over consumers.  Shape (J,)
    return np.mean(u / denominator, axis=0)
def partial(beta1, beta2, beta3, alpha, X, xi, roof, l, p):
    """Own-price share derivatives ds_j/dp_j for a single market.

    Same arguments as :func:`ms_share`.  Returns an ndarray of shape (J,)
    holding, for each product j, the simulated mean of ``alpha * s_ij * (1 - s_ij)``
    over consumers i — the own-price derivative of the logit share.

    Fixes vs. the original: the hard-coded ``np.ones((1,4))`` (J == 4 only) and
    deprecated ``numpy.matlib.repmat`` are gone, and the algebraically
    equivalent form ``alpha*s*(1-s)`` replaces
    ``alpha*(e^u*D - e^(2u))/D**2``, which needlessly computes ``exp(2u)`` and
    overflows for large utilities.
    """
    # Mean utility, identical across simulated consumers.  Shape (J,)
    delta = beta1 * X + beta2 * l + alpha * p + xi
    # Consumer-specific exponentiated utility.  Shape (Si, J)
    u = np.exp(delta + beta3 * roof)
    # Individual choice probabilities, outside option included.  Shape (Si, J)
    s = u / (1.0 + u.sum(axis=1, keepdims=True))
    # Own-price derivative of the logit share, averaged over consumers.
    return np.mean(alpha * s * (1.0 - s), axis=0)
def joint(beta1, beta2, beta3, alpha, X, xi, roof, l, p, mc):
    """Extra FOC terms when products 1 and 3 (indices 0 and 2) share an owner.

    Returns a length-J vector that is zero except at indices 0 and 2, which
    hold the internalized cross-price terms
    ``(p_k - mc_k) * ds_k/dp_j / (ds_j/dp_j)`` for the merged pair.
    NOTE(review): J == 4 and the merging product indices 0/2 are hard-coded
    (the ``[1,0,1,0]`` mask and explicit ``[0]``/``[2]`` indexing below).
    """
    J = X.size
    Si = beta3.size
    # Mean utility; every row of the (Si, J) matrix is identical.
    delta = np.matlib.repmat(beta1*X + beta2*l + alpha*p + xi, Si, 1) # Shape (5000, 4)
    # Consumer-specific random-coefficient utility on "roof".
    mu = numpy.kron(beta3, np.ones((1,4)))*np.matlib.repmat(roof, Si, 1) # Shape (5000, 4) * Shape (5000, 4)
    # Logit denominator (outside option included), repeated across columns.
    denominator = np.transpose(np.matlib.repmat(1 + np.sum(np.exp(delta + mu), 1), J, 1)) # Shape (5000, 4)
    # Own-price derivative: algebraically alpha * s_ij * (1 - s_ij), averaged over i.
    ds_dp = np.mean(alpha*(np.exp(delta + mu)*denominator - np.exp(2*(delta + mu)))/numpy.square(denominator),0) # Shape (4,)
    # Cross-price derivative between products 0 and 2 (note symmetry: dsj_dpk = dsk_dpj).
    # delta[0,0] and delta[0,2] are scalars (all delta rows are equal); the
    # expression appears to compute -alpha * s_i0 * s_i2 averaged over consumers,
    # with the [1,0,1,0] mask zeroing the non-merging columns — TODO confirm.
    ds_dpk = np.mean(np.matlib.repmat(-alpha*(np.exp(delta[0,0] + mu[:,0])*np.exp(delta[0,2] + mu[:, 2])), J, 1).transpose()*np.matlib.repmat([1,0,1,0], 1, 1)/numpy.square(denominator),0)
    new = np.zeros((1, J)).astype('float64').flatten()
    # Internalized markups on the partner product, scaled by the own derivative.
    new[2] = ((p[0]-mc[0])*ds_dpk[0])/ds_dp[2]
    new[0] = ((p[2]-mc[2])*ds_dpk[2])/ds_dp[0]
    return new
# Sanity check on one market: entries 0 and 2 (firms 1 and 3) should be nonzero.
joint(beta1, beta2, beta3, alpha, X[1,:], xi[1,:], roof[1,:], l[1,:], p0[0], mc[1,:])
array([-0.02531814, 0. , -0.25435052, 0. ])
# Solve the pre-merger Bertrand FOCs market by market: p = mc - s(p)/(ds/dp).
f = IntProgress(min=0, max=T)  # progress bar widget
display(f)  # render the bar
start = time.time()
for t in range(T):
    # FOC residual for market t; fsolve drives this to zero in p.
    def foc(p, t=t):
        markup = ms_share(beta1, beta2, beta3, alpha, X[t,:], xi[t,:], roof[t,:], l[t,:], p) \
                 / partial(beta1, beta2, beta3, alpha, X[t,:], xi[t,:], roof[t,:], l[t,:], p)
        return p - mc[t,:] + markup
    P_eq[t] = fsolve(foc, p0)
    ms_share_eq[t] = ms_share(beta1, beta2, beta3, alpha, X[t,:], xi[t,:], roof[t,:], l[t,:], P_eq[t])
    f.value += 1  # advance the progress bar
run_time = time.time() - start
print("Simulation took ", run_time, " seconds.")
Simulation took 131.82363057136536 seconds.
# Re-solve equilibrium prices adding the merger term from joint() to the FOCs.
f = IntProgress(min=0, max=T)  # progress bar widget
display(f)  # render the bar
start = time.time()
for t in range(T):
    # FOC residual including the internalized cross-price effect for firms 1 & 3.
    def foc_merged(p, t=t):
        markup = ms_share(beta1, beta2, beta3, alpha, X[t,:], xi[t,:], roof[t,:], l[t,:], p) \
                 / partial(beta1, beta2, beta3, alpha, X[t,:], xi[t,:], roof[t,:], l[t,:], p)
        extra = joint(beta1, beta2, beta3, alpha, X[t,:], xi[t,:], roof[t,:], l[t,:], p, mc[t,:])
        return p - mc[t,:] + markup + extra
    P_eq2[t] = fsolve(foc_merged, p0)
    ms_share_eq2[t] = ms_share(beta1, beta2, beta3, alpha, X[t,:], xi[t,:], roof[t,:], l[t,:], P_eq2[t])
    f.value += 1  # advance the progress bar
run_time = time.time() - start
print("Simulation took ", run_time, " seconds.")
Simulation took 201.24563789367676 seconds.
# Flatten the (T, J) simulation outputs into a long table with one row per
# product-market pair, matching pyblp's expected "product data" layout.
prices = P_eq.flatten()
prices_new = P_eq2.flatten()
shares = ms_share_eq.flatten()
shares_new = ms_share_eq2.flatten()
lenght = l.flatten()  # (sic) the misspelled name is used by the pyblp formulas below
quality = X.flatten()
roof = roof.flatten()
cost_shifter = W.flatten()
firm_ids = np.tile([1, 2, 3, 4], T)          # 1,2,3,4 repeated for every market
market_ids = np.repeat(np.arange(1, T + 1), J)  # each market id repeated J times
# Column order must match the stacking order below.
column_values = ['prices', 'shares', 'prices_new', 'shares_new', 'lenght',
                 'quality', 'roof', 'cost_shifter', 'firm_ids', 'market_ids']
# column_stack upcasts everything (including the ids) to float64.
array = np.column_stack((prices, shares, prices_new, shares_new, lenght,
                         quality, roof, cost_shifter, firm_ids, market_ids))
product_data = pd.DataFrame(array, columns=column_values)
product_data
prices | shares | prices_new | shares_new | lenght | quality | roof | cost_shifter | firm_ids | market_ids | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 5.910280 | 0.322988 | 6.130638 | 0.280232 | 9.289495 | 1.731064 | 1.0 | 1.809457 | 1.0 | 1.0 |
1 | 4.555746 | 0.234198 | 4.632518 | 0.270791 | 6.436202 | 0.670940 | 1.0 | 1.123405 | 2.0 | 1.0 |
2 | 3.194267 | 0.353419 | 3.350598 | 0.333630 | 8.302433 | 1.601594 | 0.0 | 0.063608 | 3.0 | 1.0 |
3 | 3.340081 | 0.005545 | 3.341473 | 0.007135 | 7.254000 | 0.114980 | 0.0 | 1.362842 | 4.0 | 1.0 |
4 | 4.987572 | 0.218543 | 5.028287 | 0.212208 | 9.289495 | 1.883396 | 1.0 | 1.764873 | 1.0 | 2.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
3995 | 2.870091 | 0.200705 | 2.956088 | 0.247826 | 7.254000 | 0.888805 | 0.0 | 0.331444 | 4.0 | 999.0 |
3996 | 3.580606 | 0.283593 | 3.594812 | 0.281257 | 9.289495 | 0.170978 | 1.0 | 0.144692 | 1.0 | 1000.0 |
3997 | 4.794581 | 0.313852 | 4.800505 | 0.316465 | 6.436202 | 1.028149 | 1.0 | 1.415033 | 2.0 | 1000.0 |
3998 | 3.706210 | 0.056498 | 3.792981 | 0.048562 | 8.302433 | 1.079338 | 0.0 | 1.598796 | 3.0 | 1000.0 |
3999 | 1.557284 | 0.043913 | 1.558349 | 0.044801 | 7.254000 | 0.009820 | 0.0 | 0.051920 | 4.0 | 1000.0 |
4000 rows × 10 columns
# Percentage price change from internalizing the merger FOC term, per product-market.
product_data['change'] = ((product_data['prices_new'] - product_data['prices'])/product_data['prices'])*100
product_data.describe()
prices | shares | prices_new | shares_new | lenght | quality | roof | cost_shifter | firm_ids | market_ids | change | |
---|---|---|---|---|---|---|---|---|---|---|---|
count | 4000.000000 | 4000.000000 | 4000.000000 | 4000.000000 | 4000.000000 | 4000.000000 | 4000.000000 | 4000.000000 | 4000.000000 | 4000.000000 | 4000.000000 |
mean | 3.730021 | 0.207720 | 3.786560 | 0.205013 | 7.820533 | 0.816659 | 0.500000 | 0.804350 | 2.500000 | 500.500000 | 1.656171 |
std | 1.048181 | 0.155626 | 1.050548 | 0.155974 | 1.075705 | 0.612073 | 0.500063 | 0.604820 | 1.118174 | 288.711081 | 2.910614 |
min | 0.941257 | 0.000022 | 0.941340 | 0.000028 | 6.436202 | 0.000031 | 0.000000 | 0.001111 | 1.000000 | 1.000000 | 0.000041 |
25% | 2.954057 | 0.074483 | 3.013760 | 0.070911 | 7.049550 | 0.330628 | 0.000000 | 0.329449 | 1.750000 | 250.750000 | 0.115211 |
50% | 3.672491 | 0.178496 | 3.732701 | 0.175350 | 7.778216 | 0.691540 | 0.500000 | 0.686598 | 2.500000 | 500.500000 | 0.483058 |
75% | 4.452086 | 0.317189 | 4.502783 | 0.313845 | 8.549199 | 1.172601 | 1.000000 | 1.165375 | 3.250000 | 750.250000 | 1.801436 |
max | 8.647254 | 0.732521 | 8.649037 | 0.733033 | 9.289495 | 3.877329 | 1.000000 | 3.596473 | 4.000000 | 1000.000000 | 33.645216 |
# Demand instruments: own cost shifter plus BLP-style "sum of rivals" terms.
product_data = product_data.rename(columns={'cost_shifter': 'demand_instruments0'})
# For each source column, the instrument is the market total minus the own value,
# i.e. the sum over competitors in the same market.
for src, dst in [('demand_instruments0', 'demand_instruments1'),
                 ('quality', 'demand_instruments2')]:
    market_totals = product_data.groupby(['market_ids'])[src].transform("sum")
    product_data[dst] = market_totals - product_data[src]
# Demand specification for pyblp: X1 (linear characteristics) and X2 (random coefficients).
product_formulations = (
    pyblp.Formulation('-1 + quality + lenght + prices + roof'), ## Linear demand parameters
    pyblp.Formulation('-1 + roof') ## Random coef variables
)
n_sim = 1000  # Monte Carlo draws per market for the share integrals
mc_integration = pyblp.Integration('monte_carlo', size = n_sim, specification_options={'seed': 0}) ## Monte carlo simulation for integration in the inner loop
problem = pyblp.Problem(product_formulations, product_data, integration = mc_integration) ## Formulate problem
bfgs = pyblp.Optimization('bfgs', {'gtol': 1e-4}) ## Use Broyden–Fletcher–Goldfarb–Shanno algorithm within PyBlp
initial_sigma = np.array([4]) ## Just a guess
# Solve in parallel
with pyblp.parallel(6):
    results = problem.solve(
        initial_sigma, # Initial sigmas
        optimization=bfgs, ## Artelys Knitro is best practice but commercial
        method='1s' ## 1 Step GMM
    )
# Second step: replace the ad-hoc instruments with approximate (Chamberlain-style)
# optimal instruments computed at the first-step estimates, then re-estimate.
updated_problem = results.compute_optimal_instruments(method='approximate').to_problem()
with pyblp.parallel(6):
    updated_results = updated_problem.solve(
        results.sigma,  # start from the first-step nonlinear estimates
        results.pi,
        optimization=bfgs,
        method='1s'
    )
updated_problem
Initializing the problem ... Initialized the problem after 00:00:00. Dimensions: ========================================== T N F I K1 K2 MD ---- ---- --- ------- ---- ---- ---- 1000 4000 4 1000000 4 1 6 ========================================== Formulations: ============================================================ Column Indices: 0 1 2 3 ----------------------------- ------- ------ ------ ---- X1: Linear Characteristics quality lenght prices roof X2: Nonlinear Characteristics roof ============================================================ Starting a pool of 6 processes ... Started the process pool after 00:00:00. Solving the problem ... Nonlinear Coefficient Initial Values: ===================== Sigma: roof ------ ------------- roof +4.000000E+00 ===================== Starting optimization ... At least one error was encountered. As long as the optimization routine does not get stuck at values of theta that give rise to errors, this is not necessarily a problem. If the errors persist or seem to be impacting the optimization results, consider setting an error punishment or following any of the other suggestions below: Encountered a numerical error when computing delta. This problem is often due to prior problems, overflow, or nonpositive shares, and can sometimes be mitigated by choosing smaller initial parameter values, setting more conservative bounds on parameters or shares, rescaling data, removing outliers, changing the floating point precision, or using different optimization, iteration, or integration configurations. Errors encountered: divide by zero. 
GMM Optimization Objective Fixed Point Contraction Clipped Objective Objective Gradient Step Iterations Evaluations Iterations Evaluations Shares Value Improvement Norm Theta ---- ------------ ----------- ----------- ----------- ------- ------------- ------------- ------------- ------------- 1 0 1 9875 30169 0 +5.752542E-01 +2.606188E-01 +4.000000E+00 1 0 2 10050 30709 0 +5.104579E-01 +6.479631E-02 +2.364361E-01 +4.260619E+00 At least one error was encountered. As long as the optimization routine does not get stuck at values of theta that give rise to errors, this is not necessarily a problem. If the errors persist or seem to be impacting the optimization results, consider setting an error punishment or following any of the other suggestions below: Encountered a numerical error when computing delta. This problem is often due to prior problems, overflow, or nonpositive shares, and can sometimes be mitigated by choosing smaller initial parameter values, setting more conservative bounds on parameters or shares, rescaling data, removing outliers, changing the floating point precision, or using different optimization, iteration, or integration configurations. Errors encountered: divide by zero. 1 0 3 10324 31567 0 +3.181053E-01 +1.923526E-01 +1.305686E-01 +5.303094E+00 1 1 4 10433 31914 0 +2.408354E-01 +7.726992E-02 +1.375896E-02 +6.611382E+00 At least one error was encountered. As long as the optimization routine does not get stuck at values of theta that give rise to errors, this is not necessarily a problem. If the errors persist or seem to be impacting the optimization results, consider setting an error punishment or following any of the other suggestions below: Encountered a numerical error when computing delta. 
This problem is often due to prior problems, overflow, or nonpositive shares, and can sometimes be mitigated by choosing smaller initial parameter values, setting more conservative bounds on parameters or shares, rescaling data, removing outliers, changing the floating point precision, or using different optimization, iteration, or integration configurations. Errors encountered: divide by zero. 1 2 5 10410 31859 0 +2.399961E-01 +8.392583E-04 +2.938100E-04 +6.486661E+00 At least one error was encountered. As long as the optimization routine does not get stuck at values of theta that give rise to errors, this is not necessarily a problem. If the errors persist or seem to be impacting the optimization results, consider setting an error punishment or following any of the other suggestions below: Encountered a numerical error when computing delta. This problem is often due to prior problems, overflow, or nonpositive shares, and can sometimes be mitigated by choosing smaller initial parameter values, setting more conservative bounds on parameters or shares, rescaling data, removing outliers, changing the floating point precision, or using different optimization, iteration, or integration configurations. Errors encountered: divide by zero. 1 3 6 10418 31901 0 +2.399958E-01 +3.836457E-07 +4.367239E-07 +6.489269E+00 Optimization completed after 00:01:50. Computing the Hessian and estimating standard errors ... Encountered a numerical error when computing delta. This problem is often due to prior problems, overflow, or nonpositive shares, and can sometimes be mitigated by choosing smaller initial parameter values, setting more conservative bounds on parameters or shares, rescaling data, removing outliers, changing the floating point precision, or using different optimization, iteration, or integration configurations. Errors encountered: divide by zero. Computed results after 00:00:54. 
Problem Results Summary: =============================================================================================== GMM Objective Gradient Clipped Weighting Matrix Covariance Matrix Step Value Norm Hessian Shares Condition Number Condition Number ---- ------------- ------------- ------------- ------- ---------------- ----------------- 1 +2.399958E-01 +4.367239E-07 +1.125303E-01 0 +3.023676E+02 +6.528059E+06 =============================================================================================== Cumulative Statistics: =========================================================================== Computation Optimizer Optimization Objective Fixed Point Contraction Time Converged Iterations Evaluations Iterations Evaluations ----------- --------- ------------ ----------- ----------- ----------- 00:02:44 Yes 4 7 61510 188119 =========================================================================== Nonlinear Coefficient Estimates (Robust SEs in Parentheses): ======================= Sigma: roof ------ --------------- roof +6.489269E+00 (+9.991993E+00) ======================= Beta Estimates (Robust SEs in Parentheses): ================================================================== quality lenght prices roof --------------- --------------- --------------- --------------- +2.336272E+00 +5.794935E-01 -2.299509E+00 +4.932488E+00 (+1.458950E+00) (+3.280394E-01) (+1.217449E+00) (+3.152766E+00) ================================================================== Terminating the pool of 6 processes ... Terminated the process pool after 00:00:00. Computing optimal instruments for theta ... Computed optimal instruments after 00:00:03. Optimal Instrument Results Summary: ======================= Computation Error Term Time Draws ----------- ---------- 00:00:03 1 ======================= Re-creating the problem ... Re-created the problem after 00:00:00. 
Dimensions: ========================================== T N F I K1 K2 MD ---- ---- --- ------- ---- ---- ---- 1000 4000 4 1000000 4 1 5 ========================================== Formulations: ============================================================ Column Indices: 0 1 2 3 ----------------------------- ------- ------ ------ ---- X1: Linear Characteristics quality lenght prices roof X2: Nonlinear Characteristics roof ============================================================ Starting a pool of 6 processes ... Started the process pool after 00:00:00. Solving the problem ... Nonlinear Coefficient Initial Values: ===================== Sigma: roof ------ ------------- roof +6.489269E+00 ===================== Starting optimization ... At least one error was encountered. As long as the optimization routine does not get stuck at values of theta that give rise to errors, this is not necessarily a problem. If the errors persist or seem to be impacting the optimization results, consider setting an error punishment or following any of the other suggestions below: Encountered a numerical error when computing delta. This problem is often due to prior problems, overflow, or nonpositive shares, and can sometimes be mitigated by choosing smaller initial parameter values, setting more conservative bounds on parameters or shares, rescaling data, removing outliers, changing the floating point precision, or using different optimization, iteration, or integration configurations. Errors encountered: divide by zero. GMM Optimization Objective Fixed Point Contraction Clipped Objective Objective Gradient Step Iterations Evaluations Iterations Evaluations Shares Value Improvement Norm Theta ---- ------------ ----------- ----------- ----------- ------- ------------- ------------- ------------- ------------- 1 0 1 10418 31901 0 +5.773069E+02 +4.522806E+02 +6.489269E+00 At least one error was encountered. 
As long as the optimization routine does not get stuck at values of theta that give rise to errors, this is not necessarily a problem. If the errors persist or seem to be impacting the optimization results, consider setting an error punishment or following any of the other suggestions below: Encountered a numerical error when computing delta. This problem is often due to prior problems, overflow, or nonpositive shares, and can sometimes be mitigated by choosing smaller initial parameter values, setting more conservative bounds on parameters or shares, rescaling data, removing outliers, changing the floating point precision, or using different optimization, iteration, or integration configurations. Errors encountered: divide by zero. 1 0 2 10307 31502 0 +2.124759E+02 +3.648311E+02 +2.709335E+02 +5.479269E+00 1 1 3 9842 30060 0 +5.943687E-01 +2.118815E+02 +1.381739E+01 +3.970324E+00 1 2 4 9821 30025 0 +2.057578E-03 +5.923111E-01 +8.105387E-01 +3.889233E+00 1 3 5 9833 30059 0 +4.708443E-08 +2.057531E-03 +3.876599E-03 +3.884180E+00 1 4 6 9827 30034 0 +3.942770E-15 +4.708443E-08 +1.121770E-06 +3.884155E+00 Optimization completed after 00:01:52. Computing the Hessian and estimating standard errors ... Computed results after 00:00:56. 
Problem Results Summary: =============================================================================================== GMM Objective Gradient Clipped Weighting Matrix Covariance Matrix Step Value Norm Hessian Shares Condition Number Condition Number ---- ------------- ------------- ------------- ------- ---------------- ----------------- 1 +3.942770E-15 +1.121770E-06 +1.595855E+02 0 +1.646325E+05 +4.430167E+03 =============================================================================================== Cumulative Statistics: =========================================================================== Computation Optimizer Optimization Objective Fixed Point Contraction Time Converged Iterations Evaluations Iterations Evaluations ----------- --------- ------------ ----------- ----------- ----------- 00:02:48 Yes 5 7 60048 183581 =========================================================================== Nonlinear Coefficient Estimates (Robust SEs in Parentheses): ======================= Sigma: roof ------ --------------- roof +3.884155E+00 (+2.152773E-01) ======================= Beta Estimates (Robust SEs in Parentheses): ================================================================== quality lenght prices roof --------------- --------------- --------------- --------------- +1.963948E+00 +4.959108E-01 -1.988932E+00 +4.127744E+00 (+6.137274E-02) (+2.515030E-02) (+6.893205E-02) (+1.236571E-01) ================================================================== Terminating the pool of 6 processes ... Terminated the process pool after 00:00:00.
Dimensions: ========================================== T N F I K1 K2 MD ---- ---- --- ------- ---- ---- ---- 1000 4000 4 1000000 4 1 5 ========================================== Formulations: ============================================================ Column Indices: 0 1 2 3 ----------------------------- ------- ------ ------ ---- X1: Linear Characteristics quality lenght prices roof X2: Nonlinear Characteristics roof ============================================================
# Recover implied marginal costs and markups, then report the mean markup by firm.
costs = updated_results.compute_costs()
markups = updated_results.compute_markups(costs=costs)
print(markups.reshape((T, J, 1)).mean(axis=0))
Computing marginal costs ... Finished after 00:00:02. Computing markups ... Finished after 00:00:01. [[0.21515159] [0.24395795] [0.2334335 ] [0.24839394]]
# Average the J-by-J diversion-ratio matrix over all markets.
# The diagonal entries are diversion to the outside good.
diversions = updated_results.compute_diversion_ratios()
print(diversions.reshape((T, J, J)).mean(axis=0))
Computing diversion ratios with respect to prices ... Finished after 00:00:01. [[0.12931491 0.66942658 0.10302167 0.09823685] [0.6779517 0.12696605 0.09697119 0.09811105] [0.15403161 0.14714563 0.4112486 0.28757415] [0.14947694 0.1462178 0.29176838 0.41253689]]
# Merger simulation: give firms 1 and 3 a common owner and re-solve for prices.
product_data['merger_ids'] = product_data['firm_ids'].replace(3, 1)
changed_prices = updated_results.compute_prices(
    firm_ids=product_data['merger_ids'],
    costs=costs
)
# Reshape old and new prices into (T, J) matrices for firm-level comparisons.
old_prices = np.array(product_data['prices'])
old_prices = np.reshape(old_prices, (T, J))
new_prices = changed_prices.reshape((T, J))
# Relative price change per product, averaged over markets by firm.
change = (new_prices - old_prices) / old_prices
print(change.mean(axis=0))
Solving for equilibrium prices ... Finished after 00:00:07. [0.01377562 0.00498321 0.04618769 0.00321074]
# Upward pricing pressure (GUPPI) for firm 3:
# diversion from 3 to 1, times firm 1's margin, scaled by p1/p3.
markup_firm_1 = markups[::4]             # firm 1 rows: every 4th, offset 0
diversion31 = diversions[:, [0]][2::4]   # D(3 -> 1): firm-3 rows, column 0
p13 = old_prices[:, [0]] / old_prices[:, [2]]
UPP3 = diversion31 * markup_firm_1 * p13
UPP3.mean()*100
6.075665257774066
# Upward pricing pressure (GUPPI) for firm 1:
# diversion from 1 to 3, times firm 3's margin, scaled by p3/p1.
markup_firm_3 = markups[2::4]            # firm 3 rows: every 4th, offset 2
diversion13 = diversions[:, [2]][0::4]   # D(1 -> 3): firm-1 rows, column 2
p31 = old_prices[:, [2]] / old_prices[:, [0]]
UPP1 = diversion13 * markup_firm_3 * p31
UPP1.mean()*100
2.0818489194598078
# Mean percentage price change (from the FOC-based merger approximation) by firm.
product_data.groupby('firm_ids')['change'].mean()
firm_ids 1.0 1.348172 2.0 0.490081 3.0 4.468684 4.0 0.317748 Name: change, dtype: float64