from IPython.display import HTML
HTML(filename='contraction_mapping.html')


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pyblp
pyblp.options.verbose = True
pyblp.options.digits = 4
from tabulate import tabulate
import time

product_data = pd.read_excel('boat_data.xlsx')


# Own cost shifter
product_data = product_data.rename(columns={'cost_shifter': 'demand_instruments0'})

# Closest competitor's cost shifter
product_data['demand_instruments1']= product_data.groupby(['market_ids', 'roof'])['demand_instruments0'].transform("sum") - product_data['demand_instruments0']

# Closest competitor's quality
product_data['demand_instruments2']= product_data.groupby(['market_ids', 'roof'])['quality'].transform("sum") - product_data['quality']

# Sum of the other competitors' cost shifters
product_data['demand_instruments3']= product_data.groupby(['market_ids'])['demand_instruments0'].transform("sum") - product_data['demand_instruments0'] - product_data['demand_instruments1']

# Sum of the other competitors' qualities
product_data['demand_instruments4']= product_data.groupby(['market_ids'])['quality'].transform("sum") - product_data['quality'] - product_data['demand_instruments2']


product_data.head()


X1_formulation = pyblp.Formulation('0 + quality + length + prices + roof') #Linear demand parameters
X2_formulation = pyblp.Formulation('0 + roof') #Nonlinear demand model, i.e. random coef variabels. 0 for no random coefficient on the constant
product_formulations = (X1_formulation, X2_formulation)

n_sim = 5000
mc_integration = pyblp.Integration('monte_carlo', size = n_sim, specification_options={'seed': 0}) ## Monte carlo simulation for integration in the inner loop
problem = pyblp.Problem(product_formulations, product_data, integration = mc_integration) ## Formulate problem: combine Formulation, product_data, and the Integration configuration

Initializing the problem ...
Initialized the problem after 00:00:02.

Dimensions:
==========================================
 T     N     F      I      K1    K2    MD 
----  ----  ---  -------  ----  ----  ----
1000  4000   4   5000000   4     1     8  
==========================================

Formulations:
============================================================
       Column Indices:            0       1       2      3  
-----------------------------  -------  ------  ------  ----
 X1: Linear Characteristics    quality  length  prices  roof
X2: Nonlinear Characteristics   roof                        
============================================================


bfgs = pyblp.Optimization('bfgs', {'gtol': 1e-4}) ## Use Broyden–Fletcher–Goldfarb–Shanno algorithm within PyBlp
initial_sigma = np.ones((1,1)) ## An initial guess for the nonlinear parameters 
#Note that as meantioned in the tutorial, this is a simpler, non-default optimization configuration 
#and the tolerance is relatively loose.


# Solve in parallel

with pyblp.parallel(6):
    results = problem.solve(
    initial_sigma, # Initial sigmas
    optimization=bfgs, ## Artelys Knitro is best practice but commercial
    method='1s' ## 1 Step GMM, it should be okay if you didn't specify this
)

Starting a pool of 6 processes ...
Started the process pool after 00:00:00.
Solving the problem ...

Nonlinear Coefficient Initial Values:
==================
Sigma:     roof   
------  ----------
 roof   +1.000E+00
==================
Starting optimization ...

GMM   Computation  Optimization   Objective   Fixed Point  Contraction  Clipped  Objective    Objective    Gradient             
Step     Time       Iterations   Evaluations  Iterations   Evaluations  Shares     Value     Improvement     Norm       Theta   
----  -----------  ------------  -----------  -----------  -----------  -------  ----------  -----------  ----------  ----------
 1     00:03:48         0             1          7504         23273        0     +1.933E+02               +8.415E+01  +1.000E+00
 1     00:03:21         0             2          8837         27088        0     +1.041E+02  +8.913E+01   +8.319E+01  +2.010E+00
 1     00:03:10         0             3          10022        30705        0     +1.249E+02               +1.127E+02  +6.050E+00
 1     00:03:26         0             4          9574         29313        0     +1.214E+01  +9.200E+01   +2.271E+00  +3.968E+00
 1     00:03:51         1             5          9631         29488        0     +1.213E+01  +1.236E-02   +1.977E+00  +4.050E+00
 1     00:03:16         2             6          9605         29408        0     +1.211E+01  +2.084E-02   +1.322E+00  +4.037E+00
 1     00:04:41         3             7          9581         29358        0     +1.209E+01  +1.690E-02   +2.621E-03  +4.012E+00
 1     00:04:20         4             8          9578         29362        0     +1.209E+01  +6.654E-08   +3.522E-06  +4.012E+00

Optimization completed after 00:29:53.
Computing the Hessian and estimating standard errors ...
Computed results after 00:09:12.

Problem Results Summary:
======================================================================================
GMM   Objective    Gradient               Clipped  Weighting Matrix  Covariance Matrix
Step    Value        Norm      Hessian    Shares   Condition Number  Condition Number 
----  ----------  ----------  ----------  -------  ----------------  -----------------
 1    +1.209E+01  +3.522E-06  +5.162E+01     0        +4.491E+02        +1.162E+04    
======================================================================================

Cumulative Statistics:
===========================================================================
Computation  Optimizer  Optimization   Objective   Fixed Point  Contraction
   Time      Converged   Iterations   Evaluations  Iterations   Evaluations
-----------  ---------  ------------  -----------  -----------  -----------
 00:39:05       Yes          5             9          83910       257357   
===========================================================================

Nonlinear Coefficient Estimates (Robust SEs in Parentheses):
====================
Sigma:      roof    
------  ------------
 roof    +4.012E+00 
        (+4.019E-01)
====================

Beta Estimates (Robust SEs in Parentheses):
======================================================
  quality        length        prices         roof    
------------  ------------  ------------  ------------
 +2.008E+00    +5.285E-01    -2.042E+00    +3.812E+00 
(+7.854E-02)  (+3.284E-02)  (+8.561E-02)  (+1.432E-01)
======================================================
Terminating the pool of 6 processes ...
Terminated the process pool after 00:00:00.


results

Problem Results Summary:
======================================================================================
GMM   Objective    Gradient               Clipped  Weighting Matrix  Covariance Matrix
Step    Value        Norm      Hessian    Shares   Condition Number  Condition Number 
----  ----------  ----------  ----------  -------  ----------------  -----------------
 1    +1.209E+01  +3.522E-06  +5.162E+01     0        +4.491E+02        +1.162E+04    
======================================================================================

Cumulative Statistics:
===========================================================================
Computation  Optimizer  Optimization   Objective   Fixed Point  Contraction
   Time      Converged   Iterations   Evaluations  Iterations   Evaluations
-----------  ---------  ------------  -----------  -----------  -----------
 00:39:05       Yes          5             9          83910       257357   
===========================================================================

Nonlinear Coefficient Estimates (Robust SEs in Parentheses):
====================
Sigma:      roof    
------  ------------
 roof    +4.012E+00 
        (+4.019E-01)
====================

Beta Estimates (Robust SEs in Parentheses):
======================================================
  quality        length        prices         roof    
------------  ------------  ------------  ------------
 +2.008E+00    +5.285E-01    -2.042E+00    +3.812E+00 
(+7.854E-02)  (+3.284E-02)  (+8.561E-02)  (+1.432E-01)
======================================================


#Optimal instruments
updated_problem = results.compute_optimal_instruments(method='approximate').to_problem()
with pyblp.parallel(6):
    updated_results = updated_problem.solve(
        results.sigma,
        results.pi,
        optimization=bfgs,
        method='1s'
)

Computing optimal instruments for theta ...
Computed optimal instruments after 00:00:03.

Optimal Instrument Results Summary:
=======================
Computation  Error Term
   Time        Draws   
-----------  ----------
 00:00:03        1     
=======================
Re-creating the problem ...
Re-created the problem after 00:00:00.

Dimensions:
==========================================
 T     N     F      I      K1    K2    MD 
----  ----  ---  -------  ----  ----  ----
1000  4000   4   5000000   4     1     5  
==========================================

Formulations:
============================================================
       Column Indices:            0       1       2      3  
-----------------------------  -------  ------  ------  ----
 X1: Linear Characteristics    quality  length  prices  roof
X2: Nonlinear Characteristics   roof                        
============================================================
Starting a pool of 6 processes ...
Started the process pool after 00:00:00.
Solving the problem ...

Nonlinear Coefficient Initial Values:
==================
Sigma:     roof   
------  ----------
 roof   +4.012E+00
==================
Starting optimization ...

GMM   Computation  Optimization   Objective   Fixed Point  Contraction  Clipped  Objective    Objective    Gradient             
Step     Time       Iterations   Evaluations  Iterations   Evaluations  Shares     Value     Improvement     Norm       Theta   
----  -----------  ------------  -----------  -----------  -----------  -------  ----------  -----------  ----------  ----------
 1     00:02:56         0             1          9578         29362        0     +3.220E-04               +3.200E-01  +4.012E+00
 1     00:02:59         0             2          9456         28925        0     +7.941E+00               +4.963E+01  +3.692E+00
 1     00:02:53         0             3          9578         29361        0     +1.689E-10  +3.220E-04   +2.317E-04  +4.010E+00
 1     00:02:52         1             4          9590         29388        0     +1.902E-18  +1.689E-10   +2.460E-08  +4.010E+00

Optimization completed after 00:11:40.
Computing the Hessian and estimating standard errors ...
Computed results after 00:08:41.

Problem Results Summary:
======================================================================================
GMM   Objective    Gradient               Clipped  Weighting Matrix  Covariance Matrix
Step    Value        Norm      Hessian    Shares   Condition Number  Condition Number 
----  ----------  ----------  ----------  -------  ----------------  -----------------
 1    +1.902E-18  +2.460E-08  +1.590E+02     0        +4.324E+04        +3.907E+03    
======================================================================================

Cumulative Statistics:
===========================================================================
Computation  Optimizer  Optimization   Objective   Fixed Point  Contraction
   Time      Converged   Iterations   Evaluations  Iterations   Evaluations
-----------  ---------  ------------  -----------  -----------  -----------
 00:20:21       Yes          2             5          47792       146424   
===========================================================================

Nonlinear Coefficient Estimates (Robust SEs in Parentheses):
====================
Sigma:      roof    
------  ------------
 roof    +4.010E+00 
        (+2.296E-01)
====================

Beta Estimates (Robust SEs in Parentheses):
======================================================
  quality        length        prices         roof    
------------  ------------  ------------  ------------
 +2.007E+00    +5.284E-01    -2.042E+00    +3.812E+00 
(+6.506E-02)  (+2.911E-02)  (+7.181E-02)  (+1.153E-01)
======================================================
Terminating the pool of 6 processes ...
Terminated the process pool after 00:00:00.


updated_results

Problem Results Summary:
======================================================================================
GMM   Objective    Gradient               Clipped  Weighting Matrix  Covariance Matrix
Step    Value        Norm      Hessian    Shares   Condition Number  Condition Number 
----  ----------  ----------  ----------  -------  ----------------  -----------------
 1    +1.902E-18  +2.460E-08  +1.590E+02     0        +4.324E+04        +3.907E+03    
======================================================================================

Cumulative Statistics:
===========================================================================
Computation  Optimizer  Optimization   Objective   Fixed Point  Contraction
   Time      Converged   Iterations   Evaluations  Iterations   Evaluations
-----------  ---------  ------------  -----------  -----------  -----------
 00:20:21       Yes          2             5          47792       146424   
===========================================================================

Nonlinear Coefficient Estimates (Robust SEs in Parentheses):
====================
Sigma:      roof    
------  ------------
 roof    +4.010E+00 
        (+2.296E-01)
====================

Beta Estimates (Robust SEs in Parentheses):
======================================================
  quality        length        prices         roof    
------------  ------------  ------------  ------------
 +2.007E+00    +5.284E-01    -2.042E+00    +3.812E+00 
(+6.506E-02)  (+2.911E-02)  (+7.181E-02)  (+1.153E-01)
======================================================


elasticities_own = updated_results.compute_elasticities().reshape((1000,4,4)).mean(axis=0).diagonal()
elasticities_own

Computing elasticities with respect to prices ...
Finished after 00:00:02.

array([-5.15695222, -5.16483513, -4.29922201, -4.17912948])


elasticities_ps1_logit = np.array([-5.600759, -5.289546, -4.072071, -3.934288])
elasticities_ps1_nlogit = np.array([-5.695437, -5.484764, -4.065844, -3.933168])

print("Differences in % w.r.t to logit: ", (elasticities_own-elasticities_ps1_logit)*100/elasticities_own)
print("Differences in % w.r.t to nested logit: ", (elasticities_own-elasticities_ps1_nlogit)*100/elasticities_own)

Differences in % w.r.t to logit:  [-8.60598986 -2.41461483  5.28353748  5.85867184]
Differences in % w.r.t to nested logit:  [-10.44191923  -6.19436762   5.42837764   5.88547168]


diversions = updated_results.compute_diversion_ratios().reshape((1000,4,4)).mean(axis=0)
print(diversions[0,2], "and" ,diversions[2,0])

Computing diversion ratios with respect to prices ...
Finished after 00:00:02.

0.10439516533920497 and 0.15222955093717164


D_13_logit = 0.1921416
D_31_logit = 0.3445739 
D_13_nlogit = 0.09510307
D_31_nlogit =  0.2321172 

print("Differences in % w.r.t to logit: ", (diversions[0,2]-D_13_logit)*100/diversions[0,2], "and" , (diversions[2,0]-D_31_logit)*100/diversions[2,0])
print("Differences in % w.r.t to nested logit: ", (diversions[0,2]-D_13_nlogit)*100/diversions[0,2], "and" , (diversions[2,0]-D_31_nlogit)*100/diversions[2,0])

Differences in % w.r.t to logit:  -84.05220143642265 and -126.35151840013832
Differences in % w.r.t to nested logit:  8.90088665410196 and -52.478410775710486

	prices	shares	length	quality	demand_instruments0	roof	firm_ids	market_ids	demand_instruments1	demand_instruments2	demand_instruments3	demand_instruments4
0	5.858942	0.498415	9.336710	1.229133	0.802284	1	1	1	1.520421	0.752459	1.094955	1.098638
1	4.600115	0.102175	7.092352	0.752459	1.520421	1	2	1	0.802284	1.229133	1.094955	1.098638
2	1.000218	0.004851	6.159682	0.341548	0.334998	0	3	1	0.759957	0.757090	2.322705	1.981593
3	3.233179	0.216493	5.780847	0.757090	0.759957	0	4	1	0.334998	0.341548	2.322705	1.981593
4	3.827983	0.354432	9.336710	0.573911	0.164722	1	1	2	0.762669	0.879428	1.483692	2.171100

Problem set 2 model solutions¶

Exercise 1¶

Exercise 3¶

Estimate the model using the exogenous cost and demand shifters as instruments (not optimal instruments)¶

Estimate the demand model constructing approximations to the optimal instruments¶

Problem 4¶

Problem 5¶