/*-----------------------------------------------------------------------------*
 In this example we generate (simulate) our variables.
 The idea is the following: the true ATET equals 1, and we compare two ways of
 assigning the treatment: the first is not random, the second is random.
 In the first case, we get an estimate of the ATET that is very different from
 the true value of 1.
 In the second case, the difference in outcomes between treated and non-treated
 is on average equal to 1, as it should be.
 ==> random assignment solves the problem of causal inference
*-----------------------------------------------------------------------------*/

clear all
set seed 12345												// fixed seed: every run reproduces the same draws

// Step 1 - Non-random treatment indicator
// Start from two observations (0 = non-treated, 1 = treated) and replicate each one.
set obs 2
generate treatment_nonrandom = (_n == 2)
list  														// "browse" shows the same thing in the Data Editor
expand 10000
tabulate treatment_nonrandom								// N = 20,000: 10,000 treated and 10,000 non-treated

// Step 2 - Potential outcomes y0 (without treatment) and y1 (with treatment) for everybody
// The treated have lower potential outcomes than the non-treated (3 vs 4 for y0, 4 vs 5 for y1),
// while the treatment effect y1 - y0 is 1 on average in both groups.
// "generate" fills the treated, "replace" then fills the non-treated.
generate y0 = rnormal(0, 1) + 3 	if treatment_nonrandom
replace  y0 = rnormal(0, 1) + 4 	if !treatment_nonrandom
generate y1 = rnormal(0, 1) + 4 	if treatment_nonrandom
replace  y1 = rnormal(0, 1) + 5 	if !treatment_nonrandom
tabulate treatment_nonrandom, summarize(y0)
tabulate treatment_nonrandom, summarize(y1)

// Step 3 - Observed outcome: y1 for the treated, y0 for the non-treated
generate outcome_nonrandom = cond(treatment_nonrandom == 1, y1, y0)
assert !missing(outcome_nonrandom, y0, y1)					// sanity check: nothing is left missing

// Step 4 - What is the ATET?
// ATET = E[y1 | D=1] - E[y0 | D=1] = (4 + noise) - (3 + noise) = 1 on average.
// The observed treated/control comparison instead contrasts y1 of the treated (mean 4)
// with y0 of the controls (mean 4):
tabulate treatment_nonrandom, summarize(outcome_nonrandom)	// difference in means is about 0
regress outcome_nonrandom treatment_nonrandom				// regression confirms it: coefficient not statistically different from 0

// Step 5 - Same exercise, but with treatment assigned at random (probability 0.5)
generate treatment_random = runiform() > 0.5
generate outcome_random = cond(treatment_random == 1, y1, y0)
tabulate treatment_random, summarize(outcome_random)		// with random assignment the observed difference in means is about 1, matching the true effect
regress outcome_random treatment_random