* -----------------------------------------------------------------------------
* Purpose: import vaestolaskenta.csv, keep and rename the variables used in the
*          session, attach value labels, derive father-level variables that are
*          constant within id, build an age-group variable, and save the result
*          as cleaned_data.dta.
* Input  : $data\vaestolaskenta.csv
* Output : $data\cleaned_data.dta
* -----------------------------------------------------------------------------
clear

* Create paths to folders
global data "C:\Users\aapo.kivinen\Dropbox (Aalto)\PhD\teaching\Principles_of_Economic_Analysis\aapo\session two\data"
global output "C:\Users\aapo.kivinen\Dropbox (Aalto)\PhD\teaching\Principles_of_Economic_Analysis\aapo\session two\output"

* Import data set to Stata and keep only the variables that are used below
import delimited "$data\vaestolaskenta.csv", clear
keep vuosi id ika isa_ika tuloluokka yo sp isa_amas

* Change variable names to English
rename vuosi      year
rename ika        age
rename tuloluokka income
rename isa_ika    fathers_age
rename yo         high_school
rename sp         sex
rename isa_amas   fathers_occupation

* Label sex variable
label define sex 1 "male" 2 "female", add
label values sex sex

* Label education variable
label define high_school 0 "No academic high school degree" 1 "Has academic high school degree", add
label values high_school high_school

* Extend father's occupation to all observations of the same id.
* egen max() ignores missing values, so every row of an id receives the
* largest non-missing code observed for that id.
egen aux = max(fathers_occupation), by(id)
replace fathers_occupation = aux
drop aux
label define fathers_occupation 1 "Employed" 2 "Entrepreneur", add
label values fathers_occupation fathers_occupation

* Calculate father's age when the child is born (father's age minus own age),
* then spread the largest non-missing value to all observations of the same id.
replace fathers_age = fathers_age - age
egen aux = max(fathers_age), by(id)
replace fathers_age = aux
drop aux

* Create age group variable.
* Each group is coded by the upper bound of its bin: with bin = 10 an age in
* (0,10] is coded 10, (10,20] is coded 20, and so on. Ages above binmax are
* coded binmax+1.
global bin = 10
global binmax = 70
gen age_group = ${bin}*ceil(age/${bin})
* ceil(0/bin) is 0, which would leave age 0 outside the "0-10" bin; put it in
* the first bin so it matches the label.
replace age_group = ${bin} if age == 0
* Stata treats missing as larger than any number, so "age > binmax" alone would
* also be true for missing ages; the !missing() guard keeps those missing.
replace age_group = ${binmax}+1 if age > ${binmax} & !missing(age)

* Build the value labels from the bin settings so that they stay in sync if
* bin or binmax is changed (for bin = 10, binmax = 70: "0-10", ..., "60-70").
forvalues upper = ${bin}(${bin})${binmax} {
    local lower = `upper' - ${bin}
    label define age_group `upper' "`lower'-`upper'", add
}
* Label the open-ended top group as well.
local top = ${binmax} + 1
label define age_group `top' "Over ${binmax}", add
label values age_group age_group

save "$data\cleaned_data.dta", replace