/*
 * Audit-fee model.
 *
 * Pipeline:
 *   1. Build log audit fees and a Big-4 indicator from RMA.AuditAnalytics.
 *   2. Build size, leverage, profitability and loss controls from RMA.Compustat.
 *   3. Merge both sources on company identifier and fiscal-year-end date.
 *   4. Drop ROA / LEV observations outside the 1st-99th percentile range.
 *   5. Report descriptive statistics, correlations, a pooled OLS fee model,
 *      and yearly regressions whose coefficients are averaged (Fama-MacBeth).
 */
libname RMA 'C:\Users\classtoolo\Downloads';

proc contents data=RMA.AuditAnalytics;
run;

/* ------------------------------------------------------------------ */
/* Audit fee data                                                     */
/* ------------------------------------------------------------------ */
data fees;
    set RMA.AuditAnalytics;

    /* Total fee. The + operator returns missing when either component is missing. */
    AF = AUDIT_FEES + AUDIT_RELATED_FEES;

    /* Only strictly positive fees have a logarithm. Missing or non-positive */
    /* totals are removed (a missing value is never greater than zero).       */
    if AF > 0 then logAF = log(AF);
    else delete;

    /* Big-4 dummy: auditor keys below 5 (presumably keys 1-4 identify the    */
    /* Big-4 firms - TODO confirm against the data dictionary). SAS orders     */
    /* missing values below every number, so a missing key must be excluded    */
    /* explicitly or it would be flagged as Big-4.                             */
    BIG4 = (not missing(AUDITOR_FKEY) and AUDITOR_FKEY < 5);

    drop AUDIT_FEES AUDIT_RELATED_FEES;
run;

/* Delete duplicate observations: keep one row per company and fiscal year end. */
proc sort data=fees nodupkey out=fees1;
    by COMPANY_FKEY FISCAL_YEAR_ENDED;
run;

proc contents data=fees1;
run;

proc print data=fees1 (obs=100);
run;

/* ------------------------------------------------------------------ */
/* COMPUSTAT data                                                     */
/* ------------------------------------------------------------------ */

/* LAG returns the value from the previously processed row, so rows must be   */
/* ordered by firm and date before lagged total assets are computed.          */
proc sort data=RMA.Compustat out=compustat_sorted;
    by GVKEY DATADATE;
run;

data aaa;
    set compustat_sorted;

    /* Size proxy. Zero total assets are treated as missing so that the       */
    /* ratios below are not divided by zero.                                  */
    if AT = 0 then AT = .;
    if AT > 0 then logAT = log(AT);

    /* Leverage. */
    LEV = LT / AT;

    /* Average total assets. LAG must run on every row (never inside a        */
    /* conditional), otherwise its queue falls out of step with the data.     */
    prevAT    = lag(AT);
    prevGVKEY = lag(GVKEY);
    if GVKEY = prevGVKEY then avAT = (AT + prevAT) / 2;
    else avAT = .;   /* first row of a firm has no prior-year assets */

    /* Profitability. */
    ROA = IBC / avAT;

    /* Loss dummy. Rows with missing ROA are deleted below, so the boolean    */
    /* never flags a missing ROA in the output.                               */
    LOSS = (ROA < 0);

    /* Sample filters. These come after the LAG calls so that earlier years   */
    /* still feed the lagged values of the first retained year.               */
    if FYEAR < 2000 then delete;
    if missing(ROA) or missing(LEV) then delete;
    /* MISSING() works whether CIK is stored as numeric or character. */
    if missing(CIK) then delete;

    keep CONM DATADATE logAT LEV ROA LOSS CIK;
run;

/* Keep one row per company and fiscal year end. */
proc sort data=aaa nodupkey out=aaa1;
    by CIK DATADATE;
run;

/* Summarise the de-duplicated table, which is the one used in the merge. */
proc means data=aaa1;
run;

/* ------------------------------------------------------------------ */
/* MERGE                                                              */
/* ------------------------------------------------------------------ */

/* Inner join: only firm-years present in both sources are retained.  */
/* PROC SQL executes each statement immediately and ends with QUIT.   */
proc sql;
    create table thesis as
    select *
    from aaa1 as a
    inner join fees1 as b
        on  a.CIK = b.COMPANY_FKEY
        and a.DATADATE = b.FISCAL_YEAR_ENDED;
quit;

/* Descriptive statistics before outlier removal. */
proc means data=thesis n mean std min p1 p25 median q3 p99 max maxdec=3;
    var logAF logAT LEV ROA LOSS BIG4;
run;

/* ------------------------------------------------------------------ */
/* Remove outliers                                                    */
/* ------------------------------------------------------------------ */

/* One-row dataset holding ROA_P1, ROA_P99, LEV_P1 and LEV_P99. */
proc univariate data=thesis noprint;
    var ROA LEV;
    output out=outliers pctlpts=1 99 pctlpre=ROA_ LEV_ pctlname=P1 P99;
run;

proc print data=outliers;
run;

data thesis1;
    /* Read the single row of cut-offs once. Variables read with SET are      */
    /* retained, so the cut-offs are available on every row of THESIS.        */
    if _n_ = 1 then set outliers;
    set thesis;

    /* Delete observations outside the 1st-99th percentile range. */
    if ROA < ROA_P1 or ROA > ROA_P99 then delete;
    if LEV < LEV_P1 or LEV > LEV_P99 then delete;

    drop ROA_P1 LEV_P1 ROA_P99 LEV_P99;
run;

/* Descriptive statistics after removing ROA and LEV outliers. */
proc means data=thesis1 n mean std min p1 p25 median q3 p99 max maxdec=3;
    title 'Descriptive statistics';
    var logAF logAT LEV ROA LOSS BIG4;
run;

proc corr pearson spearman nosimple data=thesis1;
    title 'Correlations';
    var logAF logAT LEV ROA LOSS BIG4;
run;

/* ------------------------------------------------------------------ */
/* OLS model                                                          */
/* ------------------------------------------------------------------ */
ods graphics on;

/* WHITE requests heteroscedasticity-consistent standard errors and   */
/* VIF requests variance inflation factors.                           */
proc reg data=thesis1;
    title 'Fee model';
    model logAF = logAT LEV ROA LOSS BIG4 / white vif;
run;
quit;

/* ------------------------------------------------------------------ */
/* Big-4 premium over time                                            */
/* ------------------------------------------------------------------ */
data thesis2;
    set thesis1;
    /* Assumes FISCAL_YEAR_ENDED is a SAS date value (not a datetime) - TODO confirm. */
    year = year(FISCAL_YEAR_ENDED);
run;

/* BY-group processing in PROC REG needs the data sorted by the BY variable. */
proc sort data=thesis2;
    by year;
run;

ods graphics off;

/* One regression per year. OUTEST= stores each year's coefficient estimates. */
proc reg data=thesis2 outest=parms;
    title 'Fee model by year';
    model logAF = logAT LEV ROA LOSS BIG4;
    by year;
run;
quit;

/* Fama-MacBeth approach: average the yearly coefficients and t-test the mean against zero. */
proc means data=parms n mean t prt;
    title 'average values of coefficient estimates';
    var Intercept logAT LEV ROA LOSS BIG4;
run;