/*
 * Audit fee model: is there a Big-4 fee premium?
 *
 * Steps:
 *   1. Build firm-year fee data (logAF, BIG4) from RMA.AuditAnalytics.
 *   2. Build firm-year controls (logAT, LEV, ROA, LOSS) from RMA.Compustat.
 *   3. Inner-join the two on CIK = COMPANY_FKEY and
 *      DATADATE = FISCAL_YEAR_ENDED.
 *   4. Drop / cap observations at the 99th percentile of ROA / LEV.
 *   5. Regress logAF on BIG4 and the controls.
 */
libname RMA "C:\Users\jarvah1\Downloads";

* quick look at the Audit Analytics data;
proc print data=RMA.AuditAnalytics (obs=10);
run;

proc contents data=RMA.AuditAnalytics;
run;

data fees;
    set RMA.AuditAnalytics;

    * total fees - the + operator yields missing when either component is missing;
    AF = AUDIT_FEES + AUDIT_RELATED_FEES;
    logAF = log(1 + AF);

    * Big-4 dummy - auditor keys below 5 are treated as Big-4.
      SAS orders a missing value below every number, so a bare
      AUDITOR_FKEY < 5 would also flag missing auditor keys as Big-4.
      The lower bound keeps those rows at BIG4 = 0;
    BIG4 = 0;
    if 1 <= AUDITOR_FKEY < 5 then BIG4 = 1;

    * delete zero and missing audit fee obs - a missing logAF cannot enter
      the regression and could displace a valid duplicate in the
      NODUPKEY sort that follows;
    if logAF = 0 or missing(logAF) then delete;

    keep COMPANY_FKEY FISCAL_YEAR_ENDED logAF BIG4;
run;

* keep one fee record per firm and fiscal year end;
proc sort data=fees nodupkey out=fees1;
    by COMPANY_FKEY FISCAL_YEAR_ENDED;
run;

proc contents data=RMA.compustat;
run;

* import compustat data;
data compustat;
    set RMA.Compustat;

    * NOTE(review) - the LAG logic below presumes the input is ordered by
      GVKEY and fiscal year. Confirm or add a PROC SORT before this step;

    * Rows removed here never reach the LAG calls, so lag(AT) is the AT of
      the previous retained row. This also drops missing AT because
      missing compares below 10;
    if AT < 10 then delete;

    * size proxy;
    logAT = log(AT);

    * leverage;
    LEV = LT / AT;

    * profitability - income scaled by average of current and lagged assets;
    avAT = (AT + lag(AT)) / 2;
    ROA = IBC / avAT;

    * loss dummy;
    LOSS = 0;
    if IBC < 0 then LOSS = 1;

    * the first retained row of each firm has a lagged AT that belongs to
      another firm, so it is dropped;
    if GVKEY ^= lag(GVKEY) then delete;

    * applied after the LAG calls so the first kept year can still use
      the prior year as its lag;
    if FYEAR < 2000 then delete;

    * delete missing obs;
    if CIK = "" then delete;
    if ROA = . then delete;
    if LEV = . then delete;

    keep CIK DATADATE logAT LEV ROA LOSS;
run;

* merge AuditAnalytics and Compustat - inner join on firm id and
  fiscal year end, same ten columns that select * returned;
proc sql;
    create table thesis as
    select a.CIK,
           a.DATADATE,
           a.logAT,
           a.LEV,
           a.ROA,
           a.LOSS,
           b.COMPANY_FKEY,
           b.FISCAL_YEAR_ENDED,
           b.logAF,
           b.BIG4
    from compustat as a
    inner join fees1 as b
        on a.CIK = b.COMPANY_FKEY
       and a.DATADATE = b.FISCAL_YEAR_ENDED;
quit;

proc means data=thesis n mean std median min p1 p99 max maxdec=3;
run;

* outlier cut-offs - writes ROA_P1 ROA_P99 LEV_P1 LEV_P99 to a one-row table;
proc univariate data=thesis noprint;
    var ROA LEV;
    output out=outliers pctlpts=1 99 pctlpre=ROA_ LEV_ pctlname=P1 P99;
run;

proc print data=outliers;
run;

data thesis1;
    * read the cut-offs once - variables read by SET are retained, so the
      percentiles stay available on every row of thesis;
    if _n_ = 1 then set outliers;
    set thesis;

    * delete outliers - the original equality test only removed rows
      exactly at the 99th percentile and left the tail above it in place;
    if ROA >= ROA_P99 then delete;

    * winsorize leverage at the 99th percentile;
    if LEV >= LEV_P99 then LEV = LEV_P99;

    * NOTE(review) - ROA_P1 and LEV_P1 are computed but unused, so the
      lower tails are neither trimmed nor winsorized. Confirm this is intended;
run;

* audit fee model;
* Is there a Big-4 premium?;
proc reg data=thesis1;
    model logAF = BIG4 logAT LEV ROA LOSS / white vif;
run;
quit;