/*
 * Accruals model estimated by 2-digit SIC industry.
 *
 * Pipeline:
 *   1. aaa : scale the Compustat items by average total assets.
 *   2. bbb : add the lead cash flow, apply sample filters, keep model variables.
 *   3. ccc : trim each model variable at its 1st and 99th percentile.
 *   4. ddd : keep industries (SIC2) with at least 30 observations.
 *   5. PROC REG by SIC2, then summary statistics of the industry coefficients.
 */
libname rma "\\home.org.aalto.fi\jarvah1\data\Downloads";

/* ---- Step 1: scaled variables ------------------------------------------ */
/* NOTE(review): every lag()/dif() below uses the previous row, so this step */
/* assumes rma.compustat is ordered by gvkey and ascending fyear - confirm.  */
data aaa;
    set rma.compustat;

    /* average assets; a zero scale is set to missing to avoid division by zero */
    scale = (AT + lag(AT)) / 2;
    if scale = 0 then scale = .;

    NI     = IBC / scale;
    CFO    = OANCF / scale;
    ACC    = NI - CFO;
    lagCFO = lag(OANCF) / scale;
    PPE    = PPEGT / scale;
    dSALES = dif(SALE) / scale;

    /* first row of each gvkey: the lagged values come from another firm */
    if gvkey ^= lag(gvkey) then delete;
run;

proc print data=aaa (obs=5);
run;

proc contents data=aaa;
run;

/* ---- Step 2: lead cash flow and sample filters -------------------------- */
/* Descending fyear within gvkey makes lag() return the following row of the */
/* original ordering, i.e. the next available fiscal year of the same firm.  */
proc sort data=aaa;
    by gvkey descending FYEAR;
run;

proc print data=aaa (obs=5);
run;

data bbb;
    set aaa;

    /* OANCF of the previous row under the descending sort, scaled by this row */
    fCFO = lag(OANCF) / scale;

    /* first row of each gvkey has no valid lead value */
    if gvkey ^= lag(gvkey) then delete;

    /* a missing scale also satisfies scale<10 in SAS and is removed here */
    if scale < 10 then delete;

    if lagCFO = . then delete;
    if ACC    = . then delete;
    if fCFO   = . then delete;
    if PPE    = . then delete;
    if dSALES = . then delete;

    /* delete utility and financial sectors */
    if SIC >= 4900 and SIC <= 4999 then delete;
    if SIC >= 6000 and SIC <= 6999 then delete;

    /* 2-digit SIC code */
    SIC2 = int(SIC / 100);

    keep gvkey fyear conm SIC2 ACC lagCFO CFO fCFO PPE dSALES;
run;

/* descriptive statistics */
proc means data=bbb n mean std min p1 median p99 max maxdec=3;
    var ACC lagCFO CFO fCFO PPE dSALES;
run;

/* ---- Step 3: delete outliers -------------------------------------------- */
/* b1 holds one row with <var>_P1 and <var>_P99 for each model variable.     */
proc univariate data=bbb noprint;
    var ACC lagCFO CFO fCFO PPE dSALES;
    output out=b1
        pctlpts=1 99
        pctlpre=ACC_ lagCFO_ CFO_ fCFO_ PPE_ dSALES_
        pctlname=P1 P99;
run;

data ccc;
    /* read the single percentile row once; its values are retained for all rows */
    if _n_ = 1 then set b1;
    set bbb;

    /* trimming: drop observations outside [P1, P99] of any model variable */
    if ACC    < ACC_P1    or ACC    > ACC_P99    then delete;
    if lagCFO < lagCFO_P1 or lagCFO > lagCFO_P99 then delete;
    if CFO    < CFO_P1    or CFO    > CFO_P99    then delete;
    if fCFO   < fCFO_P1   or fCFO   > fCFO_P99   then delete;
    if PPE    < PPE_P1    or PPE    > PPE_P99    then delete;
    if dSALES < dSALES_P1 or dSALES > dSALES_P99 then delete;

    keep gvkey fyear conm SIC2 ACC lagCFO CFO fCFO PPE dSALES;
run;

proc means data=ccc n mean std min p1 median p99 max maxdec=3;
    var ACC lagCFO CFO fCFO PPE dSALES;
run;

/* ---- Step 4: require at least 30 obs / industry regression -------------- */
proc sort data=ccc;
    by SIC2;
run;

/* c1: one row per SIC2 with nbr = number of non-missing ACC values */
proc means data=ccc noprint;
    by SIC2;
    output out=c1 (drop=_type_ _freq_) n(ACC)=nbr;
run;

data ddd;
    merge ccc c1;
    by SIC2;
    if nbr < 30 then delete;
run;

/* ---- Step 5: accruals model or earnings management model ---------------- */
ods graphics off;

/* One regression per SIC2. parms receives the coefficient estimates, and    */
/* eee receives the residual (DACC) and predicted value (NACC) per row.      */
proc reg data=ddd outest=parms adjrsq noprint;
    model ACC = lagCFO CFO fCFO PPE dSALES;
    by SIC2;
    output out=eee residual=DACC predicted=NACC;
run;
quit;

/* Fama-MacBeth approach: statistics across the industry-level estimates */
proc means data=parms n mean t prt median maxdec=3;
    var intercept lagCFO CFO fCFO PPE dSALES _adjrsq_;
run;