/*
  Accrual regressions on Compustat data.

  Steps:
    1. Build firm-year variables scaled by average total assets:
       ACC (accruals), dSALES (change in sales), PPE and CFO.
    2. Report descriptive statistics and trim ACC, dSALES and CFO
       at their 1st and 99th percentiles.
    3. Estimate ACC = dSALES PPE CFO as a pooled regression and
       separately for every two-digit SIC industry and fiscal year.
    4. Average the industry-year coefficients and test them against zero.

  Input : RMA.Compustat, using GVKEY FYEAR SIC AT IBC OANCF SALE PPEGT.
  Output: WORK.compustat (untrimmed sample), WORK.outliers (cutoffs),
          WORK.aaa (trimmed sample), WORK.parms (industry-year estimates).
*/

libname RMA "C:\Users\jarvah1\Downloads";

proc contents data=RMA.Compustat;
run;

* LAG and DIF below need each firm in fiscal-year order, so sort a WORK copy and leave the source library untouched;
proc sort data=RMA.Compustat out=compustat_sorted;
    by GVKEY FYEAR;
run;

* import compustat data;
data compustat;
    set compustat_sorted;

    * LAG and DIF queues only advance when the call executes, so every call is made here on every record before any DELETE can skip it;
    prev_GVKEY = lag(GVKEY);
    prev_FYEAR = lag(FYEAR);
    prev_AT    = lag(AT);
    chg_SALE   = dif(SALE);

    * size filter, which also removes missing AT because a missing value compares below any number;
    if AT<10 then delete;

    * the prior record must be the same firm in the immediately preceding fiscal year;
    if GVKEY^=prev_GVKEY or missing(FYEAR) or FYEAR^=prev_FYEAR+1 then delete;

    * two-digit SIC industry;
    SIC2=int(SIC/100);

    * scaling variable is average total assets over the current and prior year;
    scale=(AT + prev_AT)/2;

    * Hribar & Collins (2002, JAR);
    * accruals;
    ACC=(IBC-OANCF)/scale;

    * change in sales;
    dSALES=chg_SALE/scale;

    * PPE, with a missing value treated as zero;
    PPE=PPEGT/scale;
    if PPE=. then PPE=0;

    * operating cash flows;
    CFO=OANCF/scale;

    * drop records where the dependent variable or the sales change cannot be computed;
    if ACC=. then delete;
    if dSALES=. then delete;

    keep GVKEY FYEAR SIC2 ACC DSALES PPE CFO;
run;

* descriptive statistics before trimming;
proc means n mean std min p1 p5 q1 median q3 p95 p99 max maxdec=3 data=compustat;
    var ACC DSALES PPE CFO;
run;

* 1st and 99th percentile cutoffs, stored as ACC_P1 ACC_P99 DSALES_P1 and so on;
proc univariate data=compustat noprint;
    var ACC DSALES PPE CFO;
    output out=outliers pctlpts=1 99 pctlpre=ACC_ DSALES_ PPE_ CFO_ pctlname=P1 P99;
run;

proc print data=outliers;
run;

* trim both tails, where the single cutoff row read at _n_=1 is retained on every record;
data aaa;
    if _n_=1 then set outliers;
    set compustat;
    if ACC<ACC_P1 or ACC>ACC_P99 then delete;
    if dSALES<dSALES_P1 or dSALES>dSALES_P99 then delete;
    * PPE trimming stays disabled, as before;
    *if PPE<PPE_P1 or PPE>PPE_P99 then delete;
    if CFO<CFO_P1 or CFO>CFO_P99 then delete;
run;

* after removing outliers;
proc means n mean std min p1 p5 q1 median q3 p95 p99 max maxdec=3 data=aaa;
    var ACC DSALES PPE CFO;
run;

ods graphics off;

* pooled model;
proc reg data=aaa;
    model ACC=dSALES PPE CFO;
run;
quit;

* cross-sectional model, one regression per industry-year;
* NOTE(review): no minimum number of observations per SIC2-FYEAR group is enforced - confirm whether thin groups should be excluded;
proc sort data=aaa;
    by SIC2 FYEAR;
run;

proc reg data=aaa outest=parms noprint adjrsq;
    model ACC=dSALES PPE CFO;
    by SIC2 FYEAR;
run;
quit;

* Fama-MacBeth (1973) approach, with mean coefficients across industry-years and t-tests against zero;
proc means n mean t prt maxdec=3 data=parms;
    var Intercept dSALES PPE CFO _adjrsq_;
run;