***********************************;
/* RESEARCH METHODS IN ACCOUNTING */
/* Exercise 28.1.2020 */
***********************************;
* our library (where the data is located);
libname rma
"\\home.org.aalto.fi\jarvah1\data\Downloads";
run;
/* Research hypotheses:
H1: Reporting a loss reduces the amount of bonus received by the CEO.
H2: Reporting a loss increase the probability of CEO turnover. */
data aaa;
set rma.execucomp;
if CEOANN="CEO"; /* <- keep only CEO observations */
run;
proc contents data=aaa; run;
proc print data=aaa (obs=15);
run;
* remove dublicates;
proc sort data=aaa nodupkey out=bbb; /* There was only 4 duplicates. */
by gvkey year;
run;
* import financial statement data;
data ccc;
set rma.compustat;
proc contents data=ccc; run;
proc print data=ccc (obs=15);
run;
* merge data;
proc sql;
create table ddd as
select * from bbb inner join ccc
on bbb.GVKEY=ccc.GVKEY and
bbb.YEAR=ccc.FYEAR;
quit;
proc sort data=ddd;
by GVKEY FYEAR;
data eee;
set ddd;
* CEO turnover in t+1;
TURNOVER=0; /* <- dummy variable takes value of 0*/
dif=LEFTOFC-DATADATE;
if dif>0 and dif<=365 then TURNOVER=1; /* <- dummy variable takes value of 1 if CEO will leave his/her seat in the incoming year */
* bonus;
if BONUS<0 or BONUS=. then BONUS=0; /* set negative or missing bonus equal to zero */
logBONUS=log(1+BONUS); /* natural logarithm of 1 + bonus */
* independent variables;
Size=log(AT);
if DLC=. then DLC=0; /* DLC = Debt in Current Liabilities - Total */
* leverage;
LEV=(DLC+DLTT)/AT; /* DLTT = Debt in Current Liabilities - Total */
* loss dummy;
LOSS=IBC<0; /* IBC = Income Before Extraordinary Items (from Cash Flow statement)*/
avAT=(AT+lag(AT))/2; /* average assets */
ROA=IBC/avAT; /* return on assets */
if SALE<=0 then SALE=.;
SalesGr=SALE/lag(SALE)-1; /* sales growth */
TobinQ=(PRCC_F*CSHO+LT)/AT; /* Tobin's Q */
SIC2=int(SIC/100); /* two-digit Standard Industrial Classification (SIC) code */
if GVKEY^=lag(GVKEY) then delete; /* we do not want to compare apples with oranges (i.e. company switch cases) */
run;
* delete missing observations in a separate data step;
data fff;
set eee;
if AGE=. then delete;
if Size=. then delete;
if LEV=. then delete;
if ROA=. then delete;
if SalesGr=. then delete;
if TobinQ=. then delete;
* exclude utility companies;
if SIC2=49 then delete;
* exclude financial service companies;
if SIC2>=60 and SIC2<=69 then delete;
* exclude small companies (sales less than 10m);
if SALE<10 then delete;
keep GVKEY YEAR CONM EXEC_FULLNAME AGE TURNOVER /* data looks nice when we keep only the variables what we need */
logBONUS Size LEV ROA SalesGr TobinQ LOSS;
run;
* outliers?;
proc means mean std min p1 median p99 max
maxdec=3 data=fff;
run;
proc univariate data=fff noprint;
var ROA SalesGr TobinQ LEV;
output out=f1 pctlpts=1 99
pctlpre=ROA_ SalesGr_ TobinQ_ LEV_
pctlname=P1 P99;
run;
proc print data=f1; run;
data ggg;
if _n_=1 then set f1;
set fff;
* winsorization;
if ROAROA_P99 then ROA=ROA_P99;
if SalesGrSalesGr_P99 then SalesGr=SalesGr_P99;
if TobinQTobinQ_P99 then TobinQ=TobinQ_P99;
if LEV>LEV_P99 then LEV=LEV_P99;
drop ROA_P1 SalesGr_P1 TobinQ_P1 LEV_P1
ROA_P99 SalesGr_P99 TobinQ_P99 LEV_P99;
run;
* Table 1 - Descriptive statistics;
proc means data=ggg n mean std q1 median q3 maxdec=3;
var TURNOVER logBONUS LOSS AGE LEV ROA SalesGr TobinQ
Size;
run;
* Table 2 - Correlations;
proc corr data=ggg nosimple pearson spearman
outp=pearson;
var TURNOVER logBONUS LOSS AGE LEV ROA SalesGr TobinQ
size;
run;
proc export data=pearson
outfile="\\home.org.aalto.fi\jarvah1\data\Downloads\pearson.xlsx" DBMS=xlsx replace;
run;
/* test for differences: CEO turnover vs. no CEO turnover */
proc ttest data=ggg;
var logBONUS LOSS AGE Size LEV ROA SalesGR TobinQ;
class TURNOVER;
run;
/* To produce a cross-tabulation, list the variables separated by an asterisk. This statement produces a
cross-tabulation showing the number of observations for each combination of CEO turnover by reporting a loss (or profit) */
proc freq data=ggg;
tables TURNOVER*LOSS / nocol nopercent expected chisq; /* CHISQ requests chi-square tests of homogeneity and measures of association. */
run; /* EXPECTED option requests the expected cell frequencies be included in the cells */
* Table 3 - OLS;
ods graphics off;
proc reg data=ggg;
model logBONUS=LOSS AGE Size LEV ROA SalesGR TobinQ / white;
run; quit;
* Table 4 - Logistic regression;
ods graphics on;
proc logistic data=ggg plots(only)=ROC;
class YEAR;
model TURNOVER(EVENT="1")=
LOSS AGE Size LEV ROA SalesGR TobinQ YEAR;
run; quit;
* The coefficient on loss is 0.3241. It tells you that the log-odds increases by .3241 if a company reports a loss. It's odds ratio is 1.383 (which is e power the coefficien estimate)
tells you that the probability of CEO turnover for loss reporting firms is 38.3% higher than for firms that a reporting profits. */
/* One approach to evaluating the predictive power of models for binary outcomes is the ROC curve.
The 45-degree line represents the expected ROC curve for a model with an intercept only, that is, one with no predictive power.
The more the curve departs from te 45-degree line, the greater the predictive power.
The standard statistic for summarizing that departure is the area under the curve, which here is reported as 0.6366. */