# 20 Panel Data Models
#
# Estimates pooled, fixed effects and random effects panel regressions with
# plm and compares the fixed and random effects models with a Hausman test,
# first on the panelx data and then on the family firms data.

library(readxl)
library(plm)

panelx <- read_excel(
  "D:/Programming Guide/data/panelx.xls",
  col_types = c("numeric", "numeric", "numeric", "numeric"),
  na = "NA"
)
head(panelx)

# Declare the panel structure, indexed by firm_ident and year.
data <- pdata.frame(panelx, index = c("firm_ident", "year"))
pdim(data)
head(data)
summary(data[c("return", "beta")])

pooled <- plm(return ~ beta, model = "pooling", data = data)
summary(pooled)

# This pooled regression assumes that the intercepts are the
# same for each firm and for each year. This may be an
# inappropriate assumption. Thus, next we (separately)
# introduce fixed and random effects to the model.

# The only aspect to change is the argument model to "within"
# in the above specification.
fixed <- plm(return ~ beta, model = "within", data = data)
summary(fixed)

# We can see that the estimate on the beta parameter is
# negative and statistically significant here. An intercept
# is not reported as there are 1734 groups (firms), each with
# different intercepts (fixed effects).

# We now estimate a random effects model. For this, we simply
# change the argument model to "random" in the plm function.
# We leave all other specifications unchanged and press Enter
# to generate the regression output.

# The slope estimate is again of a different order of magnitude
# compared to both the pooled and the fixed effects regressions.
# As the results for the fixed effects and random effects models
# are quite different, it is of interest to determine which model
# is more suitable for our setting. To check this, we use the
# Hausman test. The null hypothesis of the Hausman test is that
# the random effects (RE) estimator is indeed an efficient
# (and consistent) estimator of the true parameters. If this
# is the case, there should be no systematic difference between
# the RE and FE estimators and the RE estimator would be preferred
# as the more efficient technique. In contrast, if the null is
# rejected, the fixed effect estimator needs to be applied.
random <- plm(return ~ beta, model = "random", data = data)
summary(random)

# To run the Hausman test, we use the function phtest
# which only needs the two models as inputs. As we have
# stored the two models before, it is sufficient to run
# the test by typing phtest(fixed,random).
phtest(fixed, random)

# The Chisq value for the Hausman test is 12.804 with a
# corresponding p-value of 0.0003. Thus, the null
# hypothesis that the difference in the coefficients is
# not systematic is rejected at the 1% level, implying
# that the random effects model is not appropriate and
# that the fixed effects specification is to be preferred.


### Performance of family firms

# Alternative CSV import, kept for reference:
#familyfirms <- read.csv("D:/data/familyfirms.csv", header = TRUE)
#View(familyfirms)

#library(readxl)
familyfirms <- read_excel("D:/data/familyfirms.xlsx")

# View() opens an interactive data viewer, so only call it when a user is
# attached; this keeps the script runnable in batch mode (e.g. Rscript).
if (interactive()) {
  View(familyfirms)
}
names(familyfirms)
min(familyfirms$agefirm)

# Zero values
# Alternative treatment, kept for reference: recode zero ages to 1.
#obs <- which(familyfirms$agefirm == 0)
#familyfirms$agefirm[obs] <- 1

# Recode zero firm ages to NA. The models below use log(agefirm), and
# log(0) is -Inf.
obs <- which(familyfirms$agefirm == 0)
obs
familyfirms$agefirm[obs] <- NA

#library(plm)
# Declare the panel structure, indexed by company and year. Note that this
# overwrites data, pooled, fixed and random from the first example.
data <- pdata.frame(familyfirms, index = c("company", "year"))
pdim(data)
head(data)
summary(
  data[c("agefirm", "meanagef", "assets", "bs_volatility", "founderCEO", "Q")]
)

pooled <- plm(
  log(Q) ~ founderCEO + log(assets) + log(agefirm) + log(bs_volatility),
  model = "pooling",
  data = data
)
summary(pooled)

# Fixed effects
fixed <- plm(
  log(Q) ~ founderCEO + log(assets) + log(agefirm) + log(bs_volatility),
  model = "within",
  data = data
)
summary(fixed)

# Random effects
random <- plm(
  log(Q) ~ founderCEO + log(assets) + log(agefirm) + log(bs_volatility),
  model = "random",
  data = data
)
summary(random)

phtest(fixed, random)
# The fixed effects specification is to be preferred