# 20 Panel Data Models

library(readxl)
library(plm)

# Import the returns panel from the Excel workbook. All four columns are
# read as numeric, and the literal string "NA" is treated as missing.
panelx <- read_excel(
  "D:/Programming Guide/data/panelx.xls",
  col_types = rep("numeric", 4),
  na = "NA"
)
# Peek at the first rows to confirm the import worked.
head(panelx)


# Declare the panel structure: firm_ident identifies the individual (firm)
# and year identifies the time period.
data <- pdata.frame(panelx, index = c("firm_ident", "year"))
# Report the panel dimensions and show the first rows.
pdim(data)
head(data)

# Descriptive statistics for the two variables used in the regressions.
summary(data[c("return", "beta")])

# Pooled OLS: regress return on beta with a single common intercept.
pooled <- plm(return ~ beta, data = data, model = "pooling")
summary(pooled)

# This pooled regression assumes that the intercepts are the 
# same for each firm and for each year. This may be an 
# inappropriate assumption. Thus, next we (separately) 
# introduce fixed and random effects to the model. 
# For the fixed effects model, the only change to the above
# specification is to set the argument model to "within".

# Fixed effects ("within") estimator for the same specification.
fixed <- plm(return ~ beta, data = data, model = "within")
summary(fixed)

# We can see that the estimate on the beta parameter is 
# negative and statistically significant here. An intercept 
# is not reported as there are 1734 groups (firms), each with
# different intercepts (fixed effects).

# We now estimate a random effects model. For this, we simply
# change the argument model to "random" in the plm function. 
# We leave all other specifications unchanged and press Enter 
# to generate the regression output.

# The slope estimate of the random effects model (estimated in the 
# code below) is again of a different order of magnitude 
# compared to both the pooled and the fixed effects regressions.
# As the results for the fixed effects and random effects models
# are quite different, it is of interest to determine which model
# is more suitable for our setting. To check this, we use the
# Hausman test. The null hypothesis of the Hausman test is that 
# the random effects (RE) estimator is indeed an efficient 
# (and consistent) estimator of the true parameters. If this 
# is the case, there should be no systematic difference between 
# the RE and FE estimators and the RE estimator would be preferred
# as the more efficient technique. In contrast, if the null is 
# rejected, the fixed effects estimator needs to be applied.


# Random effects estimator for the same specification.
random <- plm(return ~ beta, data = data, model = "random")
summary(random)

# To run the Hausman test, we use the function phtest
# which only needs the two models as inputs. As we have 
# stored the two models before, it is sufficient to run
# the test by typing phtest(fixed,random).

# Hausman test comparing the stored fixed and random effects fits.
phtest(fixed, random)

# The Chisq value for the Hausman test is 12.804 with a 
# corresponding p-value of 0.0003. Thus, the null
# hypothesis that the difference in the coefficients is
# not systematic is rejected at the 1% level, implying
# that the random effects model is not appropriate and 
# that the fixed effects specification is to be preferred.

### Performance of family firms

#familyfirms <- read.csv("D:/data/familyfirms.csv", header=T)
#View(familyfirms)

#library(readxl)
# Load the family-firms workbook (column types are guessed by read_excel).
familyfirms <- read_excel("D:/data/familyfirms.xlsx")
# View() opens a GUI data viewer, so only call it in an interactive session;
# this keeps the script runnable via Rscript / R CMD BATCH.
if (interactive()) {
  View(familyfirms)
}
# List the available column names.
names(familyfirms)

# Smallest firm age in the sample; the original note flags zero values here.
min(familyfirms$agefirm)

# Alternative treatment kept for reference: recode zero ages to 1 instead.
#obs <- which(familyfirms$agefirm == 0)
#familyfirms$agefirm[obs] <- 1

# Row positions where the firm age is exactly zero (printed for inspection).
obs <- which(familyfirms$agefirm == 0)
obs
# Mark those ages as missing -- presumably because the models further down
# use log(agefirm), which is not finite at zero.
familyfirms$agefirm <- replace(familyfirms$agefirm, obs, NA)

#library(plm)

# Declare the panel structure: company identifies the individual and year
# identifies the time period. This replaces the earlier `data` object.
data <- pdata.frame(familyfirms, index = c("company", "year"))
# Report the panel dimensions and show the first rows.
pdim(data)
head(data)

# Descriptive statistics for the variables of interest.
summary(data[c(
  "agefirm", "meanagef", "assets",
  "bs_volatility", "founderCEO", "Q"
)])

# Pooled OLS of log(Q) on the founder-CEO indicator and log controls.
pooled <- plm(
  log(Q) ~ founderCEO + log(assets) + log(agefirm) + log(bs_volatility),
  data = data,
  model = "pooling"
)
summary(pooled)

# Fixed effects ("within") estimator, same specification as the pooled model.
fixed <- plm(
  log(Q) ~ founderCEO + log(assets) + log(agefirm) + log(bs_volatility),
  data = data,
  model = "within"
)
summary(fixed)

# Random effects estimator, same specification as the pooled model.
random <- plm(
  log(Q) ~ founderCEO + log(assets) + log(agefirm) + log(bs_volatility),
  data = data,
  model = "random"
)
summary(random)

# Hausman test comparing the fixed and random effects fits for family firms.
phtest(fixed, random)
# Conclusion recorded by the author: the fixed effects specification is to
# be preferred.