## Packages --------------------------------------------------------------
library(readxl)
library(lubridate)
library(TSA)
library(NTS)
library(NonlinearTSA)
library(tsDyn)
library(MSwM)
library(quantmod)

## Data: macro series ----------------------------------------------------
macro <- read_excel("D:/Programming Guide/data/macro.xls",
                    col_types = c("date", rep("numeric", 9)), na = 'NA')
head(macro)

# R Guide 19.1 Dummy Variables for Seasonality
# See, R Guide: 7 Multiple Regression Using an APT-Style Model

# First differences / log returns; an NA is prepended so every derived
# column keeps the same length as the source series.
macro$dspread    <- c(NA, diff(macro$BMINUSA))
macro$dcredit    <- c(NA, diff(macro$CCREDIT))
macro$dprod     <- c(NA, diff(macro$INDPRO))
macro$dmoney     <- c(NA, diff(macro$M1SUPPLY))
macro$inflation  <- c(NA, diff(log(macro$CPI)))
macro$rterm      <- c(NA, diff(macro$USTB10Y - macro$USTB3M))
macro$dinflation <- c(NA, 100 * diff(macro$inflation))
macro$rsandp     <- c(NA, 100 * diff(log(macro$SANDP)))
# Excess returns: subtract the monthly risk-free rate (annual rate / 12).
macro$ermsoft <- c(NA, 100 * diff(log(macro$MICROSOFT))) - macro$USTB3M / 12
macro$ersandp <- macro$rsandp - macro$USTB3M / 12

# APT-style regression for Microsoft excess returns
lm_msoft <- lm(ermsoft ~ ersandp + dprod + dcredit + dinflation +
                 dmoney + dspread + rterm, data = macro)
summary(lm_msoft)

# Joint test that the four insignificant macro factors are zero
library(car)
linearHypothesis(lm_msoft,
                 c("dprod = 0", "dcredit = 0", "dmoney = 0", "dspread = 0"))

# Create monthly dummy variables JANDUM, ..., DECDUM in one loop
# (toupper(month.abb) yields "JAN", ..., "DEC", matching the original
# hand-written column names).
for (m in 1:12) {
  macro[[paste0(toupper(month.abb[m]), "DUM")]] <-
    as.integer(month(macro$Date) == m)
}

ts.plot(macro$ermsoft)

# Stepwise (forward) regression; row 2 is dropped because the differenced
# regressors are NA there.
lm_start <- lm(ermsoft ~ 1, data = macro[-2, ])
step(lm_start, direction = "forward", scope = formula(lm_msoft))

## Outliers --------------------------------------------------------------
library(MASS)
truehist(macro$ermsoft)
macro$Date <- as.Date(macro$Date)

# Largest negative excess return
obs1 <- which(macro$ermsoft == min(macro$ermsoft, na.rm = TRUE))
obs1
macro$Date[obs1]

# Take into account big outliers, see R guide page 36-37
macro$APR00DUM <- as.integer(macro$Date == as.Date(macro$Date[obs1]))
#macro$APR00DUM = as.integer(macro$Date == as.Date ("2000-04-01"))

# Second-largest negative excess return, searching after obs1
smpl <- macro$ermsoft[(obs1 + 1):length(macro$ermsoft)]
obs2 <- which(macro$ermsoft ==
                min(macro$ermsoft[(obs1 + 1):length(macro$ermsoft)],
                    na.rm = TRUE))
obs2
macro$Date[obs2]
macro$DEC00DUM <- as.integer(macro$Date == as.Date(macro$Date[obs2]))
#macro$DEC00DUM = as.integer(macro$Date == as.Date ("2000-12-01"))

# Regression including the two outlier dummies and a January dummy
summary(lm(ermsoft ~ ersandp + dprod + dcredit + dinflation + dmoney +
             dspread + rterm + APR00DUM + DEC00DUM + JANDUM, data = macro))

# "Sell in May and go away"
# Summer dummy
SUMMERDUM <- macro$JUNDUM + macro$JULDUM + macro$AUGDUM
# Dummy for other months
OTHERDUM <- macro$JANDUM + macro$FEBDUM + macro$MARDUM + macro$APRDUM +
  macro$MAYDUM + macro$SEPDUM + macro$OCTDUM + macro$NOVDUM + macro$DECDUM
# Remove the constant term using -1 so both dummies can enter
summary(lm(ermsoft ~ ersandp + dprod + dcredit + dinflation + dmoney +
             dspread + rterm + APR00DUM + DEC00DUM + SUMMERDUM + OTHERDUM - 1,
           data = macro))

## S&P 500 monthly seasonality -------------------------------------------
#library(quantmod)
getSymbols('^GSPC', src = 'yahoo', from = "1928-01-01")
head(GSPC)
price <- GSPC[, 4]
plot(price)
returns <- monthlyReturn(price, type = "log") # Log returns

# Monthly dummies for the S&P returns, created as free-standing vectors
# (assign() reproduces the original JANDUM..DECDUM globals)
for (m in 1:12) {
  assign(paste0(toupper(month.abb[m]), "DUM"),
         ifelse(month(returns) == m, 1, 0))
}

m1 <- lm(returns ~ JANDUM) # January effect
summary(m1)
SUMMER <- JUNDUM + JULDUM + AUGDUM
m2 <- lm(returns ~ SUMMER)
summary(m2)
OTHER <- JANDUM + FEBDUM + MARDUM + APRDUM + MAYDUM + SEPDUM + OCTDUM +
  NOVDUM + DECDUM
m3 <- lm(returns ~ SUMMER + OTHER - 1)
summary(m3)

### Cointegration analysis using a dummy ---------------------------------
library(tseries)
library(fUnitRoots)

SandPhedge <- read_excel("D:/Programming Guide/data/SandPhedge.xls",
                         col_types = c("date", "numeric", "numeric"))
View(SandPhedge)
#head(SandPhedge)

# Log prices
SandPhedge$lspot <- log(SandPhedge$Spot)
SandPhedge$lfutures <- log(SandPhedge$Futures)

# Apply Phillips-Ouliaris test for cointegration
dat <- cbind(SandPhedge$lspot, SandPhedge$lfutures)
po.test(dat)
# H0: no cointegration is rejected

# Log returns
SandPhedge$rspot <- c(NA, 100 * diff(log(SandPhedge$Spot)))
SandPhedge$rfutures <- c(NA, 100 * diff(log(SandPhedge$Futures)))

# Cointegrating (levels) regression
log_lm <- lm(SandPhedge$lspot ~ SandPhedge$lfutures)
summary(log_lm)

# Unit-root test applied to residuals (Engle-Granger second step)
urdfTest(log_lm$residuals, type = "nc", lags = 12)@test$test@teststat
length(log_lm$residuals)
# critical value -3.37
# Unit root in the residuals cannot be rejected -> no cointegration
# Note: there is break in the residuals

# Error-correction term: lagged residual (last observation dropped so it
# aligns with the one-period-ahead returns)
u <- log_lm$residuals[-247]
ts.plot(ts(u, start = c(1997, 9), frequency = 12))
abline(h = 0)
summary(lm(SandPhedge$rspot[-1] ~ SandPhedge$rfutures[-1] + u - 1))
# ECM is statistically significant -> cointegration

# Threshold cointegration: split the error-correction term by sign
Sign <- sign(u)
pos <- ifelse(Sign == 1, 1, 0)
neg <- ifelse(Sign == -1, 1, 0)
uplus <- pos * u
uneg <- neg * u
library(MASS)
truehist(uplus)
truehist(uneg)
summary(lm(SandPhedge$rspot[-1] ~ SandPhedge$rfutures[-1] + uplus + uneg))

### 19.2 Estimating Markov Switching Models (Brooks: R Guide) ------------
#library(MSwM)
UKHP <- read_excel("D:/Programming Guide/data/UKHP.xls",
                   col_types = c("date", "numeric"))
#View(UKHP)
head(UKHP)
names(UKHP)[2] <- "hp"
# Percentage house-price change.
# FIX: the original indexed with UKHP$hp[1:nrow(UKHP)-1]; `:` binds
# tighter than `-`, giving indices 0:(n-1) — it only worked because R
# silently drops index 0. Parenthesized to state the intent.
UKHP$dhp <- c(NA, 100 * diff(UKHP$hp) / UKHP$hp[1:(nrow(UKHP) - 1)])
summary(lm(UKHP$dhp ~ 1))

# Markov switching model: 2 regimes, intercept and variance both switch
msmodel <- msmFit(lm(UKHP$dhp ~ 1), k = 2, sw = c(TRUE, TRUE))
summary(msmodel)

# Filtered regime probabilities.
# NOTE(review): column 1 is titled "State 2" and column 2 "State 1" —
# MSwM's regime ordering is arbitrary; confirm against the fitted
# coefficients. (A stray double comma in the second plot call was removed.)
par(lwd = 2, cex.axis = 2)
plot(UKHP$Month[-1], msmodel@Fit@filtProb[, 1],
     type = "l", xlab = "", ylab = "", main = "State 2")
plot(UKHP$Month[-1], msmodel@Fit@filtProb[, 2],
     type = "l", xlab = "", ylab = "", main = "State 1")

## Monthly unemployment rate ----------------------------------------------
getSymbols('UNRATE', src = 'FRED')
UN <- UNRATE
head(UN)
plot(UN)
#un <- log(1+UN/100)
#plot(un)
ar(UN)
library(fUnitRoots)
adfTest(ts(UN), lags = 10, type = "c")
dates <- date(UN)

#AR(1)
mod.ar1 <- lm(UN ~ lag(UN))
summary(mod.ar1)

#Model with only intercept
mod <- lm(UN ~ 1)
mod

#Fit regime-switching model (intercept and variance switch)
msm_intercept <- msmFit(mod, k = 2, sw = c(TRUE, TRUE), p = 0)
summary(msm_intercept)

#Fit regime-switching model with AR(1) model
msm_ar1 <- msmFit(mod, k = 2, sw = c(TRUE, TRUE, TRUE), p = 1)
summary(msm_ar1)

# State 1 probabilities
plot(dates[-1], msm_ar1@Fit@filtProb[, 1],
     type = "l", xlab = "", ylab = "", main = "State 1 probabilities")
# State 2 probabilities
plot(dates[-1], msm_ar1@Fit@filtProb[, 2],
     type = "l", xlab = "", ylab = "", main = "State 2 probabilities")

# Using NTS package
#MSM.fit(un, p=1, nregime=2, sw=c(T,T,T))

## TAR model ---------------------------------------------------------------
# Using TSA package
tar.m <- tar(UN, p1 = 1, p2 = 1, d = 2,
             print = TRUE, is.constant1 = TRUE, is.constant2 = TRUE)
tsdiag(tar.m)

# Using NTS package
Un <- as.double(UN)
est <- uTAR(y = Un, p1 = 2, p2 = 2, d = 1, thrQ = c(0, 1),
            Trim = c(0.1, 0.9), include.mean = TRUE, method = "NeSS", k0 = 50)
est$coef
est$model1
est$model2
uTAR.pred(model = est, orig = length(Un), h = 4)

# SETAR model using NonlinearTSA package
SETAR_model(UN, delay_order = 1, lag_length = 3, trim_value = 0.15)

# Using tsDyn package
# STAR model fitting with automatic selection of the number of regimes
# based on LM tests
mod.star1 <- star(Un, mTh = c(0, 1), control = list(maxit = 3000))
mod.star1
mod.star2 <- star(Un, noRegimes = 3, mTh = c(0, 1),
                  control = list(maxit = 3000))
mod.star2
addRegime(mod.star2)

# LSTAR
# Logistic Smooth Transition AutoRegressive model
mod.lstar1 <- lstar(Un, m = 2, mTh = c(0, 1), control = list(maxit = 3000))
mod.lstar1

### State space models using Kalman filter --------------------------------
library(dlm)
# We consider quarterly deseasonalized GDP of the US economy.
# Real Gross Domestic Product
getSymbols('GDPC1', src = 'FRED')
plot(GDPC1)
head(GDPC1)
tail(GDPC1)

# Quarterly ts object starting in 1947
gdp <- ts(GDPC1, frequency = 4, start = 1947)
head(gdp)
Lgdp <- log(gdp) # Log transformation
# FIX: the original plotted `gdp` with a "log US GDP" axis label; plot the
# log series so the label matches the data shown.
plot(Lgdp, xlab = "", ylab = "log US GDP")

# Starting values for the local linear trend state
level0 <- Lgdp[1]
slope0 <- mean(diff(Lgdp))
slope0 * 4 * 100 # Average GDP growth (annualized, percent)

# We compute the MLE of the 5 unknown parameters:
#   u[1:2] — log-variances of the trend and slope state equations
#            (the measurement variance dV is fixed at 1e-7),
#   u[3]   — log-variance of the AR innovations,
#   u[4:5] — AR(2) parameters of the gap, via dlmModARMA.
buildGap <- function(u) {
  trend <- dlmModPoly(dV = 1e-7, dW = exp(u[1:2]),
                      m0 = c(level0, slope0), C0 = 2 * diag(2))
  gap <- dlmModARMA(ar = u[4:5], sigma2 = exp(u[3]))
  return(trend + gap)
}

init <- c(-3, -1, -3, .4, .4)
outMLE <- dlmMLE(Lgdp, init, buildGap)
dlmGap <- buildGap(outMLE$par)
sqrt(diag(W(dlmGap))[1:3]) # estimated state standard deviations
GG(dlmGap)[3:4, 3]
# AR(2) parameters, probably nonstationary
Mod(polyroot(c(1, -GG(dlmGap)[3 : 4, 3])))
plot(ARMAacf(ar = GG(dlmGap)[3 : 4, 3], lag.max = 20),
     ylim = c(0, 1), ylab = "ACF", xlab = "Lag", type = "h")
# Not stable

# Smoothed trend against the observed series
gdpSmooth <- dlmSmooth(Lgdp, dlmGap)
plot(cbind(Lgdp, dropFirst(gdpSmooth$s[, 1])),
     xlab = "", ylab = "Log GDP",
     lty = c("longdash", "solid"), col = c("darkgrey", "black"),
     plot.type = "single")
plot(dropFirst(gdpSmooth$s[, 1:3]), ann = FALSE, yax.flip = TRUE)

# Let us see how we could introduce stationarity constraints
# in the MLE, using the ARtransPars function
buildgapr <- function(u) {
  trend <- dlmModPoly(dV = 0.000001, dW = c(exp(u[1]), exp(u[2])),
                      m0 = c(Lgdp[1], mean(diff(Lgdp))), C0 = 2 * diag(2))
  gap <- dlmModARMA(ar = ARtransPars(u[4 : 5]), sigma2 = exp(u[3]))
  return(trend + gap)
}

init <- c(-3, -1, -3, .4, .4)
outMLEr <- dlmMLE(Lgdp, init, buildgapr)
outMLEr$value
parMLEr <- c(exp(outMLEr$par[1 : 3])^.5, ARtransPars(outMLEr$par[4: 5]))
round(parMLEr, 4)
Mod(polyroot(c(1, -ARtransPars(outMLEr$par[4 : 5]))))
plot(ARMAacf(ar = ARtransPars(outMLEr$par[4 : 5]), lag.max = 10),
     ylim = c(0, 1), ylab = "ACF")
## Filtered and smoothed estimates under the stationarity-constrained fit
modr <- buildgapr(outMLEr$par)
outFr <- dlmFilter(Lgdp, modr)
outSr <- dlmSmooth(outFr)

# Observed log GDP together with the smoothed potential-output state
ts.plot(cbind(Lgdp, outSr$s[-1, 1]), col = 1 : 2, ylab = "GDP")
plot.ts(ts(outSr$s[-1, c(1, 3)], frequency = 4, start = 1947),
        main = "Potential output and output gap")
plot.ts(ts(outSr$s[-1, c(1, 3)], frequency = 4, start = 1947),
        main = "Potential output and output gap", ylab = c("GDP", "Gap"))
par(mfrow = c(1, 1))
plot.ts(ts(outSr$s[-1, 1], frequency = 4, start = 1947),
        main = "Potential GDP", ylab = "")
plot.ts(ts(outSr$s[-1, 3], frequency = 4, start = 1947),
        main = "GDP gap", ylab = "")

## Time-varying CAPM regression model -------------------------------------
capm <- read.csv("D:/data/capm.csv")
#View(capm)
head(capm)

ibm.r <- diff(log(capm$ibm))   # IBM returns
mkt.r <- diff(log(capm$sp500)) # S&P 500 returns
rf <- capm$riskfree
ibm.exr <- ibm.r - rf[-1]      # IBM excess return
mkt.exr <- mkt.r - rf[-1]      # Market excess return

# Static CAPM as a benchmark
ibm.capm <- lm(ibm.exr ~ mkt.exr)
summary(ibm.capm) # IBM CAPM
names(ibm.capm)
a <- ibm.capm$coefficients[1] # static alpha
b <- ibm.capm$coefficients[2] # static beta

# Dynamic regression: alpha and beta evolve as random walks;
# u[1] is the observation log-variance, u[2:3] the state log-variances.
buildCapm <- function(u) {
  dlmModReg(mkt.exr, dV = exp(u[1]), dW = exp(u[2 : 3]))
}
outMLE <- dlmMLE(ibm.exr, parm = rep(0, 3), buildCapm)
exp(outMLE$par)
outMLE$value
mod <- buildCapm(outMLE$par)
outS <- dlmSmooth(ibm.exr, mod)

# Smoothed time-varying alpha and beta
plot.ts(dropFirst(ts(outS$s[, 1:2], start = c(1990, 4), frequency = 12)),
        main = "IBM Alpha and beta")
plot.ts(dropFirst(ts(outS$s[, 1], start = c(1990, 4), frequency = 12)),
        main = "IBM Alpha", ylab = "")
plot.ts(dropFirst(ts(outS$s[, 2], start = c(1990, 4), frequency = 12)),
        main = "IBM Beta", ylab = "")
abline(h = b) # static beta for comparison