#getwd()
#setwd("D:/Rcode")

library(quantmod)
library(lubridate)
library(forecast)
library(ggplot2)
library(tseries)

# Download data from FED's FRED database

# Annual inflation rate
getSymbols('FPCPITOTLZGUSA',src='FRED')
INF <- FPCPITOTLZGUSA
tail(INF)
class(INF)
plot(INF, main = "US inflation")


# Monthly unemployment rate
getSymbols('UNRATE',src='FRED')
UN <- UNRATE
month <- month(UN)
year <- year(UN)
undata <- cbind(UN,year,month)
head(undata)
tail(undata)
UN <- undata[(undata$year > 1959 & undata$year < 2021) & undata$month==12]
UN <- ts(UN$UNRATE,start=c(1960,1),frequency=1)
plot(UN, main = "US unemployment", lwd=2)

### OLS model
length(INF)
length(UN)
# Phillips curve
m1 <- lm(INF~UN)
summary(m1)
# The slope parameter is significant at 5%
# but the sign is incorrect
# Note that the test is for a one-sided hypothesis
# of the form H0: b=0, the critical value is abt. 1.96
a <- m1$coefficients[1]
b <- m1$coefficients[2]
plot(as.double(UN),as.double(INF),ylab="INF",xlab="UN")
abline(a=a,b=b)

### ARIMA models

# Unemployment
un <- ts(log(1+as.double(UN)/100),start=c(1960,1),frequency=1)
class(un)

# Inflation
inf <- ts(log(1+as.double(INF)/100),start=c(1960))
class(inf)
head(inf)
#par(mfrow=c(2,1))

acf(inf)
pacf(inf)

m2 <- ar.mle(inf,order.max=10) # Select AR order
m2

m3 <- arma(inf,order=c(3,0)) # Estimate AR(3)
summary(m3)
# Remove intercept
m3 <- arma(inf,order=c(3,0),include.intercept = F)
summary(m3)
names(m3)

m3.new <- Arima(inf,order=c(3,0,0))
m3.new

# Plot characteristic roots from ARIMA model
# Produces a plot of the inverse AR and MA roots 
# of an ARIMA model. Inverse roots outside the unit 
# circle are shown in red.

plot(m3.new) 
autoplot(m3.new)

graphics.off()

m4 <- arima(inf,order=c(3,0,0)) # AR(3)
m4

length(inf)
length(un)

m5 <- arima(inf,order=c(3,0,0),xreg=un) # Exogenius variable un
m5
# Note that the parameter for UN has now the correct
# sign but it is not significat

m6 <- arima(inf,order=c(0,0,3)) # MA(3)
m6

m7 <- arima(inf,order=c(1,0,1)) # ARIMA(1,0,1)
m7

# Horse race
BIC(m4,m6,m7)


tsdiag(m7)
par(mar = c(1,1,1,1))# Changing area margins
tsdiag(m7)

predict(m7,6)

f <- predict(m7,6) 
names(f)
fore <- f$pred*100
se <- f$se*100
up <- fore+1.96*se    # 95% confidence limits
down <- fore-1.96*se
dat <- cbind(down,fore,up)
dat

ts.plot(dat,col=c("red","black","red"),main="Inflation forecast",lwd=2)

m8 <- auto.arima(inf)
m8
summary(m8)

# Now, the function identifies ARIMA(0,1,2), siggesting
# that inflation is nonstationary and therefore it has
# to be differenced

m8 <- arima(inf,order=c(0,1,2)) # ARIMA(0,1,2)
m8

# Horse race
BIC(m4,m6,m7,m8)


m7 <- Arima(inf,order=c(1,0,1))
m7

# Plot characteristic roots from ARIMA model
# Produces a plot of the inverse AR and MA roots 
# of an ARIMA model. Inverse roots outside the unit 
# circle are shown in red.

#par(mfrow=c(1,1))
plot(m7) 
autoplot(m7)
graphics.off()

### Backtesting

# US real gross domestic product, annual in 2012 dollars
getSymbols('GDPCA',src='FRED')
plot(GDPCA)
head(GDPCA)
tail(GDPCA)

GDP <- GDPCA$GDPCA 
length(GDP)

g.data <- ts(as.double(GDP),start=c(1929))

# Testing accuracy of the model by sampling:

sample = log(g.data[1:82])  # Estimation sample
test   = log(g.data[83:92]) # Test sample
arma.fit <- auto.arima(sample) # Fit an ARIMA model
arma.fit
arma.forecast <- forecast(arma.fit, h=10)
arma.fit.accuracy <- accuracy(arma.forecast,test)
arma.fit.accuracy

### Moving average in R

# Making the forecast with the moving average (ma)
g.movav <- forecast(ma(g.data, order=3), h=5)
g.movav

plot(g.movav,col="blue",lwd=4,
     main="Forecast of real GDP",
     ylab="In current USD")
years <- seq(from=1929,to=2020,by=1)
lines(years,GDP,col="red",type="b")

## Testing the accuracy of the forecasts
## Backtesting

# Testing accuracy of the model by sampling
g.ts.tst <- ts(g.data[1:20])
g.movav.tst <- forecast(ma(g.ts.tst,order=3),h=5)
accuracy(g.movav.tst,g.data[22:26])

### Basic exponential smmothing
g.exp <- ses(g.data,5,initial="simple")
g.exp # Simple exponential smoothing uses the last value as
      # the forecast and finds confidence intervals around it

plot(g.exp,col="blue",lwd=4,
     main="Forecast of real GDP",
     ylab="In current USD")
years <- seq(from=1929,to=2020,by=1)
lines(years,GDP,col="red",type="b")

### Holt-Winters exponential smoothing
# a.k.a triple exponential smoothing, takes into
# account seasonal trend, here we do not have seasonality
g.exp <- holt(g.data,5,initial="simple")
g.exp # Holt exponential smoothing

plot(g.exp,col="blue",lwd=4,
     main="Forecast of real GDP",
     ylab="In current USD")
#years <- seq(from=1929,to=2020,by=1)
lines(years,GDP,col="red",type="b")

### Seasonal decomposition

# Divide up a time series into three components: the trend, seasonal
# and remainder. Using Loess (STL) the STL is available within R
# via the stl() function

# We use the data nottem
# Average monthly temperature at Nottingham, 1920-1939
nottem.stl <- stl(nottem, s.window="periodic")
plot(nottem.stl)
summary(nottem.stl)

### Exponential models

# Both the Holt-Winters() function from the package stats,
# and the forecast package can be used to exponential smoothing

# Simple exponential: models level
fit <- HoltWinters(g.data, beta=FALSE,gamma=FALSE)
fit

# Double exponential: models level and trend
fit <- HoltWinters(g.data, gamma=FALSE)
fit

# Triple exponential: models level, trend and seasonal
# components. This fails on the example, as there is no
# seasonal component, since the data are annual
# fit <- HoltWinters(g.data)
# fit

# Predictive accuracy
accuracy(forecast(fit,5))

# Predict next 5 future values
forecast(fit,5)

plot(fit,col="blue",lwd=4,
     main="Forecast of real GDP",
     ylab="In current USD")
#years <- seq(from=1929,to=2020,by=1)
lines(years,GDP,col="red",type="b")

# Since the DGP data did not show a seasonal pattern and
# hence it was not possible to use the triple exponential
# mode, we will consider another example. Temperatures should
# certainly display a seasonal pattern. The dataset nottem
# contaibs the daily temperatures in Nottingham from 1920 and
# 1939. We use the Holt-Winters function and plot the results.

@library(datasets)
n.hw <- HoltWinters(nottem)
n.hw
n.hw.fc <- forecast(n.hw,50)
plot(n.hw.fc)