library(forecast) # Arima and auto.arima
library(fracdiff) # Fit and simulate long memory processes

# Exercise 6.1

# a)

# Read data and plot time series
const <- read.table("data/const.txt", header = TRUE, sep = ",", row.names = 1)
const <- ts(const[[1]], start = 1966, frequency = 12)
plot(const)

# Time series does not look stationary. Level of the time series changes and
# there is a seasonal component.

# b)

# Decompose time series
decomp <- stl(const, s.window = "periodic")
plot(decomp)

# c)

# Filter trend
trend_filter <- filter(const, c(1, rep(2, 11), 1) / 24)

# Decomposition trend
trend_stl <- decomp$time.series[, 2]

# Plot trends
plot(const, lty = 3, col = "black")
lines(trend_filter, lty = 2, col = "red")
lines(trend_stl, lty = 1, col = "blue")
legend("topleft", legend = c("Time series", "Filter", "STL"),
       col = c("black", "red", "blue"), lty = c(3, 2, 1))

# Trends almost identical
# Trend given by filter a bit rougher

# 6.2

# a)

# Read data and visualize
data <- read.table("data/arsimulation.txt", header = TRUE, row.names = 1)
data <- ts(data)
plot(data, yax.flip = TRUE)

# Visible peak in first time series X, that has the most heavy-tailed residuals.

# b)

# Fit AR(1) processes
fit_x <- Arima(data[, 1], order = c(1, 0, 0), include.mean = FALSE,
               method = "ML")
fit_y <- Arima(data[, 2], order = c(1, 0, 0), include.mean = FALSE,
               method = "ML")
fit_z <- Arima(data[, 3], order = c(1, 0, 0), include.mean = FALSE,
               method = "ML")

coefs <- c(fit_x$coef, fit_y$coef, fit_z$coef)
names(coefs) <- c("x", "y", "z")
round(coefs, 3)

# Estimates are relatively close to the true value of the parameter -1/2.

# c)

#' Create block bootstrap confidence interval
#'
#' @param series Univariate time series object of class ts.
#' @param m Number of bootstrap samples.
#' @param w Minimum number of consecutive observations in a bootstrap sample.
#' @param alpha Calculate (1-alpha) level confidence interval.
#'
#' @return A vector of length two, giving lower and upper bounds of the
#'    confidence interval.
bootstrap <- function(series, m = 5000, w = 10, alpha = 0.05) {
  n <- length(series)
  boot <- rep(NA, m)

  for (i in 1:m) {
    # Simulate window start and end points. Window size must be at least w.
    start <- 0
    end <- 0
    while (end - start < w - 1) {
      se <- sample(1:n, 2, replace = FALSE)
      start <- min(se)
      end <- max(se)
    }

    # Estimate parameter from bootstrap sample
    boot[i] <- Arima(series[start:end], order = c(1, 0, 0),
                     include.mean = FALSE, method = "ML")$coef[1]
  }

  # Construct confidence interval
  quantile(boot, c(alpha / 2, 1 - alpha / 2))
}

# Compute bootstrap confidence intervals
confint_x <- bootstrap(data[, 1])
confint_y <- bootstrap(data[, 2])
confint_z <- bootstrap(data[, 3])
confint_x
confint_y
confint_z

# d)

# The standard Cauchy distribution is exactly the Student's t-distribution
# with 1 degree of freedom. Student's t-distribution with k degrees of freedom
# has k-1 theoretical moments. As the variance of x_t and the variance of
# epsilon_t are undefined, Process X is not weakly stationary. Recall that,
# weak stationarity contains the assumption that the variance is time invariant
# and finite. For more information about ARMA processes with infinite variance,
# see Chapter 13.3 of (Brockwell and Davis 2009).

# Consequently, the AR(1) processes Y and Z have finite variances.
# Under the assumptions of this exercise and by previous theoretical exercises,
# we have that Processes Y and Z are weakly stationary.

# a)

# Read data
fracsim <- read.csv("data/fracsim.txt")
fracsim <- ts(fracsim[[1]])
length(fracsim) # 1000 observations

# Plot time series and ACF/PACF
par(mfrow = c(2, 2))
plot(fracsim)
acf(fracsim, lag.max = 50, main = "")
pacf(fracsim, lag.max = 50, main = "")

# No trend or seasonality, but ACF decays quite slowly.

# Fit ARMA process
model_auto <- auto.arima(fracsim, d = 0, D = 0, max.p = Inf, max.q = Inf,
                         max.P = 0, max.Q = 0, max.order = Inf)
model_auto

# Time series plot, ACF and PACF of residuals
par(mfrow = c(2, 2))
plot(model_auto$residuals, type = "l", ylab = "residuals")
acf(model_auto$residuals, lag.max = 50, main = "")
pacf(model_auto$residuals, lag.max = 50, main = "")

# Couple significant spikes in ACF/PACF

# b)

# Simulated sample is a realization of fractionally integrated noise
# with d = 0.45.

# Fit fractionally integrated noise mode
model_frac <- fracdiff(fracsim, nar = 0, nma = 0)
model_frac$d # Quite close to true value 0.45

# Residuals of fractionally integrated noise model behave similarly as the
# residuals given by auto.arima model.
par(mfrow = c(2, 2))
plot(model_frac$residuals, type = "l", ylab = "residuals")
acf(model_frac$residuals, lag.max = 50, main = "")
pacf(model_frac$residuals, lag.max = 50, main = "")