# NOTE(review): machine-specific working directory; the relative "data/..."
# paths used by the read.table() calls in this script depend on it.
setwd("~/School/Prediction/Week_3")

# 3.1 ----

# In this course, the main focus is on stationary time series since they
# are quite straightforward to analyze (sort of like linear least squares)
# and can be approximated by the ARMA processes that we consider in this
# course.

# Stationary time series x_t
# (1) E[x_t] = mu for all t
# (2) Var[x_t] = sigma^2 < inf for all t
# (3) Cov(x_t, x_k) = gamma_{t - k} for all t, k

# Thus the level of the time series should not change much,
# the range of variation should remain the same
# and no deterministic seasonality should exist

# Trend = Systematic increase/decrease over the entire time period

# Draw a 2 x 2 diagnostic panel for a time series: the series itself, its
# estimated ACF, its estimated PACF, and the estimated spectral density
# plotted against period (1 / frequency).
#
# Arguments:
#   ts   the time series to inspect (passed to plot/acf/pacf/spectrum).
#   lag  maximum lag shown in the ACF and PACF plots (default 10).
#
# Returns a character string reporting the period at which the estimated
# spectral density is largest. The caller's `mfrow` layout is restored when
# the function exits.
ts_plotter <- function(ts, lag = 10) {
  # par() returns the previous values of the settings it changes, so the
  # layout can be put back once the four panels have been drawn.
  old_par <- par(mfrow = c(2, 2))
  on.exit(par(old_par), add = TRUE)

  plot(ts)
  # Spell out lag.max instead of relying on partial matching of `lag`.
  acf(ts, lag.max = lag)
  pacf(ts, lag.max = lag)

  # Only the estimates are needed here; the period-scale plot is drawn below.
  spectr <- spectrum(ts, plot = FALSE)
  periods <- 1 / spectr$freq
  plot(periods, spectr$spec, xlab = "Period", ylab = "Spectral density")

  sprintf(
    "Period with maximum spectral density is %.1f",
    periods[which.max(spectr$spec)]
  )
}

intel <- read.table("data/intel.txt", header = TRUE)
# Drop the first column and treat the remaining columns as a multivariate
# series.
intel_ts <- ts(intel[, -1])

# Intel Stock closing price on exchange days at NYSE
ts_plotter(intel_ts[, 1])
# No systematic changes in the level over the time period though it does seem
# to vary as a function of time
# No apparent seasonality, the only peak in the spectrum is at the full period
# length
# While it is difficult to infer whether the series is stationary, the
# underlying process (stock prices) is definitely not since prices evolve
# according to supply and demand. So it's safer to assume non-stationarity.

# Intel Stock daily volume on exchange days at NYSE
ts_plotter(intel_ts[, 2])
# No systematic changes in level but there are spikes in the volume which
# have a decay time so the level is not stable
# No apparent seasonality, the spectrum peaks are likely caused by the
# sudden spikes in volume
# Since volume reflects supply/demand, we can infer that the series
# is unlikely to be stationary.
sunspot <- read.table("data/sunspot.txt", header = TRUE)
sunspot_ts <- ts(sunspot$Spots, start = 1749)

# Number of annual sunspots
ts_plotter(sunspot_ts, lag = 50)
# No trends in the level
# Apparent seasonality with a (stochastically) changing amplitude,
# the approximate length of the period is 11 years though it seems
# somewhat unstable
# The seasonal component implies variability in the level
# so the process is not stationary.

mlco2 <- read.table("data/mlco2.txt", header = TRUE, row.names = 1)
mlco2_ts <- ts(mlco2$MLCO2, frequency = 12) # Year as the natural time unit

# Monthly CO2 measurements of the Mauna Loa volcano
ts_plotter(mlco2_ts, lag = 50)
# Systematic increasing trend is apparent, possibly linear since the peaks
# seem equally distant
# Seasonality with a stable period length of 12 months, amplitude also stable
# Clear that the level of the series evolves as a function of time ->
# the series is not stationary

sales <- read.table("data/sales.txt", header = TRUE, row.names = 4)
sales_ts <- ts(sales$Sales, start = 1970, frequency = 12)

# Monthly sales volume of a wholesaler
ts_plotter(sales_ts, lag = 50)
# Increasing trend is apparent
# Clear seasonal behavior, the period would seem to be 12 months,
# though the spectrum maximum is 6 months, perhaps the shape of the period
# is distorting it
# The amplitude seems to be increasing which makes sense for a larger customer
# base
# Given these observations, the non-stationarity is apparent

passengers <- read.table("data/passengers.txt", header = TRUE, row.names = 4)
passengers_ts <- ts(passengers$Passengers, start = 1949, frequency = 12)

# Monthly airline passengers on international routes in USA
ts_plotter(passengers_ts, lag = 50)
# Increasing, slightly curved non-linear trend
# Regular seasonality with a period length of 12 months,
# amplitude grows as a function of level, reason is similar
# to the sales data
# Clear that the level of the series evolves as a function of time ->
# the series is not
# stationary

# 3.2 ----

# Compare the raw and log-transformed passenger series side by side. The
# previous graphical settings are saved and restored afterwards so the tight
# margins used here do not clip axis titles in later plots.
old_par <- par(mfrow = c(1, 2), mar = c(2.5, 2.5, 1.5, 1.5))
plot(passengers_ts, main = "Passengers")
plot(log(passengers_ts), main = "Log(Passengers)")
par(old_par)
# For this time series, the logarithmic transformation seems to be
# variance stabilizing in the sense that the amplitude of the seasonal
# component remains roughly constant. However, since the trend is probably
# sub-exponential in the original scale, the logarithmic transformation
# overcompensates the curvature.

# 3.3 ----

# Note that while ACF and PACF are defined only for stationary processes,
# the estimates can be computed for any time series which can make
# identifying non-stationarity easier since many violations are visible
# in the plots.

# The dashed lines indicate the critical value of a significance test
# for a given autocorrelation. Values within the dashed lines are not
# statistically significantly different from zero.

ts_plotter(intel_ts[, 1])
# Both ACF and PACF decay or vanish quite quickly so
# based on those plots, the series is quite stationary.

ts_plotter(sunspot_ts, lag = 50)
# Note the oscillatory nature of the estimated ACF
# Note that certain seasonal processes can be stationary if
# the dependence decays fast enough. In this case, the ACF
# decays rather slowly with peaks appearing at many
# multiples of the period length.
# The PACF has some small peaks but they are quite minor

# Homework 3.4 ----

?diff
# lag refers to 1 - L^lag
# differences refers to (1 - L)^differences
# Think about which operations are relevant and in which order
# they should be applied.