Portfolio Optimization
Chapter 4 - Financial Data: Time Series Modeling

R code

Published

October 1, 2024

R code examples for Chapter 4 of the book:

Daniel P. Palomar (2024). Portfolio Optimization: Theory and Application. Cambridge University Press.

Loading packages

The following packages are used in the examples:

# basic finance
library(xts)                    # to manipulate time series of stock data
library(pob)                    # book package with financial data
                                # install with: devtools::install_github("dppalomar/pob")

# data
library(RcppRoll)               # rolling methods
library(TTR)                    # exponentially moving average
library(rugarch)                # time series models
library(MARSS)                  # Kalman
library(stochvol)               # to compute volatility envelope

# plotting
library(ggplot2)                # for nice plots
library(patchwork)              # for combining plots
library(scales)

Temporal structure

Example of a synthetic Gaussian AR(1) time series:

library(rugarch)

# specify an AR(1) model with given coefficients and parameters
arma_fixed_spec <- arfimaspec(mean.model = list(armaOrder = c(1,0), include.mean = TRUE), 
                              fixed.pars = list(mu = 0.01, ar1 = -0.9, sigma = 0.2))

# simulate one path
T <- 300
set.seed(42)
path_arma <- arfimapath(arma_fixed_spec, n.sim = T)
x <- path_arma@path$seriesSim

data.frame(t = 1:T, x = x) |>
  ggplot(aes(x = t, y = x)) +
  geom_line(linewidth = 0.8, color = "blue", show.legend = FALSE) +
  xlab(element_blank()) + ylab(element_blank()) + ggtitle("AR(1) time series") 

Mean modeling

Moving average (MA)

Forecasting with moving average:

library(RcppRoll)  # fast rolling means

# extract market data
y <- log(SP500_2015to2020$index["2020"])
y0 <- y[1]
x <- diff(y)[-1]
colnames(y) <- colnames(x) <- "true"
y_all <- y
x_all <- x


# MA(10) on log-prices y
y_forecast <- xts(roll_meanr(y, n = 20, fill = NA), index(y))
colnames(y_forecast) <- "MA(20) on log-prices"
y_all <- cbind(y_all, lag(y_forecast),         check.names = FALSE)
x_all <- cbind(x_all, lag(y_forecast - y)[-1], check.names = FALSE)

# MA(10) on log-returns x
x_forecast <- xts(roll_meanr(x, n = 20, fill = NA), index(x))
colnames(x_forecast) <- "MA(20) on log-returns"
x_all <- cbind(x_all, lag(x_forecast)[-1], check.names = FALSE)
y_all <- cbind(y_all, lag(x_forecast + y), check.names = FALSE)

y_error <- xts(apply(as.matrix(y_all), 2, function(column) (column - as.vector(y_all[, 1]))), index(y_all))


# plots
p_y <- ggplot(fortify(y_all, melt = TRUE), aes(x = Index, y = Value, color = Series)) +
  geom_line(linewidth = 1) +
  scale_x_date(date_breaks = "2 months", date_labels = "%b %Y", date_minor_breaks = "1 day") +
  scale_color_manual(values = c("black", hue_pal()(2))) +
  theme(legend.title = element_blank()) +
  xlab(element_blank()) + ylab(element_blank()) + ggtitle("Log-price time series")

p_x <- ggplot(fortify(x_all, melt = TRUE), aes(x = Index, y = Value, color = Series)) +
  geom_line(linewidth = 1) +
  scale_x_date(date_breaks = "2 months", date_labels = "%b %Y", date_minor_breaks = "1 day") +
  scale_color_manual(values = c("black", hue_pal()(2))) +
  theme(legend.title = element_blank()) +
  xlab(element_blank()) + ylab(element_blank()) + ggtitle("Log-return time series")

p_y_error <- ggplot(fortify(y_error, melt = TRUE), aes(x = Index, y = Value, color = Series)) +
  geom_line(linewidth = 1) +
  scale_x_date(date_breaks = "2 months", date_labels = "%b %Y", date_minor_breaks = "1 day") +
  scale_color_manual(values = c("black", hue_pal()(2))) +
  theme(legend.title = element_blank()) +
  xlab(element_blank()) + ylab(element_blank()) + ggtitle("Forecast error time series")

p_y / p_x / p_y_error + plot_layout(guides = "collect")

# table
df_MSE <- colMeans(y_error^2, na.rm = TRUE)[-1] |> rbind() |> as.data.frame()
knitr::kable(df_MSE, booktabs = TRUE, linesep = "", digits = 6)
MA(20) on log-prices MA(20) on log-returns
0.004032 0.000725

ARMA

Forecasting with ARMA models:

library(rugarch)

# extract market data
y <- log(SP500_2015to2020$index["2020"])
x <- diff(y)[-1]
colnames(y) <- colnames(x) <- "true"
y_all <- y
x_all <- x

T <- nrow(x)
T_tst <- round(0.8*nrow(x))



# model specification of an iid
spec <- arfimaspec(mean.model = list(armaOrder = c(0,0), include.mean = TRUE))
modelroll <- arfimaroll(spec = spec, data = x, n.ahead = 1,
                        forecast.length = T_tst, refit.every = 10, refit.window = "moving")
x_forecast <- xts(c(rep(NA, T-T_tst), modelroll@forecast$density$Mu), index(x))
colnames(x_forecast) <- "i.i.d."
x_all <- cbind(x_all, x_forecast, check.names = FALSE)
y_all <- cbind(y_all, x_forecast + lag(y), check.names = FALSE)



# model specification of an AR(1)
spec <- arfimaspec(mean.model = list(armaOrder = c(1,0), include.mean = TRUE))
modelroll <- arfimaroll(spec = spec, data = x, n.ahead = 1,
                        forecast.length = T_tst, refit.every = 10, refit.window = "moving")
x_forecast <- xts(c(rep(NA, T-T_tst), modelroll@forecast$density$Mu), index(x))
colnames(x_forecast) <- "AR(1)"
x_all <- cbind(x_all, x_forecast, check.names = FALSE)
y_all <- cbind(y_all, x_forecast + lag(y), check.names = FALSE)



# model specification of an MA(1)
spec <- arfimaspec(mean.model = list(armaOrder = c(0,1), include.mean = TRUE))
modelroll <- arfimaroll(spec = spec, data = x, 
                        forecast.length = T_tst, refit.every = 10, refit.window = "moving")
x_forecast <- xts(c(rep(NA, T-T_tst), modelroll@forecast$density$Mu), index(x))
colnames(x_forecast) <- "MA(1)"
x_all <- cbind(x_all, x_forecast, check.names = FALSE)
y_all <- cbind(y_all, x_forecast + lag(y), check.names = FALSE)


# model specification of an ARMA(1,1)
spec <- arfimaspec(mean.model = list(armaOrder = c(1,1), include.mean = TRUE))
modelroll <- arfimaroll(spec = spec, data = x, 
                        forecast.length = T_tst, refit.every = 10, refit.window = "moving")
x_forecast <- xts(c(rep(NA, T-T_tst), modelroll@forecast$density$Mu), index(x))
colnames(x_forecast) <- "ARMA(1,1)"
x_all <- cbind(x_all, x_forecast, check.names = FALSE)
y_all <- cbind(y_all, x_forecast + lag(y), check.names = FALSE)

y_error <- xts(apply(as.matrix(y_all), 2, function(column) (column - as.vector(y_all[, 1]))), index(y_all))


# plots
p_y <- ggplot(fortify(y_all, melt = TRUE), aes(x = Index, y = Value, color = Series)) +
  geom_line(linewidth = 1) +
  scale_x_date(date_breaks = "2 months", date_labels = "%b %Y", date_minor_breaks = "1 day") +
  scale_color_manual(values = c("darkslategray", hue_pal()(4))) +
  theme(legend.title = element_blank()) +
  xlab(element_blank()) + ylab(element_blank()) + ggtitle("Log-price time series")

p_x <- ggplot(fortify(x_all, melt = TRUE), aes(x = Index, y = Value, color = Series)) +
  geom_line(linewidth = 1) +
  scale_x_date(date_breaks = "2 months", date_labels = "%b %Y", date_minor_breaks = "1 day") +
  scale_color_manual(values = c("darkslategray", hue_pal()(4))) +
  theme(legend.title = element_blank()) +
  xlab(element_blank()) + ylab(element_blank()) + ggtitle("Log-return time series")

p_y_error <- ggplot(fortify(y_error, melt = TRUE), aes(x = Index, y = Value, color = Series)) +
  geom_line(linewidth = 1) +
  scale_x_date(date_breaks = "2 months", date_labels = "%b %Y", date_minor_breaks = "1 day") +
  scale_color_manual(values = c("darkslategray", hue_pal()(4))) +
  theme(legend.title = element_blank()) +
  xlab(element_blank()) + ylab(element_blank()) + ggtitle("Forecast error time series")

p_y / p_x / p_y_error + plot_layout(guides = "collect")

# table
df_MSE <- colMeans(y_error^2, na.rm = TRUE)[-1] |> rbind() |> as.data.frame()
knitr::kable(df_MSE, booktabs = TRUE, linesep = "", digits = 6)
i.i.d. AR(1) MA(1) ARMA(1,1)
0.000754 0.000793 0.000805 0.000914

Kalman

library(MARSS)

# extract market data
y <- log(SP500_2015to2020$index["2020"])
x <- diff(y)[-1]
mean_x <- as.numeric(mean(x))
var_x <- as.numeric(var(x))
colnames(y) <- colnames(x) <- "true"
y_all <- y
x_all <- x


# Kalman on log-returns x
model <- MARSS(rbind(as.vector(x)),
               model = list(Z = matrix(1),  A = matrix(0), R = matrix(var_x),      #  observation equation
                            B = matrix(1),  U = matrix(0), Q = matrix(list("1")),  # state transition equation
                            x0 = matrix(mean_x), V0 = matrix(var_x)),              # initial state
               silent = TRUE)
kfks <- MARSSkf(model)
x_filtering <- xts(c(kfks$xtt), index(x))  # the forecast is just the filtering delayed in our case because B = 1
colnames(x_filtering) <- "Kalman on log-returns (dynamic)"
x_all <- cbind(x_all, lag(x_filtering)[-1], check.names = FALSE)
y_all <- cbind(y_all, lag(x_filtering + y), check.names = FALSE)


# Kalman on log-prices y (static mu)
model <- MARSS(rbind(as.vector(y)),
               model = list(Z = matrix(1),  A = matrix(0),          R = matrix(list("var_obs")),  #  observation equation
                            B = matrix(1),  U = matrix(list("mu")), Q = matrix(list("var_state")),  # state transition equation
                            x0 = matrix(as.numeric(y[1])), V0 = matrix(var_x)),            # initial state
               silent = TRUE)
kfks <- MARSSkf(model)
y_filtering <- xts(c(kfks$xtt), index(y))
colnames(y_filtering) <- "Kalman on log-prices (static)"
y_all <- cbind(y_all, lag(y_filtering),         check.names = FALSE)
x_all <- cbind(x_all, lag(y_filtering - y)[-1], check.names = FALSE)


# Kalman on log-prices y (dynamic mu)
model <- MARSS(rbind(as.vector(y)),
               model = list(Z = rbind(c(1, 0)), A = matrix(0), R = matrix(list("1")),   # observation equation
                            B = rbind(c(1, 1),                                          # state transition equation
                                      c(0, 1)),                                         #
                            U = cbind(c(0, 0)), Q = matrix(list("1", 0, 0, "2"), 2, 2), #
                            x0 = cbind(c(as.numeric(y[1]), mean_x)), V0 = var_x*diag(2L)), # initial state
               silent = TRUE)


kfks <- MARSSkf(model)
y_filtering <- xts(c(kfks$xtt[1, ]), index(y))
colnames(y_filtering) <- "Kalman on log-prices (dynamic)"
y_all <- cbind(y_all, lag(y_filtering),         check.names = FALSE)
x_all <- cbind(x_all, lag(y_filtering - y)[-1], check.names = FALSE)

y_error <- xts(apply(as.matrix(y_all), 2, function(column) (column - as.vector(y_all[, 1]))), index(y_all))


# plots
p_y <- ggplot(fortify(y_all, melt = TRUE), aes(x = Index, y = Value, color = Series)) +
  geom_line(linewidth = 1) +
  scale_x_date(date_breaks = "2 months", date_labels = "%b %Y", date_minor_breaks = "1 day") +
  scale_color_manual(values = c("darkslategray", hue_pal()(3))) +
  theme(legend.title = element_blank()) +
  xlab(element_blank()) + ylab(element_blank()) + ggtitle("Log-price time series")

p_x <- ggplot(fortify(x_all, melt = TRUE), aes(x = Index, y = Value, color = Series)) +
  geom_line(linewidth = 1) +
  scale_x_date(date_breaks = "2 months", date_labels = "%b %Y", date_minor_breaks = "1 day") +
  scale_color_manual(values = c("darkslategray", hue_pal()(3))) +
  theme(legend.title = element_blank()) +
  xlab(element_blank()) + ylab(element_blank()) + ggtitle("Log-return time series")

p_y_error <- ggplot(fortify(y_error, melt = TRUE), aes(x = Index, y = Value, color = Series)) +
  geom_line(linewidth = 1) +
  scale_x_date(date_breaks = "2 months", date_labels = "%b %Y", date_minor_breaks = "1 day") +
  scale_color_manual(values = c("darkslategray", hue_pal()(3))) +
  theme(legend.title = element_blank()) +
  xlab(element_blank()) + ylab(element_blank()) + ggtitle("Forecast error time series")

p_y / p_x / p_y_error + plot_layout(guides = "collect")

# table
df_MSE <- colMeans(y_error^2, na.rm = TRUE)[-1] |> rbind() |> as.data.frame()

knitr::kable(df_MSE, booktabs = TRUE, linesep = "", digits = 6)
Kalman on log-returns (dynamic) Kalman on log-prices (static) Kalman on log-prices (dynamic)
0.000632 0.00056 0.000557

Volatility/Variance modeling

Moving average (MA)

Volatility envelope with moving averages:

library(RcppRoll)  # fast rolling means

# extract market data
y <- log(SP500_2015to2020$index["2020"])
x <- diff(y)[-1]
x_all <- abs(x)
colnames(x_all) <- "absolute residuals"



# MA(5) on squared returns x^2
vol_forecast <- xts(sqrt(roll_meanr(x^2, n = 5, fill = NA)), index(x))
colnames(vol_forecast) <- "MA(5) volatility"
x_all <- cbind(x_all, lag(vol_forecast)[-1], check.names = FALSE)


# MA(10) on squared returns x^2
vol_forecast <- xts(sqrt(roll_meanr(x^2, n = 10, fill = NA)), index(x))
colnames(vol_forecast) <- "MA(10) volatility"
x_all <- cbind(x_all, lag(vol_forecast)[-1], check.names = FALSE)


# MA(20) on squared returns x^2
vol_forecast <- xts(sqrt(roll_meanr(x^2, n = 20, fill = NA)), index(x))
colnames(vol_forecast) <- "MA(20) volatility"
x_all <- cbind(x_all, lag(vol_forecast)[-1], check.names = FALSE)

# plot
ggplot(fortify(x_all, melt = TRUE), aes(x = Index, y = Value, color = Series, linewidth = Series)) +
  geom_line() +
  scale_color_manual(values = c("darkslategray", hue_pal()(3))) +
  scale_linewidth_manual(values = c(0.5, 1.2, 1.2, 1.2)) +
  scale_x_date(date_breaks = "2 months", date_labels = "%b %Y", date_minor_breaks = "1 day") +
  theme(legend.title = element_blank()) + 
  xlab(element_blank()) + ylab(element_blank()) + ggtitle("Residual time series and envelope")

EWMA

Volatility envelope with EWMA:

library(TTR)   # alpha = 2/(n + 1) or n = 2/alpha - 1

# extract market data
y <- log(SP500_2015to2020$index["2020"])
x <- diff(y)[-1]
x_all <- abs(x)
colnames(x_all) <- "absolute residuals"


# EMA(0.33) on log-prices y (alpha = 2/(n + 1) = 0.33 or n = 2/alpha - 1)
vol_forecast <- sqrt(EMA(x^2, n = 5))
colnames(vol_forecast) <- "EWMA(0.33) volatility"
x_all <- cbind(x_all, lag(vol_forecast)[-1], check.names = FALSE)


# EMA(0.18) on log-prices y (alpha = 2/(10 + 1) = 0.18)
vol_forecast <- sqrt(EMA(x^2, n = 10))
colnames(vol_forecast) <- "EWMA(0.18) volatility"
x_all <- cbind(x_all, lag(vol_forecast)[-1], check.names = FALSE)


# EMA(0.10) on log-prices y (alpha = 2/(10 + 1) = 0.10)
vol_forecast <- sqrt(EMA(x^2, n = 20))
colnames(vol_forecast) <- "EWMA(0.10) volatility"
x_all <- cbind(x_all, lag(vol_forecast)[-1], check.names = FALSE)



# plot
ggplot(fortify(x_all, melt = TRUE), aes(x = Index, y = Value, color = Series, linewidth = Series)) +
  geom_line() +
  scale_color_manual(values = c("darkslategray", hue_pal()(3))) +
  scale_linewidth_manual(values = c(0.5, 1.2, 1.2, 1.2)) +
  scale_x_date(date_breaks = "2 months", date_labels = "%b %Y", date_minor_breaks = "1 day") +
  theme(legend.title = element_blank()) + 
  xlab(element_blank()) + ylab(element_blank()) + ggtitle("Residual time series and envelope")

GARCH

Volatility envelope with GARCH models:

library(rugarch)

# extract market data
y <- log(SP500_2015to2020$index["2020::2020"])
x <- diff(y)[-1]
x_all <- abs(x)
colnames(x_all) <- "absolute residuals"

T <- nrow(x)
T_tst <- round(0.8*nrow(x))


# model specification of an ARCH(5)
spec <- ugarchspec(mean.model     = list(armaOrder = c(0,0), include.mean = FALSE),
                   variance.model = list(model = "sGARCH", garchOrder = c(5,0)))
#spec@model$pars
modelroll <- ugarchroll(spec = spec, data = x, n.ahead = 1,
                        forecast.length = T_tst, refit.every = 10, refit.window = "moving")
vol_forecast <- xts(c(rep(NA, T-T_tst), modelroll@forecast$density$Sigma), index(x))
colnames(vol_forecast) <- "ARCH(5)"
x_all <- cbind(x_all, lag(vol_forecast)[-1], check.names = FALSE)

# model specification of an GARCH(1,1)
spec <- ugarchspec(mean.model     = list(armaOrder = c(0,0), include.mean = FALSE),
                   variance.model = list(model = "sGARCH", garchOrder = c(1,1)))
modelroll <- ugarchroll(spec = spec, data = x, n.ahead = 1,
                        forecast.length = T_tst, refit.every = 10, refit.window = "moving")
vol_forecast <- xts(c(rep(NA, T-T_tst), modelroll@forecast$density$Sigma), index(x))
colnames(vol_forecast) <- "GARCH(1,1)"
x_all <- cbind(x_all, lag(vol_forecast)[-1], check.names = FALSE)


# model specification of an GARCH(5,1)
spec <- ugarchspec(mean.model     = list(armaOrder = c(0,0), include.mean = FALSE),
                   variance.model = list(model = "sGARCH", garchOrder = c(1,5)))
modelroll <- ugarchroll(spec = spec, data = x, n.ahead = 1,
                        forecast.length = T_tst, refit.every = 10, refit.window = "moving")
vol_forecast <- xts(c(rep(NA, T-T_tst), modelroll@forecast$density$Sigma), index(x))
colnames(vol_forecast) <- "GARCH(5,1)"
x_all <- cbind(x_all, lag(vol_forecast)[-1], check.names = FALSE)


# plot
ggplot(fortify(x_all, melt = TRUE), aes(x = Index, y = Value, color = Series, linewidth = Series)) +
  geom_line() +
  scale_color_manual(values = c("darkslategray", hue_pal()(3))) +
  scale_linewidth_manual(values = c(0.5, 1.2, 1.2, 1.2)) +
  scale_x_date(date_breaks = "2 months", date_labels = "%b %Y", date_minor_breaks = "1 day") +
  theme(legend.title = element_blank()) + 
  xlab(element_blank()) + ylab(element_blank()) + ggtitle("Residual time series and envelope")

Stochastic volatility (SV)

Volatility envelope with SV modeling via MCMC:

library(stochvol)

# extract market data
y <- log(SP500_2015to2020$index["2020"])
x <- diff(y)[-1]
x_all <- abs(x)
colnames(x_all) <- "absolute residuals"


# SV modeling
res <- svsample(x)
Done!
Summarizing posterior draws...
vol_forecast <- xts(res$summary$sd[, "mean"], index(x))
colnames(vol_forecast) <- "SV"
x_all <- cbind(x_all, lag(vol_forecast)[-1], check.names = FALSE)


# plot
ggplot(fortify(x_all, melt = TRUE), aes(x = Index, y = Value, color = Series, linewidth = Series)) +
  geom_line() +
  scale_color_manual(values = c("darkslategray", hue_pal()(1))) +
  scale_linewidth_manual(values = c(0.5, 1.2)) +
  scale_x_date(date_breaks = "2 months", date_labels = "%b %Y", date_minor_breaks = "1 day") +
  theme(legend.title = element_blank()) + 
  xlab(element_blank()) + ylab(element_blank()) + ggtitle("Residual time series and envelope")

SV via Kalman

Volatility envelope with SV modeling via Kalman filter (Palomar, 2024, Chapter 4):

library(MARSS)

# extract market data
y <- log(SP500_2015to2020$index["2020"])
x <- diff(y)[-1]
env_smoothing <- env_forecast <- abs(x)
colnames(env_smoothing) <- colnames(env_forecast) <- "absolute residuals"


# Kalman random walk plus Gaussian noise model
model <- MARSS(rbind(as.vector(log(x^2))),
               model = list(Z = "identity",  A = matrix(-1.27), R = matrix(pi^2/2),             # observation equation
                            B = "identity",  U = "zero",        Q = matrix(list("var_state")),  # state transition equation
                            x0 = matrix(log(var(x))), V0 = matrix(1e-3)),                      # initial state
               silent = TRUE)
kfks <- MARSSkf(model)
h_forecast  <- xts(c(kfks$xtt1), index(x))
h_smoothing <- xts(c(kfks$xtT), index(x))
colnames(h_smoothing) <- colnames(h_forecast) <- "SV - random walk"
env_forecast  <- cbind(env_forecast,  exp(h_forecast/2),  check.names = FALSE)
env_smoothing <- cbind(env_smoothing, exp(h_smoothing/2), check.names = FALSE)


# Kalman AR(1) plus Gaussian noise model
model <- MARSS(rbind(as.vector(log(x^2))),
               model = list(Z = "identity",          A = matrix(-1.27), R = matrix(pi^2/2),  # observation equation
                            B = matrix(list("phi")), U = matrix(list("gamma")),              # state transition equation
                            Q = matrix(list("var_state")),
                            x0 = matrix(log(var(x))), V0 = matrix(1e-3)),                    # initial state
               silent = TRUE)
kfks <- MARSSkf(model)
h_forecast  <- xts(c(kfks$xtt1), index(x))
h_smoothing <- xts(c(kfks$xtT), index(x))
colnames(h_smoothing) <- colnames(h_forecast) <- "SV - AR(1)"
env_forecast  <- cbind(env_forecast,  exp(h_forecast/2),  check.names = FALSE)
env_smoothing <- cbind(env_smoothing, exp(h_smoothing/2), check.names = FALSE)



# plot
p_forecast <- ggplot(fortify(env_forecast, melt = TRUE), 
                     aes(x = Index, y = Value, color = Series, linewidth = Series)) +
  geom_line() +
  scale_color_manual(values = c("darkslategray", hue_pal()(2))) +
  scale_linewidth_manual(values = c(0.5, 1.2, 1.2)) +
  scale_x_date(date_breaks = "2 months", date_labels = "%b %Y", date_minor_breaks = "1 day") +
  theme(legend.title = element_blank()) + 
  xlab(element_blank()) + ylab(element_blank()) + ggtitle("Residual time series and envelope forecast")

p_smoothing <- ggplot(fortify(env_smoothing, melt = TRUE), 
                      aes(x = Index, y = Value, color = Series, linewidth = Series)) +
  geom_line() +
  scale_color_manual(values = c("darkslategray", hue_pal()(2))) +
  scale_linewidth_manual(values = c(0.5, 1.2, 1.2)) +
  scale_x_date(date_breaks = "2 months", date_labels = "%b %Y", date_minor_breaks = "1 day") +
  theme(legend.title = element_blank()) +
  xlab(element_blank()) + ylab(element_blank()) + ggtitle("Residual time series and envelope smoothing")

p_forecast

p_smoothing

References

Palomar, D. P. (2024). Portfolio optimization: Theory and application. Cambridge University Press.