# basic finance
library(xts) # to manipulate time series of stock data
library(pob) # book package with financial data
# install with: devtools::install_github("dppalomar/pob")
# data
library(RcppRoll) # rolling methods
library(TTR) # exponentially moving average
library(rugarch) # time series models
library(MARSS) # Kalman
library(stochvol) # to compute volatility envelope
# plotting
library(ggplot2) # for nice plots
library(patchwork) # for combining plots
library(scales)
Portfolio Optimization
Chapter 4 - Financial Data: Time Series Modeling
R code
R code examples for Chapter 4 of the book:
Daniel P. Palomar (2024). Portfolio Optimization: Theory and Application. Cambridge University Press.
Loading packages
The following packages are used in the examples:
Temporal structure
Example of a synthetic Gaussian AR(1) time series:
library(rugarch)
# specify an AR(1) model with given coefficients and parameters
<- arfimaspec(mean.model = list(armaOrder = c(1,0), include.mean = TRUE),
arma_fixed_spec fixed.pars = list(mu = 0.01, ar1 = -0.9, sigma = 0.2))
# simulate one path
<- 300
T set.seed(42)
<- arfimapath(arma_fixed_spec, n.sim = T)
path_arma <- path_arma@path$seriesSim
x
data.frame(t = 1:T, x = x) |>
ggplot(aes(x = t, y = x)) +
geom_line(linewidth = 0.8, color = "blue", show.legend = FALSE) +
xlab(element_blank()) + ylab(element_blank()) + ggtitle("AR(1) time series")
Mean modeling
Moving average (MA)
Forecasting with moving average:
library(RcppRoll) # fast rolling means
# extract market data
<- log(SP500_2015to2020$index["2020"])
y <- y[1]
y0 <- diff(y)[-1]
x colnames(y) <- colnames(x) <- "true"
<- y
y_all <- x
x_all
# MA(10) on log-prices y
<- xts(roll_meanr(y, n = 20, fill = NA), index(y))
y_forecast colnames(y_forecast) <- "MA(20) on log-prices"
<- cbind(y_all, lag(y_forecast), check.names = FALSE)
y_all <- cbind(x_all, lag(y_forecast - y)[-1], check.names = FALSE)
x_all
# MA(10) on log-returns x
<- xts(roll_meanr(x, n = 20, fill = NA), index(x))
x_forecast colnames(x_forecast) <- "MA(20) on log-returns"
<- cbind(x_all, lag(x_forecast)[-1], check.names = FALSE)
x_all <- cbind(y_all, lag(x_forecast + y), check.names = FALSE)
y_all
<- xts(apply(as.matrix(y_all), 2, function(column) (column - as.vector(y_all[, 1]))), index(y_all))
y_error
# plots
<- ggplot(fortify(y_all, melt = TRUE), aes(x = Index, y = Value, color = Series)) +
p_y geom_line(linewidth = 1) +
scale_x_date(date_breaks = "2 months", date_labels = "%b %Y", date_minor_breaks = "1 day") +
scale_color_manual(values = c("black", hue_pal()(2))) +
theme(legend.title = element_blank()) +
xlab(element_blank()) + ylab(element_blank()) + ggtitle("Log-price time series")
<- ggplot(fortify(x_all, melt = TRUE), aes(x = Index, y = Value, color = Series)) +
p_x geom_line(linewidth = 1) +
scale_x_date(date_breaks = "2 months", date_labels = "%b %Y", date_minor_breaks = "1 day") +
scale_color_manual(values = c("black", hue_pal()(2))) +
theme(legend.title = element_blank()) +
xlab(element_blank()) + ylab(element_blank()) + ggtitle("Log-return time series")
<- ggplot(fortify(y_error, melt = TRUE), aes(x = Index, y = Value, color = Series)) +
p_y_error geom_line(linewidth = 1) +
scale_x_date(date_breaks = "2 months", date_labels = "%b %Y", date_minor_breaks = "1 day") +
scale_color_manual(values = c("black", hue_pal()(2))) +
theme(legend.title = element_blank()) +
xlab(element_blank()) + ylab(element_blank()) + ggtitle("Forecast error time series")
/ p_x / p_y_error + plot_layout(guides = "collect") p_y
# table
<- colMeans(y_error^2, na.rm = TRUE)[-1] |> rbind() |> as.data.frame()
df_MSE ::kable(df_MSE, booktabs = TRUE, linesep = "", digits = 6) knitr
MA(20) on log-prices | MA(20) on log-returns |
---|---|
0.004032 | 0.000725 |
ARMA
Forecasting with ARMA models:
library(rugarch)
# extract market data
<- log(SP500_2015to2020$index["2020"])
y <- diff(y)[-1]
x colnames(y) <- colnames(x) <- "true"
<- y
y_all <- x
x_all
<- nrow(x)
T <- round(0.8*nrow(x))
T_tst
# model specification of an iid
<- arfimaspec(mean.model = list(armaOrder = c(0,0), include.mean = TRUE))
spec <- arfimaroll(spec = spec, data = x, n.ahead = 1,
modelroll forecast.length = T_tst, refit.every = 10, refit.window = "moving")
<- xts(c(rep(NA, T-T_tst), modelroll@forecast$density$Mu), index(x))
x_forecast colnames(x_forecast) <- "i.i.d."
<- cbind(x_all, x_forecast, check.names = FALSE)
x_all <- cbind(y_all, x_forecast + lag(y), check.names = FALSE)
y_all
# model specification of an AR(1)
<- arfimaspec(mean.model = list(armaOrder = c(1,0), include.mean = TRUE))
spec <- arfimaroll(spec = spec, data = x, n.ahead = 1,
modelroll forecast.length = T_tst, refit.every = 10, refit.window = "moving")
<- xts(c(rep(NA, T-T_tst), modelroll@forecast$density$Mu), index(x))
x_forecast colnames(x_forecast) <- "AR(1)"
<- cbind(x_all, x_forecast, check.names = FALSE)
x_all <- cbind(y_all, x_forecast + lag(y), check.names = FALSE)
y_all
# model specification of an MA(1)
<- arfimaspec(mean.model = list(armaOrder = c(0,1), include.mean = TRUE))
spec <- arfimaroll(spec = spec, data = x,
modelroll forecast.length = T_tst, refit.every = 10, refit.window = "moving")
<- xts(c(rep(NA, T-T_tst), modelroll@forecast$density$Mu), index(x))
x_forecast colnames(x_forecast) <- "MA(1)"
<- cbind(x_all, x_forecast, check.names = FALSE)
x_all <- cbind(y_all, x_forecast + lag(y), check.names = FALSE)
y_all
# model specification of an ARMA(1,1)
<- arfimaspec(mean.model = list(armaOrder = c(1,1), include.mean = TRUE))
spec <- arfimaroll(spec = spec, data = x,
modelroll forecast.length = T_tst, refit.every = 10, refit.window = "moving")
<- xts(c(rep(NA, T-T_tst), modelroll@forecast$density$Mu), index(x))
x_forecast colnames(x_forecast) <- "ARMA(1,1)"
<- cbind(x_all, x_forecast, check.names = FALSE)
x_all <- cbind(y_all, x_forecast + lag(y), check.names = FALSE)
y_all
<- xts(apply(as.matrix(y_all), 2, function(column) (column - as.vector(y_all[, 1]))), index(y_all))
y_error
# plots
<- ggplot(fortify(y_all, melt = TRUE), aes(x = Index, y = Value, color = Series)) +
p_y geom_line(linewidth = 1) +
scale_x_date(date_breaks = "2 months", date_labels = "%b %Y", date_minor_breaks = "1 day") +
scale_color_manual(values = c("darkslategray", hue_pal()(4))) +
theme(legend.title = element_blank()) +
xlab(element_blank()) + ylab(element_blank()) + ggtitle("Log-price time series")
<- ggplot(fortify(x_all, melt = TRUE), aes(x = Index, y = Value, color = Series)) +
p_x geom_line(linewidth = 1) +
scale_x_date(date_breaks = "2 months", date_labels = "%b %Y", date_minor_breaks = "1 day") +
scale_color_manual(values = c("darkslategray", hue_pal()(4))) +
theme(legend.title = element_blank()) +
xlab(element_blank()) + ylab(element_blank()) + ggtitle("Log-return time series")
<- ggplot(fortify(y_error, melt = TRUE), aes(x = Index, y = Value, color = Series)) +
p_y_error geom_line(linewidth = 1) +
scale_x_date(date_breaks = "2 months", date_labels = "%b %Y", date_minor_breaks = "1 day") +
scale_color_manual(values = c("darkslategray", hue_pal()(4))) +
theme(legend.title = element_blank()) +
xlab(element_blank()) + ylab(element_blank()) + ggtitle("Forecast error time series")
/ p_x / p_y_error + plot_layout(guides = "collect") p_y
# table
<- colMeans(y_error^2, na.rm = TRUE)[-1] |> rbind() |> as.data.frame()
df_MSE ::kable(df_MSE, booktabs = TRUE, linesep = "", digits = 6) knitr
i.i.d. | AR(1) | MA(1) | ARMA(1,1) |
---|---|---|---|
0.000754 | 0.000793 | 0.000805 | 0.000914 |
Kalman
library(MARSS)
# extract market data
<- log(SP500_2015to2020$index["2020"])
y <- diff(y)[-1]
x <- as.numeric(mean(x))
mean_x <- as.numeric(var(x))
var_x colnames(y) <- colnames(x) <- "true"
<- y
y_all <- x
x_all
# Kalman on log-returns x
<- MARSS(rbind(as.vector(x)),
model model = list(Z = matrix(1), A = matrix(0), R = matrix(var_x), # observation equation
B = matrix(1), U = matrix(0), Q = matrix(list("1")), # state transition equation
x0 = matrix(mean_x), V0 = matrix(var_x)), # initial state
silent = TRUE)
<- MARSSkf(model)
kfks <- xts(c(kfks$xtt), index(x)) # the forecast is just the filtering delayed in our case because B = 1
x_filtering colnames(x_filtering) <- "Kalman on log-returns (dynamic)"
<- cbind(x_all, lag(x_filtering)[-1], check.names = FALSE)
x_all <- cbind(y_all, lag(x_filtering + y), check.names = FALSE)
y_all
# Kalman on log-prices y (static mu)
<- MARSS(rbind(as.vector(y)),
model model = list(Z = matrix(1), A = matrix(0), R = matrix(list("var_obs")), # observation equation
B = matrix(1), U = matrix(list("mu")), Q = matrix(list("var_state")), # state transition equation
x0 = matrix(as.numeric(y[1])), V0 = matrix(var_x)), # initial state
silent = TRUE)
<- MARSSkf(model)
kfks <- xts(c(kfks$xtt), index(y))
y_filtering colnames(y_filtering) <- "Kalman on log-prices (static)"
<- cbind(y_all, lag(y_filtering), check.names = FALSE)
y_all <- cbind(x_all, lag(y_filtering - y)[-1], check.names = FALSE)
x_all
# Kalman on log-prices y (dynamic mu)
<- MARSS(rbind(as.vector(y)),
model model = list(Z = rbind(c(1, 0)), A = matrix(0), R = matrix(list("1")), # observation equation
B = rbind(c(1, 1), # state transition equation
c(0, 1)), #
U = cbind(c(0, 0)), Q = matrix(list("1", 0, 0, "2"), 2, 2), #
x0 = cbind(c(as.numeric(y[1]), mean_x)), V0 = var_x*diag(2L)), # initial state
silent = TRUE)
<- MARSSkf(model)
kfks <- xts(c(kfks$xtt[1, ]), index(y))
y_filtering colnames(y_filtering) <- "Kalman on log-prices (dynamic)"
<- cbind(y_all, lag(y_filtering), check.names = FALSE)
y_all <- cbind(x_all, lag(y_filtering - y)[-1], check.names = FALSE)
x_all
<- xts(apply(as.matrix(y_all), 2, function(column) (column - as.vector(y_all[, 1]))), index(y_all))
y_error
# plots
<- ggplot(fortify(y_all, melt = TRUE), aes(x = Index, y = Value, color = Series)) +
p_y geom_line(linewidth = 1) +
scale_x_date(date_breaks = "2 months", date_labels = "%b %Y", date_minor_breaks = "1 day") +
scale_color_manual(values = c("darkslategray", hue_pal()(3))) +
theme(legend.title = element_blank()) +
xlab(element_blank()) + ylab(element_blank()) + ggtitle("Log-price time series")
<- ggplot(fortify(x_all, melt = TRUE), aes(x = Index, y = Value, color = Series)) +
p_x geom_line(linewidth = 1) +
scale_x_date(date_breaks = "2 months", date_labels = "%b %Y", date_minor_breaks = "1 day") +
scale_color_manual(values = c("darkslategray", hue_pal()(3))) +
theme(legend.title = element_blank()) +
xlab(element_blank()) + ylab(element_blank()) + ggtitle("Log-return time series")
<- ggplot(fortify(y_error, melt = TRUE), aes(x = Index, y = Value, color = Series)) +
p_y_error geom_line(linewidth = 1) +
scale_x_date(date_breaks = "2 months", date_labels = "%b %Y", date_minor_breaks = "1 day") +
scale_color_manual(values = c("darkslategray", hue_pal()(3))) +
theme(legend.title = element_blank()) +
xlab(element_blank()) + ylab(element_blank()) + ggtitle("Forecast error time series")
/ p_x / p_y_error + plot_layout(guides = "collect") p_y
# table
<- colMeans(y_error^2, na.rm = TRUE)[-1] |> rbind() |> as.data.frame()
df_MSE
::kable(df_MSE, booktabs = TRUE, linesep = "", digits = 6) knitr
Kalman on log-returns (dynamic) | Kalman on log-prices (static) | Kalman on log-prices (dynamic) |
---|---|---|
0.000632 | 0.00056 | 0.000557 |
Volatility/Variance modeling
Moving average (MA)
Volatility envelope with moving averages:
library(RcppRoll) # fast rolling means
# extract market data
<- log(SP500_2015to2020$index["2020"])
y <- diff(y)[-1]
x <- abs(x)
x_all colnames(x_all) <- "absolute residuals"
# MA(5) on squared returns x^2
<- xts(sqrt(roll_meanr(x^2, n = 5, fill = NA)), index(x))
vol_forecast colnames(vol_forecast) <- "MA(5) volatility"
<- cbind(x_all, lag(vol_forecast)[-1], check.names = FALSE)
x_all
# MA(10) on squared returns x^2
<- xts(sqrt(roll_meanr(x^2, n = 10, fill = NA)), index(x))
vol_forecast colnames(vol_forecast) <- "MA(10) volatility"
<- cbind(x_all, lag(vol_forecast)[-1], check.names = FALSE)
x_all
# MA(20) on squared returns x^2
<- xts(sqrt(roll_meanr(x^2, n = 20, fill = NA)), index(x))
vol_forecast colnames(vol_forecast) <- "MA(20) volatility"
<- cbind(x_all, lag(vol_forecast)[-1], check.names = FALSE)
x_all
# plot
ggplot(fortify(x_all, melt = TRUE), aes(x = Index, y = Value, color = Series, linewidth = Series)) +
geom_line() +
scale_color_manual(values = c("darkslategray", hue_pal()(3))) +
scale_linewidth_manual(values = c(0.5, 1.2, 1.2, 1.2)) +
scale_x_date(date_breaks = "2 months", date_labels = "%b %Y", date_minor_breaks = "1 day") +
theme(legend.title = element_blank()) +
xlab(element_blank()) + ylab(element_blank()) + ggtitle("Residual time series and envelope")
EWMA
Volatility envelope with EWMA:
library(TTR) # alpha = 2/(n + 1) or n = 2/alpha - 1
# extract market data
<- log(SP500_2015to2020$index["2020"])
y <- diff(y)[-1]
x <- abs(x)
x_all colnames(x_all) <- "absolute residuals"
# EMA(0.33) on log-prices y (alpha = 2/(n + 1) = 0.33 or n = 2/alpha - 1)
<- sqrt(EMA(x^2, n = 5))
vol_forecast colnames(vol_forecast) <- "EWMA(0.33) volatility"
<- cbind(x_all, lag(vol_forecast)[-1], check.names = FALSE)
x_all
# EMA(0.18) on log-prices y (alpha = 2/(10 + 1) = 0.18)
<- sqrt(EMA(x^2, n = 10))
vol_forecast colnames(vol_forecast) <- "EWMA(0.18) volatility"
<- cbind(x_all, lag(vol_forecast)[-1], check.names = FALSE)
x_all
# EMA(0.10) on log-prices y (alpha = 2/(10 + 1) = 0.10)
<- sqrt(EMA(x^2, n = 20))
vol_forecast colnames(vol_forecast) <- "EWMA(0.10) volatility"
<- cbind(x_all, lag(vol_forecast)[-1], check.names = FALSE)
x_all
# plot
ggplot(fortify(x_all, melt = TRUE), aes(x = Index, y = Value, color = Series, linewidth = Series)) +
geom_line() +
scale_color_manual(values = c("darkslategray", hue_pal()(3))) +
scale_linewidth_manual(values = c(0.5, 1.2, 1.2, 1.2)) +
scale_x_date(date_breaks = "2 months", date_labels = "%b %Y", date_minor_breaks = "1 day") +
theme(legend.title = element_blank()) +
xlab(element_blank()) + ylab(element_blank()) + ggtitle("Residual time series and envelope")
GARCH
Volatility envelope with GARCH models:
library(rugarch)
# extract market data
<- log(SP500_2015to2020$index["2020::2020"])
y <- diff(y)[-1]
x <- abs(x)
x_all colnames(x_all) <- "absolute residuals"
<- nrow(x)
T <- round(0.8*nrow(x))
T_tst
# model specification of an ARCH(5)
<- ugarchspec(mean.model = list(armaOrder = c(0,0), include.mean = FALSE),
spec variance.model = list(model = "sGARCH", garchOrder = c(5,0)))
#spec@model$pars
<- ugarchroll(spec = spec, data = x, n.ahead = 1,
modelroll forecast.length = T_tst, refit.every = 10, refit.window = "moving")
<- xts(c(rep(NA, T-T_tst), modelroll@forecast$density$Sigma), index(x))
vol_forecast colnames(vol_forecast) <- "ARCH(5)"
<- cbind(x_all, lag(vol_forecast)[-1], check.names = FALSE)
x_all
# model specification of an GARCH(1,1)
<- ugarchspec(mean.model = list(armaOrder = c(0,0), include.mean = FALSE),
spec variance.model = list(model = "sGARCH", garchOrder = c(1,1)))
<- ugarchroll(spec = spec, data = x, n.ahead = 1,
modelroll forecast.length = T_tst, refit.every = 10, refit.window = "moving")
<- xts(c(rep(NA, T-T_tst), modelroll@forecast$density$Sigma), index(x))
vol_forecast colnames(vol_forecast) <- "GARCH(1,1)"
<- cbind(x_all, lag(vol_forecast)[-1], check.names = FALSE)
x_all
# model specification of an GARCH(5,1)
<- ugarchspec(mean.model = list(armaOrder = c(0,0), include.mean = FALSE),
spec variance.model = list(model = "sGARCH", garchOrder = c(1,5)))
<- ugarchroll(spec = spec, data = x, n.ahead = 1,
modelroll forecast.length = T_tst, refit.every = 10, refit.window = "moving")
<- xts(c(rep(NA, T-T_tst), modelroll@forecast$density$Sigma), index(x))
vol_forecast colnames(vol_forecast) <- "GARCH(5,1)"
<- cbind(x_all, lag(vol_forecast)[-1], check.names = FALSE)
x_all
# plot
ggplot(fortify(x_all, melt = TRUE), aes(x = Index, y = Value, color = Series, linewidth = Series)) +
geom_line() +
scale_color_manual(values = c("darkslategray", hue_pal()(3))) +
scale_linewidth_manual(values = c(0.5, 1.2, 1.2, 1.2)) +
scale_x_date(date_breaks = "2 months", date_labels = "%b %Y", date_minor_breaks = "1 day") +
theme(legend.title = element_blank()) +
xlab(element_blank()) + ylab(element_blank()) + ggtitle("Residual time series and envelope")
Stochastic volatility (SV)
Volatility envelope with SV modeling via MCMC:
library(stochvol)
# extract market data
<- log(SP500_2015to2020$index["2020"])
y <- diff(y)[-1]
x <- abs(x)
x_all colnames(x_all) <- "absolute residuals"
# SV modeling
<- svsample(x) res
Done!
Summarizing posterior draws...
<- xts(res$summary$sd[, "mean"], index(x))
vol_forecast colnames(vol_forecast) <- "SV"
<- cbind(x_all, lag(vol_forecast)[-1], check.names = FALSE)
x_all
# plot
ggplot(fortify(x_all, melt = TRUE), aes(x = Index, y = Value, color = Series, linewidth = Series)) +
geom_line() +
scale_color_manual(values = c("darkslategray", hue_pal()(1))) +
scale_linewidth_manual(values = c(0.5, 1.2)) +
scale_x_date(date_breaks = "2 months", date_labels = "%b %Y", date_minor_breaks = "1 day") +
theme(legend.title = element_blank()) +
xlab(element_blank()) + ylab(element_blank()) + ggtitle("Residual time series and envelope")
SV via Kalman
Volatility envelope with SV modeling via Kalman filter (Palomar, 2024, Chapter 4):
library(MARSS)
# extract market data
<- log(SP500_2015to2020$index["2020"])
y <- diff(y)[-1]
x <- env_forecast <- abs(x)
env_smoothing colnames(env_smoothing) <- colnames(env_forecast) <- "absolute residuals"
# Kalman random walk plus Gaussian noise model
<- MARSS(rbind(as.vector(log(x^2))),
model model = list(Z = "identity", A = matrix(-1.27), R = matrix(pi^2/2), # observation equation
B = "identity", U = "zero", Q = matrix(list("var_state")), # state transition equation
x0 = matrix(log(var(x))), V0 = matrix(1e-3)), # initial state
silent = TRUE)
<- MARSSkf(model)
kfks <- xts(c(kfks$xtt1), index(x))
h_forecast <- xts(c(kfks$xtT), index(x))
h_smoothing colnames(h_smoothing) <- colnames(h_forecast) <- "SV - random walk"
<- cbind(env_forecast, exp(h_forecast/2), check.names = FALSE)
env_forecast <- cbind(env_smoothing, exp(h_smoothing/2), check.names = FALSE)
env_smoothing
# Kalman AR(1) plus Gaussian noise model
<- MARSS(rbind(as.vector(log(x^2))),
model model = list(Z = "identity", A = matrix(-1.27), R = matrix(pi^2/2), # observation equation
B = matrix(list("phi")), U = matrix(list("gamma")), # state transition equation
Q = matrix(list("var_state")),
x0 = matrix(log(var(x))), V0 = matrix(1e-3)), # initial state
silent = TRUE)
<- MARSSkf(model)
kfks <- xts(c(kfks$xtt1), index(x))
h_forecast <- xts(c(kfks$xtT), index(x))
h_smoothing colnames(h_smoothing) <- colnames(h_forecast) <- "SV - AR(1)"
<- cbind(env_forecast, exp(h_forecast/2), check.names = FALSE)
env_forecast <- cbind(env_smoothing, exp(h_smoothing/2), check.names = FALSE)
env_smoothing
# plot
<- ggplot(fortify(env_forecast, melt = TRUE),
p_forecast aes(x = Index, y = Value, color = Series, linewidth = Series)) +
geom_line() +
scale_color_manual(values = c("darkslategray", hue_pal()(2))) +
scale_linewidth_manual(values = c(0.5, 1.2, 1.2)) +
scale_x_date(date_breaks = "2 months", date_labels = "%b %Y", date_minor_breaks = "1 day") +
theme(legend.title = element_blank()) +
xlab(element_blank()) + ylab(element_blank()) + ggtitle("Residual time series and envelope forecast")
<- ggplot(fortify(env_smoothing, melt = TRUE),
p_smoothing aes(x = Index, y = Value, color = Series, linewidth = Series)) +
geom_line() +
scale_color_manual(values = c("darkslategray", hue_pal()(2))) +
scale_linewidth_manual(values = c(0.5, 1.2, 1.2)) +
scale_x_date(date_breaks = "2 months", date_labels = "%b %Y", date_minor_breaks = "1 day") +
theme(legend.title = element_blank()) +
xlab(element_blank()) + ylab(element_blank()) + ggtitle("Residual time series and envelope smoothing")
p_forecast
p_smoothing
References
Palomar, D. P. (2024). Portfolio optimization: Theory and application. Cambridge University Press.