Portfolio Optimization
Chapter 2 - Financial Data: Stylized Facts

R code

Published

September 14, 2024

R code examples for Chapter 2 of the book:

Daniel P. Palomar (2024). Portfolio Optimization: Theory and Application. Cambridge University Press.

Loading packages

The following packages are used in the examples:

# basic finance
library(xts)                    # to manipulate time series of stock data
library(quantmod)               # to download stock data
library(pob)                    # book package with financial data
library(stochvol)               # to compute volatility envelope

# plotting
library(ggplot2)                # for nice plots
library(patchwork)              # for combining plots
library(viridisLite)            # for nice colors
library(reshape2)               # to reshape data

Prices and returns

Price time series of S&P 500:

# get data from Yahoo!Finance
library(quantmod)
sp500_prices  <- Ad(getSymbols("^GSPC", from = "2007-01-01", to = "2022-11-04", auto.assign = FALSE))

ggplot(fortify(sp500_prices, melt = TRUE), aes(x = Index, y = Value)) +
  geom_line(linewidth = 0.8, color = "blue", show.legend = FALSE) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y", date_minor_breaks = "1 month") +
  scale_y_log10() +
  xlab(element_blank()) + ylab(element_blank()) + ggtitle("S&P 500 price") 

Price time series of Bitcoin:

# get data from Yahoo!Finance
btc_prices <- Ad(getSymbols("BTC-USD", from = "2017-01-01", to = "2022-11-04", auto.assign = FALSE))

ggplot(fortify(btc_prices, melt = TRUE), aes(x = Index, y = Value)) +
  geom_line(linewidth = 0.8, color = "blue", show.legend = FALSE) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y", date_minor_breaks = "1 month") +
  scale_y_log10() +
  ggtitle("BTC price") + xlab(element_blank()) + ylab(element_blank())

Daily log-return time series of S&P 500:

sp500_returns <- diff(log(sp500_prices))[-1]
#sp500_linreturns <- exp(sp500_returns) - 1

ggplot(fortify(sp500_returns, melt = TRUE), aes(x = Index, y = Value)) +
  geom_line(linewidth = 0.5, color = "blue", show.legend = FALSE) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y", date_minor_breaks = "1 month") +
  ggtitle("S&P 500 log-returns") + xlab(element_blank()) + ylab(element_blank())

We can observe: High-volatility period during global financial crisis in 2008, as well as the high peak in volatility in early 2020 due to the COVID-19 pandemic.

Daily log-return time series of Bitcoin:

btc_returns <- diff(log(btc_prices))[-1]
#btc_returns <- na.omit(btc_returns)
#btc_linreturns   <- exp(btc_returns) - 1

ggplot(fortify(btc_returns, melt = TRUE), aes(x = Index, y = Value)) +
  geom_line(linewidth = 0.5, color = "blue", show.legend = FALSE) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y", date_minor_breaks = "1 month") +
  ggtitle("BTC log-returns") + xlab(element_blank()) + ylab(element_blank())

We can observe: Bitcoin flash crash on March 12, 2020, with a drop close to 50% in a single day.

Non-Gaussianity

Asymmetry and heavy tails

Histogram of S&P 500 log-returns at different frequencies (with Gaussian fit):

plot_histogram <- function(returns, shift = 0.0, sd_factor = 1, nbins = 50) {
  ggplot(fortify(returns, melt = TRUE), aes(x = Value)) +
    geom_histogram(aes(y = after_stat(density)), bins = nbins, fill = "blue", alpha = 0.3, col = "gray31") +
    stat_function(fun = dnorm, args = list(mean = mean(returns, na.rm = TRUE) + shift, sd = sd_factor*sd(returns, na.rm = TRUE)),
                  color = "navy", linewidth = 1) +
    theme(axis.text.y = element_blank()) +
    xlab("return") + ylab("density")
}

sp500_monthlyreturns   <- diff(log(sp500_prices), 20)[-c(1:20)]
sp500_quarterlyreturns <- diff(log(sp500_prices), 60)[-c(1:60)]

p1 <- plot_histogram(sp500_returns, shift = 5e-4, sd_factor = 0.50)          + ggtitle("Daily returns")
p2 <- plot_histogram(sp500_monthlyreturns, shift = 1e-2, sd_factor = 0.65)   + ggtitle("Monthly returns")  + ylab(element_blank())
p3 <- plot_histogram(sp500_quarterlyreturns, shift = 2e-2, sd_factor = 0.52) + ggtitle("Quarterly returns") + ylab(element_blank())
p1 | p2 | p3

Histogram of Bitcoin log-returns at different frequencies (with Gaussian fit):

# use data from package pob
data(cryptos_2017to2021)
btc_hourlyprices <- cryptos_2017to2021$hourly[, "BTC"]
btc_hourlyreturns <- diff(log(btc_hourlyprices))[-1]

btc_monthlyreturns   <- diff(log(btc_prices), 30)[-c(1:30)]

p1 <- plot_histogram(btc_hourlyreturns, shift = 5e-4, sd_factor = 0.65)                + ggtitle("Hourly returns")
p2 <- plot_histogram(btc_returns, shift = 5e-4, sd_factor = 0.55, nbins = 40)          + ggtitle("Daily returns")  + ylab(element_blank())
p3 <- plot_histogram(btc_monthlyreturns, shift = -2e-2, sd_factor = 0.75, nbins = 20)  + ggtitle("Monthly returns")  + ylab(element_blank())

p1 | p2 | p3

Skewness

Skewness of S&P 500 log-returns:

library(PerformanceAnalytics)

# compute kurtosis vs frequency (observe aggregational Gaussianity)
SP500_kurtosis <- NULL
SP500_skewness <- NULL
for(l in 1:60) {
  SP500_kurtosis[l] <- kurtosis(diff(log(sp500_prices), l), method="excess")
  SP500_skewness[l] <- skewness(diff(log(sp500_prices), l))
}

btc_kurtosis <- NULL
btc_skewness <- NULL
for(l in 1:60) {
  btc_kurtosis[l] <- kurtosis(diff(log(btc_prices), l), method="excess")
  btc_skewness[l] <- skewness(diff(log(btc_prices), l))
}
ggplot(data.frame("x" = 1:60, "y" = SP500_skewness), aes(x, y)) +
  geom_line(color = "blue", linewidth = 1.2) +
  scale_x_continuous(limits = c(0, 60), breaks = seq(0, 60, by = 10), minor_breaks = NULL) +
  coord_cartesian(ylim = c(-2, 0)) +
  ggtitle("") + xlab("period [days]") + ylab("skewness")

Skewness of Bitcoin log-returns:

ggplot(data.frame("x" = 1:60, "y" = btc_skewness), aes(x, y)) +
  geom_line(color = "blue", linewidth = 1.2) +
  scale_x_continuous(limits = c(0, 60), breaks = seq(0, 60, by = 5), minor_breaks = NULL) +
  ggtitle("") + xlab("period [days]") + ylab("skewness")

Heavy tails and kurtosis

Q-Q plots of S&P 500 log-returns at different frequencies:

p1 <- ggplot(data.frame(y = as.vector(sp500_returns)), aes(sample = y)) +
  stat_qq(color = "blue", alpha = 0.5) +
  stat_qq_line(color = "gray15", linewidth = 1.0) + coord_cartesian(ylim = c(-0.12, 0.12)) +
  theme_bw() + ggtitle("Daily returns") + xlab("theoretical Gaussian quantiles") + ylab("sample quantiles")
p2 <- ggplot(data.frame(y = as.vector(sp500_monthlyreturns)), aes(sample = y)) +
  stat_qq(color = "blue", alpha = 0.5) +
  stat_qq_line(color = "gray15", linewidth = 1.0) + coord_cartesian(ylim = c(-0.22, 0.22)) +
  theme_bw() + ggtitle("Monthly returns") + xlab("theoretical Gaussian quantiles") + ylab(element_blank())
p3 <- ggplot(data.frame(y = as.vector(sp500_quarterlyreturns)), aes(sample = y)) +
  stat_qq(color = "blue", alpha = 0.5) +
  stat_qq_line(color = "gray15", linewidth = 1.0) + coord_cartesian(ylim = c(-0.35, 0.35)) +
  theme_bw() + ggtitle("Quarterly returns") + xlab("theoretical Gaussian quantiles") + ylab(element_blank())

p1 | p2 | p3

Q-Q plots of Bitcoin log-returns at different frequencies:

p1 <- ggplot(data.frame(y = as.vector(btc_hourlyreturns)), aes(sample = y)) +
  stat_qq(color = "blue", alpha = 0.5) +
  stat_qq_line(color = "gray15", linewidth = 1.0) + coord_cartesian(ylim = c(-0.12, 0.12)) +
  theme_bw() + ggtitle("Hourly returns") + xlab("theoretical Gaussian quantiles") + ylab("sample quantiles")
p2 <- ggplot(data.frame(y = as.vector(btc_returns)), aes(sample = y)) +
  stat_qq(color = "blue", alpha = 0.5) +
  stat_qq_line(color = "gray15", linewidth = 1.0) + coord_cartesian(ylim = c(-0.22, 0.22)) +
  theme_bw() + ggtitle("Daily returns") + xlab("theoretical Gaussian quantiles") + ylab(element_blank())
p3 <- ggplot(data.frame(y = as.vector(btc_monthlyreturns)), aes(sample = y)) +
  stat_qq(color = "blue", alpha = 0.5) +
  stat_qq_line(color = "gray15", linewidth = 1.0) + coord_cartesian(ylim = c(-1.00, 1.00)) +
  theme_bw() + ggtitle("Monthly returns") + xlab("theoretical Gaussian quantiles") + ylab(element_blank())

p1 | p2 | p3

Excess kurtosis of S&P 500 log-returns:

ggplot(data.frame("x" = 1:60, "y" = SP500_kurtosis), aes(x, y)) +
  geom_line(color = "blue", linewidth = 1.2) +
  scale_x_continuous(limits = c(0, 60), breaks = seq(0, 60, by = 10), minor_breaks = NULL) +
  scale_y_continuous(limits = c(0, 12), breaks = seq(0, 12, by = 3), minor_breaks = NULL) +
  ggtitle("") + xlab("period [days]") + ylab("excess kurtosis")

Excess kurtosis of Bitcoin log-returns:

ggplot(data.frame("x" = 1:60, "y" = btc_kurtosis), aes(x, y)) +
  geom_line(color = "blue", linewidth = 1.2) +
  scale_x_continuous(limits = c(0, 60), breaks = seq(0, 60, by = 10), minor_breaks = NULL) +
  scale_y_continuous(limits = c(-0.3, 12), breaks = seq(0, 12, by = 3), minor_breaks = NULL) +
  ggtitle("") + xlab("period [days]") + ylab("excess kurtosis")

Temporal structure

Linear structure

Autocorrelation of S&P 500 daily log-returns:

library(ggfortify)

sp500_acf  <- acf(sp500_returns, plot = FALSE)
sp500_pacf <- pacf(sp500_returns, plot = FALSE)

p1 <- autoplot(sp500_acf, conf.int.fill = "blue", conf.int.value = 0.98, conf.int.type = "ma") +
  xlab("lag") + ylab(element_blank()) +
  scale_x_continuous(limits = c(0, NA)) + scale_y_continuous(limits = c(-0.10, 1)) +
  ggtitle("ACF")
  
p2 <- autoplot(sp500_pacf, conf.int.fill = "blue", conf.int.value = 0.98, conf.int.type = "ma") +
  xlab("lag") + ylab(element_blank()) +
  scale_x_continuous(limits = c(0, NA)) + scale_y_continuous(limits = c(-0.10, 1)) +
  ggtitle("PACF")

p1 / p2

Autocorrelation of Bitcoin daily log-returns:

library(ggfortify)

btc_acf  <- acf(btc_returns, plot = FALSE)
btc_pacf <- pacf(btc_returns, plot = FALSE)

p1 <- autoplot(btc_acf, conf.int.fill = "blue", conf.int.value = 0.98, conf.int.type = "ma") +
  xlab("lag") + ylab(element_blank()) +
  scale_x_continuous(limits = c(0, NA)) + scale_y_continuous(limits = c(-0.10, 1)) +
  ggtitle("ACF")
p2 <- autoplot(btc_pacf, conf.int.fill = "blue", conf.int.value = 0.98, conf.int.type = "ma") +
  xlab("lag") + ylab(element_blank()) +
scale_x_continuous(limits = c(0, NA)) + scale_y_continuous(limits = c(-0.10, 1)) +
  ggtitle("PACF")

p1 / p2

Nonlinear structure

Volatility clustering in S&P 500:

# compute envelope
library(stochvol)
res <- svsample(sp500_returns - mean(sp500_returns))
Done!
Summarizing posterior draws...
sp500_envelope <- xts(res$summary$sd[, 1], index(sp500_returns))

# plot with envelope
ggplot(fortify(cbind(sp500_returns, sp500_envelope), melt = TRUE),
       aes(x = Index, y = Value, color = Series, linewidth = Series, alpha = Series)) +
  geom_line(show.legend = FALSE) +
  scale_color_manual(values = c("blue", "red")) +
  scale_linewidth_manual(values = c(0.5, 1.2)) +
  scale_alpha_manual(values = c(0.6, 1)) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y", date_minor_breaks = "1 month") +
  ggtitle("S&P 500 return and envelope") + xlab(element_blank()) + ylab(element_blank())

Volatility clustering in Bitcoin:

# compute envelope
library(stochvol)
res <- svsample(btc_returns - mean(btc_returns))
Done!
Summarizing posterior draws...
btc_envelope <- xts(res$summary$sd[, 1], index(btc_returns))

# plot with envelope
ggplot(fortify(cbind(btc_returns, btc_envelope), melt = TRUE),
       aes(x = Index, y = Value, color = Series, linewidth = Series, alpha = Series)) +
  geom_line(show.legend = FALSE) +
  scale_color_manual(values = c("blue", "red")) +
  scale_linewidth_manual(values = c(0.5, 1.2)) +
  scale_alpha_manual(values = c(0.6, 1)) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y", date_minor_breaks = "1 month") +
  ggtitle("BTC return and envelope") + xlab(element_blank()) + ylab(element_blank())

Autocorrelation of absolute value of S&P 500 daily log-returns:

library(ggfortify)

sp500_abs_acf  <- acf(abs(sp500_returns), plot = FALSE)
sp500_abs_pacf <- pacf(abs(sp500_returns), plot = FALSE)

p1 <- autoplot(sp500_abs_acf, conf.int.fill = "blue", conf.int.value = 0.98, conf.int.type = "ma") +
  xlab("lag") + ylab(element_blank()) +
  scale_x_continuous(limits = c(0, NA)) + scale_y_continuous(limits = c(-0.15, 1)) +
  ggtitle("ACF")

p2 <- autoplot(sp500_abs_pacf, conf.int.fill = "blue", conf.int.value = 0.98, conf.int.type = "ma") +
  xlab("lag") + ylab(element_blank()) +
  scale_x_continuous(limits = c(0, NA)) + scale_y_continuous(limits = c(-0.10, 1)) +
  ggtitle("PACF")

p1 / p2

Autocorrelation of absolute value of Bitcoin daily log-returns:

library(ggfortify)

btc_abs_acf  <- acf(abs(btc_returns), plot = FALSE)
btc_abs_pacf <- pacf(abs(btc_returns), plot = FALSE)

p1 <- autoplot(btc_abs_acf, conf.int.fill = "blue", conf.int.value = 0.98, conf.int.type = "ma") +
  xlab("lag") + ylab(element_blank()) +
  scale_x_continuous(limits = c(0, NA)) + scale_y_continuous(limits = c(-0.10, 1)) +
  ggtitle("ACF")
p2 <- autoplot(btc_abs_pacf, conf.int.fill = "blue", conf.int.value = 0.98, conf.int.type = "ma") +
  xlab("lag") + ylab(element_blank()) +
  scale_x_continuous(limits = c(0, NA)) + scale_y_continuous(limits = c(-0.10, 1)) +
  ggtitle("PACF")

p1 / p2

Standardized S&P 500 log-returns after factoring out the volatility envelope:

ggplot(fortify(sp500_returns/sp500_envelope, melt = TRUE), aes(x = Index, y = Value)) + 
  geom_line(linewidth = 0.5, color = "blue", show.legend = FALSE) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y", date_minor_breaks = "1 month") +
  ggtitle("Standardized S&P 500 log-returns") + xlab(element_blank()) + ylab(element_blank())

Standardized Bitcoin log-returns after factoring out the volatility envelope:

ggplot(fortify(btc_returns/btc_envelope, melt = TRUE), aes(x = Index, y = Value)) +
  geom_line(linewidth = 0.5, color = "blue", show.legend = FALSE) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y", date_minor_breaks = "1 month") +
  ggtitle("Standardized BTC log-returns") + xlab(element_blank()) + ylab(element_blank())

Asset structure

Correlation matrix of returns for stocks and cryptocurrencies:

library(viridisLite)

# use data from package pob
data(SP500_2015to2020)
set.seed(42)
prices <- tail(SP500_2015to2020$stocks[, sample(ncol(SP500_2015to2020$stocks), 40)], 200)
X <- diff(log(prices))[-1]
cormat_sp500 <- cor(X)

reorder_cormat <- function(cormat){
  dd <- as.dist((1 - cormat)/2)
  hc <- hclust(dd)
  cormat <-cormat[hc$order, hc$order]
}

p_SP500_cormat <- cormat_sp500 |> reorder_cormat() |> melt() |>
  ggplot(aes(x=Var1, y=Var2, fill=value)) + 
  geom_tile() +
  scale_fill_viridis_c(na.value = "white") +
  theme(axis.text.x = element_text(size = 7, angle = 90, hjust = 1, vjust = 1)) +
  theme(legend.position = "none") +
  xlab(element_blank()) + ylab(element_blank()) #+ ggtitle("Correlation matrix of some stocks from S&P 500") 
# use data from package pob
data(cryptos_2017to2021)
prices <- tail(cryptos_2017to2021$hourly[, 1:40], 200)
X <- diff(log(prices))[-1]
cormat_btc <- cor(X)

p_cryptos_cormat <- cormat_btc |> reorder_cormat() |> melt() |>
  ggplot(aes(x=Var1, y=Var2, fill=value)) + 
  geom_tile() +
  scale_fill_viridis_c(na.value = "white") +
  theme(axis.text.x = element_text(size = 7, angle = 90, hjust = 1, vjust = 1)) +
  #theme(legend.position = "none") +
  xlab(element_blank()) + ylab(element_blank()) #+ ggtitle("Correlation matrix of some stocks from S&P 500")
(p_SP500_cormat + ggtitle("S&P 500 stocks")) | (p_cryptos_cormat + ggtitle("Cryptocurrencies"))