# basic finance
library(xts) # to manipulate time series of stock data
library(quantmod) # to download stock data
library(pob) # book package with financial data
# install with: devtools::install_github("dppalomar/pob")
library(stochvol) # to compute volatility envelope
# plotting
library(ggplot2) # for nice plots
library(patchwork) # for combining plots
library(viridisLite) # for nice colors
library(reshape2) # to reshape data
Portfolio Optimization
Chapter 2 - Financial Data: Stylized Facts
R code
R code examples for Chapter 2 of the book:
Daniel P. Palomar (2024). Portfolio Optimization: Theory and Application. Cambridge University Press.
Loading packages
The following packages are used in the examples:
Prices and returns
Price time series of S&P 500:
# get data from Yahoo!Finance
library(quantmod)
<- Ad(getSymbols("^GSPC", from = "2007-01-01", to = "2022-11-04", auto.assign = FALSE))
sp500_prices
ggplot(fortify(sp500_prices, melt = TRUE), aes(x = Index, y = Value)) +
geom_line(linewidth = 0.8, color = "blue", show.legend = FALSE) +
scale_x_date(date_breaks = "1 year", date_labels = "%Y", date_minor_breaks = "1 month") +
scale_y_log10() +
xlab(element_blank()) + ylab(element_blank()) + ggtitle("S&P 500 price")
Price time series of Bitcoin:
# get data from Yahoo!Finance
<- Ad(getSymbols("BTC-USD", from = "2017-01-01", to = "2022-11-04", auto.assign = FALSE))
btc_prices
ggplot(fortify(btc_prices, melt = TRUE), aes(x = Index, y = Value)) +
geom_line(linewidth = 0.8, color = "blue", show.legend = FALSE) +
scale_x_date(date_breaks = "1 year", date_labels = "%Y", date_minor_breaks = "1 month") +
scale_y_log10() +
ggtitle("BTC price") + xlab(element_blank()) + ylab(element_blank())
Daily log-return time series of S&P 500:
<- diff(log(sp500_prices))[-1]
sp500_returns #sp500_linreturns <- exp(sp500_returns) - 1
ggplot(fortify(sp500_returns, melt = TRUE), aes(x = Index, y = Value)) +
geom_line(linewidth = 0.5, color = "blue", show.legend = FALSE) +
scale_x_date(date_breaks = "1 year", date_labels = "%Y", date_minor_breaks = "1 month") +
ggtitle("S&P 500 log-returns") + xlab(element_blank()) + ylab(element_blank())
We can observe: High-volatility period during global financial crisis in 2008, as well as the high peak in volatility in early 2020 due to the COVID-19 pandemic.
Daily log-return time series of Bitcoin:
<- diff(log(btc_prices))[-1]
btc_returns #btc_returns <- na.omit(btc_returns)
#btc_linreturns <- exp(btc_returns) - 1
ggplot(fortify(btc_returns, melt = TRUE), aes(x = Index, y = Value)) +
geom_line(linewidth = 0.5, color = "blue", show.legend = FALSE) +
scale_x_date(date_breaks = "1 year", date_labels = "%Y", date_minor_breaks = "1 month") +
ggtitle("BTC log-returns") + xlab(element_blank()) + ylab(element_blank())
We can observe: Bitcoin flash crash on March 12, 2020, with a drop close to 50% in a single day.
Non-Gaussianity
Asymmetry and heavy tails
Histogram of S&P 500 log-returns at different frequencies (with Gaussian fit):
<- function(returns, shift = 0.0, sd_factor = 1, nbins = 50) {
plot_histogram ggplot(fortify(returns, melt = TRUE), aes(x = Value)) +
geom_histogram(aes(y = after_stat(density)), bins = nbins, fill = "blue", alpha = 0.3, col = "gray31") +
stat_function(fun = dnorm, args = list(mean = mean(returns, na.rm = TRUE) + shift, sd = sd_factor*sd(returns, na.rm = TRUE)),
color = "navy", linewidth = 1) +
theme(axis.text.y = element_blank()) +
xlab("return") + ylab("density")
}
<- diff(log(sp500_prices), 20)[-c(1:20)]
sp500_monthlyreturns <- diff(log(sp500_prices), 60)[-c(1:60)]
sp500_quarterlyreturns
<- plot_histogram(sp500_returns, shift = 5e-4, sd_factor = 0.50) + ggtitle("Daily returns")
p1 <- plot_histogram(sp500_monthlyreturns, shift = 1e-2, sd_factor = 0.65) + ggtitle("Monthly returns") + ylab(element_blank())
p2 <- plot_histogram(sp500_quarterlyreturns, shift = 2e-2, sd_factor = 0.52) + ggtitle("Quarterly returns") + ylab(element_blank())
p3 | p2 | p3 p1
Histogram of Bitcoin log-returns at different frequencies (with Gaussian fit):
# use data from package pob
data(cryptos_2017to2021)
<- cryptos_2017to2021$hourly[, "BTC"]
btc_hourlyprices <- diff(log(btc_hourlyprices))[-1]
btc_hourlyreturns
<- diff(log(btc_prices), 30)[-c(1:30)]
btc_monthlyreturns
<- plot_histogram(btc_hourlyreturns, shift = 5e-4, sd_factor = 0.65) + ggtitle("Hourly returns")
p1 <- plot_histogram(btc_returns, shift = 5e-4, sd_factor = 0.55, nbins = 40) + ggtitle("Daily returns") + ylab(element_blank())
p2 <- plot_histogram(btc_monthlyreturns, shift = -2e-2, sd_factor = 0.75, nbins = 20) + ggtitle("Monthly returns") + ylab(element_blank())
p3
| p2 | p3 p1
Skewness
Skewness of S&P 500 log-returns:
library(PerformanceAnalytics)
Attaching package: 'PerformanceAnalytics'
The following object is masked from 'package:graphics':
legend
# compute kurtosis vs frequency (observe aggregational Gaussianity)
<- NULL
SP500_kurtosis <- NULL
SP500_skewness for(l in 1:60) {
<- kurtosis(diff(log(sp500_prices), l), method="excess")
SP500_kurtosis[l] <- skewness(diff(log(sp500_prices), l))
SP500_skewness[l]
}
<- NULL
btc_kurtosis <- NULL
btc_skewness for(l in 1:60) {
<- kurtosis(diff(log(btc_prices), l), method="excess")
btc_kurtosis[l] <- skewness(diff(log(btc_prices), l))
btc_skewness[l] }
ggplot(data.frame("x" = 1:60, "y" = SP500_skewness), aes(x, y)) +
geom_line(color = "blue", linewidth = 1.2) +
scale_x_continuous(limits = c(0, 60), breaks = seq(0, 60, by = 10), minor_breaks = NULL) +
coord_cartesian(ylim = c(-2, 0)) +
ggtitle("") + xlab("period [days]") + ylab("skewness")
Skewness of Bitcoin log-returns:
ggplot(data.frame("x" = 1:60, "y" = btc_skewness), aes(x, y)) +
geom_line(color = "blue", linewidth = 1.2) +
scale_x_continuous(limits = c(0, 60), breaks = seq(0, 60, by = 5), minor_breaks = NULL) +
ggtitle("") + xlab("period [days]") + ylab("skewness")
Heavy tails and kurtosis
Q-Q plots of S&P 500 log-returns at different frequencies:
<- ggplot(data.frame(y = as.vector(sp500_returns)), aes(sample = y)) +
p1 stat_qq(color = "blue", alpha = 0.5) +
stat_qq_line(color = "gray15", linewidth = 1.0) + coord_cartesian(ylim = c(-0.12, 0.12)) +
theme_bw() + ggtitle("Daily returns") + xlab("theoretical Gaussian quantiles") + ylab("sample quantiles")
<- ggplot(data.frame(y = as.vector(sp500_monthlyreturns)), aes(sample = y)) +
p2 stat_qq(color = "blue", alpha = 0.5) +
stat_qq_line(color = "gray15", linewidth = 1.0) + coord_cartesian(ylim = c(-0.22, 0.22)) +
theme_bw() + ggtitle("Monthly returns") + xlab("theoretical Gaussian quantiles") + ylab(element_blank())
<- ggplot(data.frame(y = as.vector(sp500_quarterlyreturns)), aes(sample = y)) +
p3 stat_qq(color = "blue", alpha = 0.5) +
stat_qq_line(color = "gray15", linewidth = 1.0) + coord_cartesian(ylim = c(-0.35, 0.35)) +
theme_bw() + ggtitle("Quarterly returns") + xlab("theoretical Gaussian quantiles") + ylab(element_blank())
| p2 | p3 p1
Q-Q plots of Bitcoin log-returns at different frequencies:
<- ggplot(data.frame(y = as.vector(btc_hourlyreturns)), aes(sample = y)) +
p1 stat_qq(color = "blue", alpha = 0.5) +
stat_qq_line(color = "gray15", linewidth = 1.0) + coord_cartesian(ylim = c(-0.12, 0.12)) +
theme_bw() + ggtitle("Hourly returns") + xlab("theoretical Gaussian quantiles") + ylab("sample quantiles")
<- ggplot(data.frame(y = as.vector(btc_returns)), aes(sample = y)) +
p2 stat_qq(color = "blue", alpha = 0.5) +
stat_qq_line(color = "gray15", linewidth = 1.0) + coord_cartesian(ylim = c(-0.22, 0.22)) +
theme_bw() + ggtitle("Daily returns") + xlab("theoretical Gaussian quantiles") + ylab(element_blank())
<- ggplot(data.frame(y = as.vector(btc_monthlyreturns)), aes(sample = y)) +
p3 stat_qq(color = "blue", alpha = 0.5) +
stat_qq_line(color = "gray15", linewidth = 1.0) + coord_cartesian(ylim = c(-1.00, 1.00)) +
theme_bw() + ggtitle("Monthly returns") + xlab("theoretical Gaussian quantiles") + ylab(element_blank())
| p2 | p3 p1
Excess kurtosis of S&P 500 log-returns:
ggplot(data.frame("x" = 1:60, "y" = SP500_kurtosis), aes(x, y)) +
geom_line(color = "blue", linewidth = 1.2) +
scale_x_continuous(limits = c(0, 60), breaks = seq(0, 60, by = 10), minor_breaks = NULL) +
scale_y_continuous(limits = c(0, 12), breaks = seq(0, 12, by = 3), minor_breaks = NULL) +
ggtitle("") + xlab("period [days]") + ylab("excess kurtosis")
Excess kurtosis of Bitcoin log-returns:
ggplot(data.frame("x" = 1:60, "y" = btc_kurtosis), aes(x, y)) +
geom_line(color = "blue", linewidth = 1.2) +
scale_x_continuous(limits = c(0, 60), breaks = seq(0, 60, by = 10), minor_breaks = NULL) +
scale_y_continuous(limits = c(-0.3, 12), breaks = seq(0, 12, by = 3), minor_breaks = NULL) +
ggtitle("") + xlab("period [days]") + ylab("excess kurtosis")
Temporal structure
Linear structure
Autocorrelation of S&P 500 daily log-returns:
library(ggfortify)
<- acf(sp500_returns, plot = FALSE)
sp500_acf <- pacf(sp500_returns, plot = FALSE)
sp500_pacf
<- autoplot(sp500_acf, conf.int.fill = "blue", conf.int.value = 0.98, conf.int.type = "ma") +
p1 xlab("lag") + ylab(element_blank()) +
scale_x_continuous(limits = c(0, NA)) + scale_y_continuous(limits = c(-0.10, 1)) +
ggtitle("ACF")
<- autoplot(sp500_pacf, conf.int.fill = "blue", conf.int.value = 0.98, conf.int.type = "ma") +
p2 xlab("lag") + ylab(element_blank()) +
scale_x_continuous(limits = c(0, NA)) + scale_y_continuous(limits = c(-0.10, 1)) +
ggtitle("PACF")
/ p2 p1
Autocorrelation of Bitcoin daily log-returns:
library(ggfortify)
<- acf(btc_returns, plot = FALSE)
btc_acf <- pacf(btc_returns, plot = FALSE)
btc_pacf
<- autoplot(btc_acf, conf.int.fill = "blue", conf.int.value = 0.98, conf.int.type = "ma") +
p1 xlab("lag") + ylab(element_blank()) +
scale_x_continuous(limits = c(0, NA)) + scale_y_continuous(limits = c(-0.10, 1)) +
ggtitle("ACF")
<- autoplot(btc_pacf, conf.int.fill = "blue", conf.int.value = 0.98, conf.int.type = "ma") +
p2 xlab("lag") + ylab(element_blank()) +
scale_x_continuous(limits = c(0, NA)) + scale_y_continuous(limits = c(-0.10, 1)) +
ggtitle("PACF")
/ p2 p1
Nonlinear structure
Volatility clustering in S&P 500:
# compute envelope
library(stochvol)
<- svsample(sp500_returns - mean(sp500_returns)) res
Done!
Summarizing posterior draws...
<- xts(res$summary$sd[, 1], index(sp500_returns))
sp500_envelope
# plot with envelope
ggplot(fortify(cbind(sp500_returns, sp500_envelope), melt = TRUE),
aes(x = Index, y = Value, color = Series, linewidth = Series, alpha = Series)) +
geom_line(show.legend = FALSE) +
scale_color_manual(values = c("blue", "red")) +
scale_linewidth_manual(values = c(0.5, 1.2)) +
scale_alpha_manual(values = c(0.6, 1)) +
scale_x_date(date_breaks = "1 year", date_labels = "%Y", date_minor_breaks = "1 month") +
ggtitle("S&P 500 return and envelope") + xlab(element_blank()) + ylab(element_blank())
Volatility clustering in Bitcoin:
# compute envelope
library(stochvol)
<- svsample(btc_returns - mean(btc_returns)) res
Done!
Summarizing posterior draws...
<- xts(res$summary$sd[, 1], index(btc_returns))
btc_envelope
# plot with envelope
ggplot(fortify(cbind(btc_returns, btc_envelope), melt = TRUE),
aes(x = Index, y = Value, color = Series, linewidth = Series, alpha = Series)) +
geom_line(show.legend = FALSE) +
scale_color_manual(values = c("blue", "red")) +
scale_linewidth_manual(values = c(0.5, 1.2)) +
scale_alpha_manual(values = c(0.6, 1)) +
scale_x_date(date_breaks = "1 year", date_labels = "%Y", date_minor_breaks = "1 month") +
ggtitle("BTC return and envelope") + xlab(element_blank()) + ylab(element_blank())
Autocorrelation of absolute value of S&P 500 daily log-returns:
library(ggfortify)
<- acf(abs(sp500_returns), plot = FALSE)
sp500_abs_acf <- pacf(abs(sp500_returns), plot = FALSE)
sp500_abs_pacf
<- autoplot(sp500_abs_acf, conf.int.fill = "blue", conf.int.value = 0.98, conf.int.type = "ma") +
p1 xlab("lag") + ylab(element_blank()) +
scale_x_continuous(limits = c(0, NA)) + scale_y_continuous(limits = c(-0.15, 1)) +
ggtitle("ACF")
<- autoplot(sp500_abs_pacf, conf.int.fill = "blue", conf.int.value = 0.98, conf.int.type = "ma") +
p2 xlab("lag") + ylab(element_blank()) +
scale_x_continuous(limits = c(0, NA)) + scale_y_continuous(limits = c(-0.10, 1)) +
ggtitle("PACF")
/ p2 p1
Autocorrelation of absolute value of Bitcoin daily log-returns:
library(ggfortify)
<- acf(abs(btc_returns), plot = FALSE)
btc_abs_acf <- pacf(abs(btc_returns), plot = FALSE)
btc_abs_pacf
<- autoplot(btc_abs_acf, conf.int.fill = "blue", conf.int.value = 0.98, conf.int.type = "ma") +
p1 xlab("lag") + ylab(element_blank()) +
scale_x_continuous(limits = c(0, NA)) + scale_y_continuous(limits = c(-0.10, 1)) +
ggtitle("ACF")
<- autoplot(btc_abs_pacf, conf.int.fill = "blue", conf.int.value = 0.98, conf.int.type = "ma") +
p2 xlab("lag") + ylab(element_blank()) +
scale_x_continuous(limits = c(0, NA)) + scale_y_continuous(limits = c(-0.10, 1)) +
ggtitle("PACF")
/ p2 p1
Standardized S&P 500 log-returns after factoring out the volatility envelope:
ggplot(fortify(sp500_returns/sp500_envelope, melt = TRUE), aes(x = Index, y = Value)) +
geom_line(linewidth = 0.5, color = "blue", show.legend = FALSE) +
scale_x_date(date_breaks = "1 year", date_labels = "%Y", date_minor_breaks = "1 month") +
ggtitle("Standardized S&P 500 log-returns") + xlab(element_blank()) + ylab(element_blank())
Standardized Bitcoin log-returns after factoring out the volatility envelope:
ggplot(fortify(btc_returns/btc_envelope, melt = TRUE), aes(x = Index, y = Value)) +
geom_line(linewidth = 0.5, color = "blue", show.legend = FALSE) +
scale_x_date(date_breaks = "1 year", date_labels = "%Y", date_minor_breaks = "1 month") +
ggtitle("Standardized BTC log-returns") + xlab(element_blank()) + ylab(element_blank())
Asset structure
Correlation matrix of returns for stocks and cryptocurrencies:
library(viridisLite)
# use data from package pob
data(SP500_2015to2020)
set.seed(42)
<- tail(SP500_2015to2020$stocks[, sample(ncol(SP500_2015to2020$stocks), 40)], 200)
prices <- diff(log(prices))[-1]
X <- cor(X)
cormat_sp500
<- function(cormat){
reorder_cormat <- as.dist((1 - cormat)/2)
dd <- hclust(dd)
hc <-cormat[hc$order, hc$order]
cormat
}
<- cormat_sp500 |> reorder_cormat() |> melt() |>
p_SP500_cormat ggplot(aes(x=Var1, y=Var2, fill=value)) +
geom_tile() +
scale_fill_viridis_c(na.value = "white") +
theme(axis.text.x = element_text(size = 7, angle = 90, hjust = 1, vjust = 1)) +
theme(legend.position = "none") +
xlab(element_blank()) + ylab(element_blank()) #+ ggtitle("Correlation matrix of some stocks from S&P 500")
# use data from package pob
data(cryptos_2017to2021)
<- tail(cryptos_2017to2021$hourly[, 1:40], 200)
prices <- diff(log(prices))[-1]
X <- cor(X)
cormat_btc
<- cormat_btc |> reorder_cormat() |> melt() |>
p_cryptos_cormat ggplot(aes(x=Var1, y=Var2, fill=value)) +
geom_tile() +
scale_fill_viridis_c(na.value = "white") +
theme(axis.text.x = element_text(size = 7, angle = 90, hjust = 1, vjust = 1)) +
#theme(legend.position = "none") +
xlab(element_blank()) + ylab(element_blank()) #+ ggtitle("Correlation matrix of some stocks from S&P 500")
+ ggtitle("S&P 500 stocks")) | (p_cryptos_cormat + ggtitle("Cryptocurrencies")) (p_SP500_cormat