# basic finance
library(xts) # to manipulate time series of stock data
library(quantmod) # to download stock data
library(pob) # book package with financial data
# install with: devtools::install_github("dppalomar/pob")
library(stochvol) # to compute volatility envelope
# plotting
library(ggplot2) # for nice plots
library(patchwork) # for combining plots
library(viridisLite) # for nice colors
library(reshape2) # to reshape dataR Code for Portfolio Optimization
Chapter 2 – Financial Data: Stylized Facts
Chapter 2 – Financial Data: Stylized Facts
Daniel P. Palomar (2025). Portfolio Optimization: Theory and Application. Cambridge University Press.
Last update: February 17, 2025
Loading packages
The following packages are used in the examples:
Prices and returns
Price time series of S&P 500:
# get data from Yahoo!Finance
library(quantmod)
sp500_prices <- Ad(getSymbols("^GSPC", from = "2007-01-01", to = "2022-11-04", auto.assign = FALSE))
ggplot(fortify(sp500_prices, melt = TRUE), aes(x = Index, y = Value)) +
geom_line(linewidth = 0.8, color = "blue", show.legend = FALSE) +
scale_x_date(date_breaks = "1 year", date_labels = "%Y", date_minor_breaks = "1 month") +
scale_y_log10() +
xlab(element_blank()) + ylab(element_blank()) + ggtitle("S&P 500 price") Price time series of Bitcoin:
# get data from Yahoo!Finance
btc_prices <- Ad(getSymbols("BTC-USD", from = "2017-01-01", to = "2022-11-04", auto.assign = FALSE))
ggplot(fortify(btc_prices, melt = TRUE), aes(x = Index, y = Value)) +
geom_line(linewidth = 0.8, color = "blue", show.legend = FALSE) +
scale_x_date(date_breaks = "1 year", date_labels = "%Y", date_minor_breaks = "1 month") +
scale_y_log10() +
ggtitle("BTC price") + xlab(element_blank()) + ylab(element_blank())Daily log-return time series of S&P 500:
sp500_returns <- diff(log(sp500_prices))[-1]
#sp500_linreturns <- exp(sp500_returns) - 1
ggplot(fortify(sp500_returns, melt = TRUE), aes(x = Index, y = Value)) +
geom_line(linewidth = 0.5, color = "blue", show.legend = FALSE) +
scale_x_date(date_breaks = "1 year", date_labels = "%Y", date_minor_breaks = "1 month") +
ggtitle("S&P 500 log-returns") + xlab(element_blank()) + ylab(element_blank())We can observe: High-volatility period during global financial crisis in 2008, as well as the high peak in volatility in early 2020 due to the COVID-19 pandemic.
Daily log-return time series of Bitcoin:
btc_returns <- diff(log(btc_prices))[-1]
#btc_returns <- na.omit(btc_returns)
#btc_linreturns <- exp(btc_returns) - 1
ggplot(fortify(btc_returns, melt = TRUE), aes(x = Index, y = Value)) +
geom_line(linewidth = 0.5, color = "blue", show.legend = FALSE) +
scale_x_date(date_breaks = "1 year", date_labels = "%Y", date_minor_breaks = "1 month") +
ggtitle("BTC log-returns") + xlab(element_blank()) + ylab(element_blank())We can observe: Bitcoin flash crash on March 12, 2020, with a drop close to 50% in a single day.
Non-Gaussianity
Asymmetry and heavy tails
Histogram of S&P 500 log-returns at different frequencies (with Gaussian fit):
plot_histogram <- function(returns, shift = 0.0, sd_factor = 1, nbins = 50) {
ggplot(fortify(returns, melt = TRUE), aes(x = Value)) +
geom_histogram(aes(y = after_stat(density)), bins = nbins, fill = "blue", alpha = 0.3, col = "gray31") +
stat_function(fun = dnorm, args = list(mean = mean(returns, na.rm = TRUE) + shift, sd = sd_factor*sd(returns, na.rm = TRUE)),
color = "navy", linewidth = 1) +
theme(axis.text.y = element_blank()) +
xlab("return") + ylab("density")
}
sp500_monthlyreturns <- diff(log(sp500_prices), 20)[-c(1:20)]
sp500_quarterlyreturns <- diff(log(sp500_prices), 60)[-c(1:60)]
p1 <- plot_histogram(sp500_returns, shift = 5e-4, sd_factor = 0.50) + ggtitle("Daily returns")
p2 <- plot_histogram(sp500_monthlyreturns, shift = 1e-2, sd_factor = 0.65) + ggtitle("Monthly returns") + ylab(element_blank())
p3 <- plot_histogram(sp500_quarterlyreturns, shift = 2e-2, sd_factor = 0.52) + ggtitle("Quarterly returns") + ylab(element_blank())
p1 | p2 | p3Histogram of Bitcoin log-returns at different frequencies (with Gaussian fit):
# use data from package pob
data(cryptos_2017to2021)
btc_hourlyprices <- cryptos_2017to2021$hourly[, "BTC"]
btc_hourlyreturns <- diff(log(btc_hourlyprices))[-1]
btc_monthlyreturns <- diff(log(btc_prices), 30)[-c(1:30)]
p1 <- plot_histogram(btc_hourlyreturns, shift = 5e-4, sd_factor = 0.65) + ggtitle("Hourly returns")
p2 <- plot_histogram(btc_returns, shift = 5e-4, sd_factor = 0.55, nbins = 40) + ggtitle("Daily returns") + ylab(element_blank())
p3 <- plot_histogram(btc_monthlyreturns, shift = -2e-2, sd_factor = 0.75, nbins = 20) + ggtitle("Monthly returns") + ylab(element_blank())
p1 | p2 | p3Skewness
Skewness of S&P 500 log-returns:
library(PerformanceAnalytics)
# compute kurtosis vs frequency (observe aggregational Gaussianity)
SP500_kurtosis <- NULL
SP500_skewness <- NULL
for(l in 1:60) {
SP500_kurtosis[l] <- kurtosis(diff(log(sp500_prices), l), method="excess")
SP500_skewness[l] <- skewness(diff(log(sp500_prices), l))
}
btc_kurtosis <- NULL
btc_skewness <- NULL
for(l in 1:60) {
btc_kurtosis[l] <- kurtosis(diff(log(btc_prices), l), method="excess")
btc_skewness[l] <- skewness(diff(log(btc_prices), l))
}ggplot(data.frame("x" = 1:60, "y" = SP500_skewness), aes(x, y)) +
geom_line(color = "blue", linewidth = 1.2) +
scale_x_continuous(limits = c(0, 60), breaks = seq(0, 60, by = 10), minor_breaks = NULL) +
coord_cartesian(ylim = c(-2, 0)) +
ggtitle("") + xlab("period [days]") + ylab("skewness")Skewness of Bitcoin log-returns:
ggplot(data.frame("x" = 1:60, "y" = btc_skewness), aes(x, y)) +
geom_line(color = "blue", linewidth = 1.2) +
scale_x_continuous(limits = c(0, 60), breaks = seq(0, 60, by = 5), minor_breaks = NULL) +
ggtitle("") + xlab("period [days]") + ylab("skewness")Heavy tails and kurtosis
Q-Q plots of S&P 500 log-returns at different frequencies:
p1 <- ggplot(data.frame(y = as.vector(sp500_returns)), aes(sample = y)) +
stat_qq(color = "blue", alpha = 0.5) +
stat_qq_line(color = "gray15", linewidth = 1.0) + coord_cartesian(ylim = c(-0.12, 0.12)) +
theme_bw() + ggtitle("Daily returns") + xlab("theoretical Gaussian quantiles") + ylab("sample quantiles")
p2 <- ggplot(data.frame(y = as.vector(sp500_monthlyreturns)), aes(sample = y)) +
stat_qq(color = "blue", alpha = 0.5) +
stat_qq_line(color = "gray15", linewidth = 1.0) + coord_cartesian(ylim = c(-0.22, 0.22)) +
theme_bw() + ggtitle("Monthly returns") + xlab("theoretical Gaussian quantiles") + ylab(element_blank())
p3 <- ggplot(data.frame(y = as.vector(sp500_quarterlyreturns)), aes(sample = y)) +
stat_qq(color = "blue", alpha = 0.5) +
stat_qq_line(color = "gray15", linewidth = 1.0) + coord_cartesian(ylim = c(-0.35, 0.35)) +
theme_bw() + ggtitle("Quarterly returns") + xlab("theoretical Gaussian quantiles") + ylab(element_blank())
p1 | p2 | p3Q-Q plots of Bitcoin log-returns at different frequencies:
p1 <- ggplot(data.frame(y = as.vector(btc_hourlyreturns)), aes(sample = y)) +
stat_qq(color = "blue", alpha = 0.5) +
stat_qq_line(color = "gray15", linewidth = 1.0) + coord_cartesian(ylim = c(-0.12, 0.12)) +
theme_bw() + ggtitle("Hourly returns") + xlab("theoretical Gaussian quantiles") + ylab("sample quantiles")
p2 <- ggplot(data.frame(y = as.vector(btc_returns)), aes(sample = y)) +
stat_qq(color = "blue", alpha = 0.5) +
stat_qq_line(color = "gray15", linewidth = 1.0) + coord_cartesian(ylim = c(-0.22, 0.22)) +
theme_bw() + ggtitle("Daily returns") + xlab("theoretical Gaussian quantiles") + ylab(element_blank())
p3 <- ggplot(data.frame(y = as.vector(btc_monthlyreturns)), aes(sample = y)) +
stat_qq(color = "blue", alpha = 0.5) +
stat_qq_line(color = "gray15", linewidth = 1.0) + coord_cartesian(ylim = c(-1.00, 1.00)) +
theme_bw() + ggtitle("Monthly returns") + xlab("theoretical Gaussian quantiles") + ylab(element_blank())
p1 | p2 | p3Excess kurtosis of S&P 500 log-returns:
ggplot(data.frame("x" = 1:60, "y" = SP500_kurtosis), aes(x, y)) +
geom_line(color = "blue", linewidth = 1.2) +
scale_x_continuous(limits = c(0, 60), breaks = seq(0, 60, by = 10), minor_breaks = NULL) +
scale_y_continuous(limits = c(0, 12), breaks = seq(0, 12, by = 3), minor_breaks = NULL) +
ggtitle("") + xlab("period [days]") + ylab("excess kurtosis")Excess kurtosis of Bitcoin log-returns:
ggplot(data.frame("x" = 1:60, "y" = btc_kurtosis), aes(x, y)) +
geom_line(color = "blue", linewidth = 1.2) +
scale_x_continuous(limits = c(0, 60), breaks = seq(0, 60, by = 10), minor_breaks = NULL) +
scale_y_continuous(limits = c(-0.3, 12), breaks = seq(0, 12, by = 3), minor_breaks = NULL) +
ggtitle("") + xlab("period [days]") + ylab("excess kurtosis")Temporal structure
Linear structure
Autocorrelation of S&P 500 daily log-returns:
library(ggfortify)
sp500_acf <- acf(sp500_returns, plot = FALSE)
sp500_pacf <- pacf(sp500_returns, plot = FALSE)
p1 <- autoplot(sp500_acf, conf.int.fill = "blue", conf.int.value = 0.98, conf.int.type = "ma") +
xlab("lag") + ylab(element_blank()) +
scale_x_continuous(limits = c(0, NA)) + scale_y_continuous(limits = c(-0.10, 1)) +
ggtitle("ACF")
p2 <- autoplot(sp500_pacf, conf.int.fill = "blue", conf.int.value = 0.98, conf.int.type = "ma") +
xlab("lag") + ylab(element_blank()) +
scale_x_continuous(limits = c(0, NA)) + scale_y_continuous(limits = c(-0.10, 1)) +
ggtitle("PACF")
p1 / p2Autocorrelation of Bitcoin daily log-returns:
library(ggfortify)
btc_acf <- acf(btc_returns, plot = FALSE)
btc_pacf <- pacf(btc_returns, plot = FALSE)
p1 <- autoplot(btc_acf, conf.int.fill = "blue", conf.int.value = 0.98, conf.int.type = "ma") +
xlab("lag") + ylab(element_blank()) +
scale_x_continuous(limits = c(0, NA)) + scale_y_continuous(limits = c(-0.10, 1)) +
ggtitle("ACF")
p2 <- autoplot(btc_pacf, conf.int.fill = "blue", conf.int.value = 0.98, conf.int.type = "ma") +
xlab("lag") + ylab(element_blank()) +
scale_x_continuous(limits = c(0, NA)) + scale_y_continuous(limits = c(-0.10, 1)) +
ggtitle("PACF")
p1 / p2Nonlinear structure
Volatility clustering in S&P 500:
# compute envelope
library(stochvol)
res <- svsample(sp500_returns - mean(sp500_returns))Done!
Summarizing posterior draws...
sp500_envelope <- xts(res$summary$sd[, 1], index(sp500_returns))
# plot with envelope
ggplot(fortify(cbind(sp500_returns, sp500_envelope), melt = TRUE),
aes(x = Index, y = Value, color = Series, linewidth = Series, alpha = Series)) +
geom_line(show.legend = FALSE) +
scale_color_manual(values = c("blue", "red")) +
scale_linewidth_manual(values = c(0.5, 1.2)) +
scale_alpha_manual(values = c(0.6, 1)) +
scale_x_date(date_breaks = "1 year", date_labels = "%Y", date_minor_breaks = "1 month") +
ggtitle("S&P 500 return and envelope") + xlab(element_blank()) + ylab(element_blank())Volatility clustering in Bitcoin:
# compute envelope
library(stochvol)
res <- svsample(btc_returns - mean(btc_returns))Done!
Summarizing posterior draws...
btc_envelope <- xts(res$summary$sd[, 1], index(btc_returns))
# plot with envelope
ggplot(fortify(cbind(btc_returns, btc_envelope), melt = TRUE),
aes(x = Index, y = Value, color = Series, linewidth = Series, alpha = Series)) +
geom_line(show.legend = FALSE) +
scale_color_manual(values = c("blue", "red")) +
scale_linewidth_manual(values = c(0.5, 1.2)) +
scale_alpha_manual(values = c(0.6, 1)) +
scale_x_date(date_breaks = "1 year", date_labels = "%Y", date_minor_breaks = "1 month") +
ggtitle("BTC return and envelope") + xlab(element_blank()) + ylab(element_blank())Autocorrelation of absolute value of S&P 500 daily log-returns:
library(ggfortify)
sp500_abs_acf <- acf(abs(sp500_returns), plot = FALSE)
sp500_abs_pacf <- pacf(abs(sp500_returns), plot = FALSE)
p1 <- autoplot(sp500_abs_acf, conf.int.fill = "blue", conf.int.value = 0.98, conf.int.type = "ma") +
xlab("lag") + ylab(element_blank()) +
scale_x_continuous(limits = c(0, NA)) + scale_y_continuous(limits = c(-0.15, 1)) +
ggtitle("ACF")
p2 <- autoplot(sp500_abs_pacf, conf.int.fill = "blue", conf.int.value = 0.98, conf.int.type = "ma") +
xlab("lag") + ylab(element_blank()) +
scale_x_continuous(limits = c(0, NA)) + scale_y_continuous(limits = c(-0.10, 1)) +
ggtitle("PACF")
p1 / p2Autocorrelation of absolute value of Bitcoin daily log-returns:
library(ggfortify)
btc_abs_acf <- acf(abs(btc_returns), plot = FALSE)
btc_abs_pacf <- pacf(abs(btc_returns), plot = FALSE)
p1 <- autoplot(btc_abs_acf, conf.int.fill = "blue", conf.int.value = 0.98, conf.int.type = "ma") +
xlab("lag") + ylab(element_blank()) +
scale_x_continuous(limits = c(0, NA)) + scale_y_continuous(limits = c(-0.10, 1)) +
ggtitle("ACF")
p2 <- autoplot(btc_abs_pacf, conf.int.fill = "blue", conf.int.value = 0.98, conf.int.type = "ma") +
xlab("lag") + ylab(element_blank()) +
scale_x_continuous(limits = c(0, NA)) + scale_y_continuous(limits = c(-0.10, 1)) +
ggtitle("PACF")
p1 / p2Standardized S&P 500 log-returns after factoring out the volatility envelope:
ggplot(fortify(sp500_returns/sp500_envelope, melt = TRUE), aes(x = Index, y = Value)) +
geom_line(linewidth = 0.5, color = "blue", show.legend = FALSE) +
scale_x_date(date_breaks = "1 year", date_labels = "%Y", date_minor_breaks = "1 month") +
ggtitle("Standardized S&P 500 log-returns") + xlab(element_blank()) + ylab(element_blank())Standardized Bitcoin log-returns after factoring out the volatility envelope:
ggplot(fortify(btc_returns/btc_envelope, melt = TRUE), aes(x = Index, y = Value)) +
geom_line(linewidth = 0.5, color = "blue", show.legend = FALSE) +
scale_x_date(date_breaks = "1 year", date_labels = "%Y", date_minor_breaks = "1 month") +
ggtitle("Standardized BTC log-returns") + xlab(element_blank()) + ylab(element_blank())Asset structure
Correlation matrix of returns for stocks and cryptocurrencies:
library(viridisLite)
# use data from package pob
data(SP500_2015to2020)
set.seed(42)
prices <- tail(SP500_2015to2020$stocks[, sample(ncol(SP500_2015to2020$stocks), 40)], 200)
X <- diff(log(prices))[-1]
cormat_sp500 <- cor(X)
reorder_cormat <- function(cormat){
dd <- as.dist((1 - cormat)/2)
hc <- hclust(dd)
cormat <-cormat[hc$order, hc$order]
}
p_SP500_cormat <- cormat_sp500 |> reorder_cormat() |> melt() |>
ggplot(aes(x=Var1, y=Var2, fill=value)) +
geom_tile() +
scale_fill_viridis_c(na.value = "white") +
theme(axis.text.x = element_text(size = 7, angle = 90, hjust = 1, vjust = 1)) +
theme(legend.position = "none") +
xlab(element_blank()) + ylab(element_blank()) #+ ggtitle("Correlation matrix of some stocks from S&P 500") # use data from package pob
data(cryptos_2017to2021)
prices <- tail(cryptos_2017to2021$hourly[, 1:40], 200)
X <- diff(log(prices))[-1]
cormat_btc <- cor(X)
p_cryptos_cormat <- cormat_btc |> reorder_cormat() |> melt() |>
ggplot(aes(x=Var1, y=Var2, fill=value)) +
geom_tile() +
scale_fill_viridis_c(na.value = "white") +
theme(axis.text.x = element_text(size = 7, angle = 90, hjust = 1, vjust = 1)) +
#theme(legend.position = "none") +
xlab(element_blank()) + ylab(element_blank()) #+ ggtitle("Correlation matrix of some stocks from S&P 500")(p_SP500_cormat + ggtitle("S&P 500 stocks")) | (p_cryptos_cormat + ggtitle("Cryptocurrencies"))