# basic finance
library(xts) # to manipulate time series of stock data
library(portfolioBacktest) # to conduct backtests
library(pob) # book package with financial data
# plotting
library(ggplot2) # for nice plots
library(ggdendro) # to plot dendograms
library(reshape2) # to reshape data
library(patchwork) # for combining plots
# graphs
library(fingraph) # to learn financial graphs
# optimization
library(CVXR)
Portfolio Optimization
Chapter 12: Graph-Based Portfolios
R code
R code examples for Chapter 12 of the book:
Daniel P. Palomar (2024). Portfolio Optimization: Theory and Application. Cambridge University Press.
Loading packages
First load the packages used in the examples:
Hierarchical clustering
Dendograms
Plots of dendrograms of S&P 500 stocks (with cut to produce four clusters):
Code
library(pob)
library(ggdendro)
# use data from package pob
data(SP500_2015to2020)
set.seed(42)
<- tail(SP500_2015to2020$stocks[, c("MGM", "WYNN", "LVS", "CMS", "DUK", "BAC", "BK", "USB", "KMB", "PG", "FB", "AMZN", "MSFT", "GOOGL")], 200)
stock_prices <- stock_prices[, sample(ncol(stock_prices))]
stock_prices <- diff(log(stock_prices))[-1]
X <- sqrt(0.5*(1 - cor(X)))
D
<- hclust(dist(D), method = "single") |>
p_single ggdendrogram(rotate = FALSE, size = 4, theme_dendro = TRUE) +
geom_hline(yintercept = 0.66, color = "blue", linetype = "dashed") +
theme(panel.border = element_blank(),
axis.text.y=element_blank()) +
xlab(element_blank()) + ylab(element_blank()) + ggtitle("Single linkage")
<- hclust(dist(D), method = "complete") |>
p_complete ggdendrogram(rotate = FALSE, size = 4, theme_dendro = TRUE) +
geom_hline(yintercept = 0.72, color = "blue", linetype = "dashed") +
theme(panel.border = element_blank(),
axis.text.y=element_blank()) +
xlab(element_blank()) + ylab(element_blank()) + ggtitle("Complete linkage")
<- hclust(dist(D), method = "average") |>
p_average ggdendrogram(rotate = FALSE, size = 4, theme_dendro = TRUE) +
geom_hline(yintercept = 0.70, color = "blue", linetype = "dashed") +
theme(panel.border = element_blank(),
axis.text.y=element_blank()) +
xlab(element_blank()) + ylab(element_blank()) + ggtitle("Average linkage")
<- hclust(dist(D), method = "ward.D") |>
p_ward ggdendrogram(rotate = FALSE, size = 4, theme_dendro = TRUE) +
geom_hline(yintercept = 1.00, color = "blue", linetype = "dashed") +
theme(panel.border = element_blank(),
axis.text.y=element_blank()) +
xlab(element_blank()) + ylab(element_blank()) + ggtitle("Ward's method")
| p_complete) / (p_average | p_ward) (p_single
Quasi-diagonalization of correlation matrix
Effect of seriation in the correlation matrix of S&P 500 stocks:
Code
<- cov(X)
Sigma_sp500
<- function(cormat, method = "single"){
reorder_cormat <- sqrt(0.5*(1 - cormat))
D <- hclust(dist(D), method = method)
hc <-cormat[hc$order, hc$order]
cormat
}
<- cov2cor(Sigma_sp500) |> melt() |>
p1 ggplot(aes(x=Var1, y=Var2, fill=value)) +
geom_tile() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 1)) +
theme(legend.position = "none") +
xlab(element_blank()) + ylab(element_blank()) + ggtitle("Heatmap of original correlation matrix")
<- cov2cor(Sigma_sp500) |> reorder_cormat() |> melt() |>
p2 ggplot(aes(x=Var1, y=Var2, fill=value)) +
geom_tile() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 1)) +
xlab(element_blank()) + ylab(element_blank()) + ggtitle("Heatmap of quasi-diagonal correlation matrix")
| p2 p1
Hierarchical clustering-based portfolios
Hierarchical \(1/N\) portfolio
We first define the function to design the hierarchical \(1/N\) portfolio with different options for the splitting, linkage, and distance matrix calculation:
Code
library(fingraph)
<- function(
design_H1overN_portfolio Sigma = NULL, X,
splitting = c("dendrogram", "bisection"), num_clusters = NULL,
linkage = c("single", "complete", "average", "Ward"),
distance = c("corr-dist2", "corr-dist", "regular-heavytail", "k-comp-heavytail"), k) {
<- match.arg(splitting)
splitting <- match.arg(distance)
distance <- match.arg(linkage)
linkage if (linkage == "Ward") linkage <- "ward.D"
# data
if (is.null(Sigma))
<- cov(X)
Sigma <- ncol(Sigma)
N if (is.null(num_clusters)) # by default, cluster all the way down
<- N
num_clusters
# hierarchical clustering
<- cov2cor(Sigma)
C <- sqrt(0.5*(1 - C))
D_C <- switch(distance,
D "corr-dist" = as.dist(D_C),
"corr-dist2" = dist(D_C),
"regular-heavytail" = {
<- abs(learn_regular_heavytail_graph(scale(X), heavy_type = "student", nu = 4, verbose = TRUE)$laplacian)
C <- 0.5*(1 - C)
D2 < 0] <- 0
D2[D2 as.dist(D2) #as.dist(sqrt(D2))
},"k-comp-heavytail" = {
<- abs(learn_kcomp_heavytail_graph(scale(X), k = k, heavy_type = "student", nu = 4, verbose = TRUE)$laplacian)
C <- 0.5*(1 - C)
D2 < 0] <- 0
D2[D2 as.dist(D2) #as.dist(sqrt(D2))
}, stop("Unknown distance method."))
<- hclust(D, method = linkage)
hc <- hc$order
cluster_order <- cutree(hc, k = 1:N)
clusters_mat
# portfolio allocation
<- rep(1, N)
w <- list(cluster_order) # the ordering only matters for bisection
L for (kk in 2:num_clusters) {
# split next set in the hierarchical clustering
if (splitting == "bisection") {
<- sort(sapply(L, length), decreasing = TRUE, index.return = TRUE)$ix[1] # maximal subset
i_split <- L[[i_split]]
L_to_split <- 1:floor(length(L_to_split)/2)
subindex <- L_to_split[subindex]
L_1 <- L_to_split[-subindex]
L_2 else if (splitting == "dendrogram") {
} <- which.max(sapply(L, function(L_i) length(unique(clusters_mat[L_i, kk])))) # element in L with 2 different values
i_split <- L[[i_split]]
L_to_split <- unique(clusters_mat[L_to_split, kk])
split_two_values <- L_to_split[clusters_mat[L_to_split, kk] == split_two_values[1]]
L_1 <- L_to_split[clusters_mat[L_to_split, kk] == split_two_values[2]]
L_2 else
} stop("Unknown splitting method.")
<- NULL
L[i_split] <- c(L, list(L_1, L_2))
L
# update weights
<- w[L_1] * 0.5
w[L_1] <- w[L_2] * 0.5
w[L_2]
}# in case the clustering is not all the way down
for (i in 1:length(L))
<- w[L[[i]]]/length(L[[i]])
w[L[[i]]]
return(w)
}
We also need the basic portfolio benchmarks GMVP and MVP:
Code
library(CVXR)
<- function(Sigma) {
design_GMVP <- Variable(nrow(Sigma))
w <- Problem(Minimize(quad_form(w, Sigma)),
prob constraints = list(w >= 0, sum(w) == 1))
<- solve(prob)
result <- as.vector(result$getValue(w))
w return(w)
}
<- function(mu, Sigma, lambda = 1, ub = Inf) {
design_MVP <- Variable(nrow(Sigma))
w <- Problem(Maximize(t(mu) %*% w - (lambda/2)*quad_form(w, Sigma)),
prob constraints = list(w >= 0, sum(w) == 1, w <= ub))
<- solve(prob)
result <- as.vector(result$getValue(w))
w return(w)
}
Then we are ready to compare different versions of the hierarchical \(1/N\) portfolio along with some benchmarks.
Different linkage methods
First run backtests for the hierarchical \(1/N\) portfolios with different linkage methods:
Code
# portfolios
<- function(data, ...) {
EWP <- ncol(data[[1]])
N return(rep(1/N, N))
}
<- function(dataset, ...) {
GMVP <- diff(log(dataset$prices))[-1]
X <- cov(X)
Sigma <- design_GMVP(Sigma)
w return(w)
}
<- function(dataset, ...) {
MVP <- diff(log(dataset$prices))[-1]
X <- cov(X)
Sigma <- colMeans(X)
mu <- design_MVP(mu, Sigma, lambda = 1)
w return(w)
}
<- function(dataset, ...) {
H1overN_single <- diff(log(dataset$prices))[-1]
X <- design_H1overN_portfolio(X = X, linkage = "single", distance = "corr-dist")
w return(w)
}
<- function(dataset, ...) {
H1overN_complete <- diff(log(dataset$prices))[-1]
X <- design_H1overN_portfolio(X = X, linkage = "complete", distance = "corr-dist")
w return(w)
}
<- function(dataset, ...) {
H1overN_average <- diff(log(dataset$prices))[-1]
X <- design_H1overN_portfolio(X = X, linkage = "average", distance = "corr-dist")
w return(w)
}
<- function(dataset, ...) {
H1overN_Ward <- diff(log(dataset$prices))[-1]
X <- design_H1overN_portfolio(X = X, linkage = "Ward", distance = "corr-dist")
w return(w)
}
# single backtest
<- nrow(stock_prices)
T <- round(0.70*T)
T_trn <- portfolioBacktest(list("1/N" = EWP,
bt "Hierarchical 1/N (single linkage)" = H1overN_single,
"Hierarchical 1/N (complete linkage)" = H1overN_complete,
"Hierarchical 1/N (average linkage)" = H1overN_average,
"Hierarchical 1/N (Ward's method)" = H1overN_Ward),
list(list("prices" = stock_prices)),
lookback = T_trn, optimize_every = 30, rebalance_every = 1)
Backtesting 5 portfolios over 1 datasets (periodicity = daily data)...
Portfolio allocation of hierarchical \(1/N\) portfolios with different linkage methods:
Code
# barplot
data.frame(
"stocks" = names(stock_prices),
"1/N" = as.numeric(bt$`1/N`$data1$w_optimized[1, ]),
"Hierarchical 1/N (single linkage)" = as.numeric(bt$`Hierarchical 1/N (single linkage)`$data1$w_optimized[1, ]),
"Hierarchical 1/N (complete linkage)" = as.numeric(bt$`Hierarchical 1/N (complete linkage)`$data1$w_optimized[1, ]),
"Hierarchical 1/N (average linkage)" = as.numeric(bt$`Hierarchical 1/N (average linkage)`$data1$w_optimized[1, ]),
"Hierarchical 1/N (Ward's method)" = as.numeric(bt$`Hierarchical 1/N (Ward's method)`$data1$w_optimized[1, ]),
check.names = FALSE) |>
melt(id.vars = "stocks") |>
ggplot(aes(x = stocks, y = value, fill = variable)) +
geom_bar(stat = "identity", position = "dodge", color = "black", width = 0.8) +
labs(title = "Portfolio weights", y = "weight")
Backtest performance of hierarchical \(1/N\) portfolios with different linkage methods:
Code
<- backtestChartCumReturn(bt) +
p1 scale_x_date(date_breaks = "1 month", date_labels = "%b %Y", date_minor_breaks = "1 day") +
ggtitle("Cumulative P&L")
<- backtestChartDrawdown(bt) +
p2 scale_x_date(date_breaks = "1 month", date_labels = "%b %Y", date_minor_breaks = "1 day") +
ggtitle("Drawdown")
/ p2 + plot_layout(guides = "collect") p1
Different distance matrices
First run backtests for the hierarchical \(1/N\) portfolios with different distance matrices:
Code
# portfolios
<- function(dataset, ...) {
H1overN_Ward_dist2 <- diff(log(dataset$prices))[-1]
X <- design_H1overN_portfolio(X = X, linkage = "Ward", distance = "corr-dist2")
w return(w)
}
<- function(dataset, ...) {
H1overN_Ward_regular_heavytail <- diff(log(dataset$prices))[-1]
X <- design_H1overN_portfolio(X = X, linkage = "Ward", distance = "regular-heavytail")
w return(w)
}
<- function(dataset, ...) {
H1overN_Ward_kcomp_heavytail <- diff(log(dataset$prices))[-1]
X <- design_H1overN_portfolio(X = X, linkage = "Ward", distance = "k-comp-heavytail", k = 5)
w return(w)
}
# single backtest
<- nrow(stock_prices)
T <- round(0.70*T)
T_trn <- portfolioBacktest(list("1/N" = EWP,
bt "Hierarchical 1/N (corr-distance)" = H1overN_Ward,
"Hierarchical 1/N (corr-distance-of-distance)" = H1overN_Ward_dist2,
"Hierarchical 1/N (regular-heavy-tail graph)" = H1overN_Ward_regular_heavytail,
"Hierarchical 1/N (k-comp-heavy-tail graph)" = H1overN_Ward_kcomp_heavytail),
list(list("prices" = stock_prices)),
lookback = T_trn, optimize_every = 30, rebalance_every = 1)
Backtesting 5 portfolios over 1 datasets (periodicity = daily data)...
Portfolio allocation of hierarchical \(1/N\) portfolios with different distance matrices:
Code
# barplot
data.frame(
"stocks" = names(stock_prices),
"1/N" = as.numeric(bt$`1/N`$data1$w_optimized[1, ]),
"Hierarchical 1/N (corr-distance)" = as.numeric(bt$`Hierarchical 1/N (corr-distance)`$data1$w_optimized[1, ]),
"Hierarchical 1/N (corr-distance-of-distance)" = as.numeric(bt$`Hierarchical 1/N (corr-distance-of-distance)`$data1$w_optimized[1, ]),
"Hierarchical 1/N (regular-heavy-tail graph)" = as.numeric(bt$`Hierarchical 1/N (regular-heavy-tail graph)`$data1$w_optimized[1, ]),
"Hierarchical 1/N (k-comp-heavy-tail graph)" = as.numeric(bt$`Hierarchical 1/N (k-comp-heavy-tail graph)`$data1$w_optimized[1, ]),
check.names = FALSE) |>
melt(id.vars = "stocks") |>
ggplot(aes(x = stocks, y = value, fill = variable)) +
geom_bar(stat = "identity", position = "dodge", color = "black", width = 0.8) +
labs(title = "Portfolio weights", y = "weight")
Backtest performance of hierarchical \(1/N\) portfolios with different distance matrices:
Code
<- backtestChartCumReturn(bt) +
p1 scale_x_date(date_breaks = "1 month", date_labels = "%b %Y", date_minor_breaks = "1 day") +
ggtitle("Cumulative P&L")
<- backtestChartDrawdown(bt) +
p2 scale_x_date(date_breaks = "1 month", date_labels = "%b %Y", date_minor_breaks = "1 day") +
ggtitle("Drawdown")
/ p2 + plot_layout(guides = "collect") p1
Final comparison
Finally, we compare the selected version of the hierarchical \(1/N\) portfolio (using Ward’s method for the linkage and the correlation-based distance-of-distance matrix with the following benchmarks: naive \(1/N\) portfolio, global minimum variance portfolio (GMVP), and Markowitz mean–variance portfolio (MVP).
First run the backtest:
Code
# backtest
<- nrow(stock_prices)
T <- round(0.70*T)
T_trn <- portfolioBacktest(list("1/N" = EWP,
bt "GMVP" = GMVP,
"MVP" = MVP,
"Hierarchical 1/N" = H1overN_Ward_dist2),
list(list("prices" = stock_prices)),
lookback = T_trn, optimize_every = 30, rebalance_every = 1)
Backtesting 4 portfolios over 1 datasets (periodicity = daily data)...
Portfolio allocation of hierarchical \(1/N\) portfolio along benchmarks:
Code
# barplot
data.frame(
"stocks" = names(stock_prices),
"1/N" = as.numeric(bt$`1/N`$data1$w_optimized[1, ]),
"GMVP" = as.numeric(bt$`GMVP`$data1$w_optimized[1, ]),
"MVP" = as.numeric(bt$`MVP`$data1$w_optimized[1, ]),
"Hierarchical 1/N" = as.numeric(bt$`Hierarchical 1/N`$data1$w_optimized[1, ]),
check.names = FALSE) |>
melt(id.vars = "stocks") |>
ggplot(aes(x = stocks, y = value, fill = variable)) +
geom_bar(stat = "identity", position = "dodge", color = "black", width = 0.8) +
labs(title = "Portfolio weights", y = "weight")
Backtest performance of hierarchical \(1/N\) portfolio along benchmarks:
Code
<- backtestChartCumReturn(bt) +
p1 scale_x_date(date_breaks = "1 month", date_labels = "%b %Y", date_minor_breaks = "1 day") +
ggtitle("Cumulative P&L")
<- backtestChartDrawdown(bt) +
p2 scale_x_date(date_breaks = "1 month", date_labels = "%b %Y", date_minor_breaks = "1 day") +
ggtitle("Drawdown")
/ p2 + plot_layout(guides = "collect") p1
Hierarchical risk parity (HRP) portfolio
We first define the function to design the hierarchical risk parity (HRP) portfolio with different options for the splitting, linkage, and distance matrix calculation:
Code
library(fingraph)
<- function(Sigma) {
design_IVarP <- diag(Sigma)
sigma2 <- 1/sigma2
w <- w/sum(w)
w return(w)
}
<- function(
design_HRP_portfolio Sigma = NULL, X,
splitting = c("bisection", "dendrogram"), num_clusters = NULL,
linkage = c("single", "complete", "average", "Ward"),
distance = c("corr-dist2", "corr-dist", "regular-heavytail", "k-comp-heavytail"), k,
final = c("IVarP", "1/N")) {
<- match.arg(splitting)
splitting <- match.arg(distance)
distance <- match.arg(linkage)
linkage if (linkage == "Ward") linkage <- "ward.D"
<- match.arg(final)
final
# data
if (is.null(Sigma))
<- cov(X)
Sigma <- ncol(Sigma)
N if (is.null(num_clusters)) # by default, cluster all the way down
<- N
num_clusters
# hierarchical clustering
<- cov2cor(Sigma)
C <- sqrt(0.5*(1 - C))
D_C <- switch(distance,
D "corr-dist" = as.dist(D_C),
"corr-dist2" = dist(D_C),
"regular-heavytail" = {
<- abs(learn_regular_heavytail_graph(scale(X), heavy_type = "student", nu = 4, verbose = TRUE)$laplacian)
C <- 0.5*(1 - C)
D2 < 0] <- 0
D2[D2 as.dist(D2) #as.dist(sqrt(D2))
},"k-comp-heavytail" = {
<- abs(learn_kcomp_heavytail_graph(scale(X), k = k, heavy_type = "student", nu = 4, verbose = TRUE)$laplacian)
C <- 0.5*(1 - C)
D2 < 0] <- 0
D2[D2 as.dist(D2) #as.dist(sqrt(D2))
},stop("Unknown distance method."))
<- hclust(D, method = linkage)
hc <- hc$order
cluster_order <- cutree(hc, k = 1:N)
clusters_mat
# HRP algorithm
<- rep(1, N)
w <- list(cluster_order) # the ordering only matters for bisection
L for (kk in 2:num_clusters) {
# split next set in the hierarchical clustering
if (splitting == "bisection") {
<- sort(sapply(L, length), decreasing = TRUE, index.return = TRUE)$ix[1] # maximal subset
i_split <- L[[i_split]]
L_to_split <- 1:floor(length(L_to_split)/2)
subindex <- L_to_split[subindex]
L_1 <- L_to_split[-subindex]
L_2 else if (splitting == "dendrogram") {
} <- which.max(sapply(L, function(L_i) length(unique(clusters_mat[L_i, kk])))) # element in L with 2 different values
i_split <- L[[i_split]]
L_to_split <- unique(clusters_mat[L_to_split, kk])
split_two_values <- L_to_split[clusters_mat[L_to_split, kk] == split_two_values[1]]
L_1 <- L_to_split[clusters_mat[L_to_split, kk] == split_two_values[2]]
L_2 else
} stop("Unknown splitting method.")
<- NULL
L[i_split] <- c(L, list(L_1, L_2))
L
# update weights
<- get_cluster_var(Sigma, L_1)
sigma2_1 <- get_cluster_var(Sigma, L_2)
sigma2_2 <- 1 - sigma2_1/(sigma2_1 + sigma2_2)
alpha <- w[L_1] * alpha
w[L_1] <- w[L_2] * (1 - alpha)
w[L_2]
}# in case the clustering is not all the way down
for (i in 1:length(L)) {
if (length(L[[i]]) > 1) {
if (final == "1/N")
<- w[L[[i]]]/length(L[[i]])
w[L[[i]]] else if (final == "IVarP")
<- w[L[[i]]][1] * design_IVarP(as.matrix(Sigma[L[[i]], L[[i]]]))
w[L[[i]]] else stop("Finall allocation method unknown.")
}
}
return(w)
}
# compute cluster variance from the inverse variance portfolio above
<- function(Sigma, items) {
get_cluster_var <- as.matrix(Sigma[items, items])
Sigma_slice <- design_IVarP(Sigma_slice)
w <- as.numeric(t(w) %*% Sigma_slice %*% w)
var return(var)
}
We now compare the HRP portfolios (both with bisection split and dendrogram split) with the following benchmarks: global minimum variance portfolio (GMVP) and inverse-variance portfolio (IVarP).
We run the backtest:
Code
<- nrow(stock_prices)
T <- round(0.70*T)
T_trn
<- function(data, ...) {
EWP <- ncol(data[[1]])
N return(rep(1/N, N))
}
<- function(dataset, ...) {
GMVP <- diff(log(dataset$prices))[-1]
X <- cov(X)
Sigma <- design_GMVP(Sigma)
w return(w)
}
<- function(dataset, ...) {
IVarP <- diff(log(dataset$prices))[-1]
X <- cov(X)
Sigma <- design_IVarP(Sigma)
w return(w)
}
<- function(dataset, ...) {
HRP_bisection <- diff(log(dataset$prices))[-1]
X <- cov(X)
Sigma <- design_HRP_portfolio(Sigma, splitting = "bisection", linkage = "single")
w return(w)
}
<- function(dataset, ...) {
HRP_dendrogram <- diff(log(dataset$prices))[-1]
X <- cov(X)
Sigma <- design_HRP_portfolio(Sigma, splitting = "dendrogram", linkage = "single")
w return(w)
}
<- function(dataset, ...) {
HRP_dendrogram_regular_heavytail <- diff(log(dataset$prices))[-1]
X <- design_HRP_portfolio(X = X, splitting = "dendrogram", linkage = "single", distance = "regular-heavytail")
w return(w)
}
<- function(dataset, ...) {
HRP_dendrogram_kcomp_heavytail <- diff(log(dataset$prices))[-1]
X <- design_HRP_portfolio(X = X, splitting = "dendrogram", linkage = "single", distance = "k-comp-heavytail", k = 5)
w return(w)
}
<- portfolioBacktest(list("GMVP" = GMVP,
bt "IVarP" = IVarP,
"HRP-bisection" = HRP_bisection,
"HRP-dendrogram" = HRP_dendrogram,
"HRP-dendrogram (heavy-tail graph)" = HRP_dendrogram_regular_heavytail,
"HRP-dendrogram (k-comp-heavy-tail graph)" = HRP_dendrogram_kcomp_heavytail),
list(list("prices" = stock_prices)),
lookback = T_trn, optimize_every = 30, rebalance_every = 1)
Backtesting 6 portfolios over 1 datasets (periodicity = daily data)...
Portfolio allocation of HRP portfolios and benchmarks:
Code
# barplot
data.frame(
"stocks" = names(stock_prices),
"GMVP" = as.numeric(bt$`GMVP`$data1$w_optimized[1, ]),
"IVarP" = as.numeric(bt$`IVarP`$data1$w_optimized[1, ]),
"HRP-bisection" = as.numeric(bt$`HRP-bisection`$data1$w_optimized[1, ]),
"HRP-dendrogram" = as.numeric(bt$`HRP-dendrogram`$data1$w_optimized[1, ]),
"HRP-dendrogram (heavy-tail graph)" = as.numeric(bt$`HRP-dendrogram (heavy-tail graph)`$data1$w_optimized[1, ]),
"HRP-dendrogram (k-comp-heavy-tail graph)" = as.numeric(bt$`HRP-dendrogram (k-comp-heavy-tail graph)`$data1$w_optimized[1, ]),
check.names = FALSE) |>
melt(id.vars = "stocks") |>
ggplot(aes(x = stocks, y = value, fill = variable)) +
geom_bar(stat = "identity", position = "dodge", color = "black", width = 0.8) +
labs(title = "Portfolio weights", y = "weight")
Backtest performance of HRP portfolios and benchmarks:
Code
<- backtestChartCumReturn(bt) +
p1 scale_x_date(date_breaks = "1 month", date_labels = "%b %Y", date_minor_breaks = "1 day") +
ggtitle("Cumulative P&L")
<- backtestChartDrawdown(bt) +
p2 scale_x_date(date_breaks = "1 month", date_labels = "%b %Y", date_minor_breaks = "1 day") +
ggtitle("Drawdown")
/ p2 + plot_layout(guides = "collect") p1
Hierarchical equal risk contribution (HERC) portfolio
For simplicity, for the weight splitting we simply use the two risk contribution measures: \(\textm{RC}_i = 1\) and \(\textm{RC}_i = 1/\sigma_i^{2}\), leading to the 50% - 50% split as in the hierarchical \(1/N\) portfolio and to \[ \begin{bmatrix} w_1\\ w_2 \end{bmatrix} = \begin{bmatrix} \sigma_1^{-2}/(\sigma_1^{-2} + \sigma_2^{-2})\\ \sigma_2^{-2}/(\sigma_1^{-2} + \sigma_2^{-2}) \end{bmatrix} = \begin{bmatrix} \sigma_2^{2}/(\sigma_1^{2} + \sigma_2^{2})\\ \sigma_1^{2}/(\sigma_1^{2} + \sigma_2^{2}) \end{bmatrix}. \] as in the HRP portfolio.
We now compare the HERC portfolios (both with bisection split and dendrogram split) with the following benchmarks: \(1/N\) portfolio, hierarchical \(1/N\) portfolio, inverse-variance portfolio (IVarP), and HRP portfolio.
We run the backtest:
Code
<- function(dataset, ...) {
H1overN_Ward_dist2 <- diff(log(dataset$prices))[-1]
X <- design_H1overN_portfolio(X = X, splitting = "dendrogram", linkage = "Ward", distance = "corr-dist2")
w return(w)
}
<- function(dataset, ...) {
HERC_1overN_dendrogram <- diff(log(dataset$prices))[-1]
X <- cov(X)
Sigma <- design_H1overN_portfolio(Sigma, splitting = "dendrogram", num_clusters = 4, linkage = "Ward", distance = "corr-dist2")
w return(w)
}
<- function(dataset, ...) {
HERC_1overN_bisection <- diff(log(dataset$prices))[-1]
X <- cov(X)
Sigma <- design_H1overN_portfolio(Sigma, splitting = "bisection", num_clusters = 4, linkage = "Ward", distance = "corr-dist2")
w return(w)
}
<- function(dataset, ...) {
HRP_bisection <- diff(log(dataset$prices))[-1]
X <- cov(X)
Sigma <- design_HRP_portfolio(Sigma, splitting = "bisection", linkage = "single")
w return(w)
}
<- function(dataset, ...) {
HERC_IVarP_dendrogram <- diff(log(dataset$prices))[-1]
X <- cov(X)
Sigma <- design_HRP_portfolio(Sigma, splitting = "dendrogram", num_clusters = 4,
w linkage = "Ward", distance = "corr-dist2", final = "1/N")
return(w)
}
<- function(dataset, ...) {
HERC_IVarP_bisection <- diff(log(dataset$prices))[-1]
X <- cov(X)
Sigma <- design_HRP_portfolio(Sigma, splitting = "bisection", num_clusters = 4,
w linkage = "Ward", distance = "corr-dist2", final = "1/N")
return(w)
}
<- portfolioBacktest(list("1/N" = EWP,
bt "Hierarchical 1/N" = H1overN_Ward_dist2,
"HERC-dendrogram 1/N" = HERC_1overN_dendrogram,
"HERC-bisection 1/N" = HERC_1overN_bisection,
"IVarP" = IVarP,
"HRP-bisection" = HRP_bisection,
"HERC-dendrogram IVarP" = HERC_IVarP_dendrogram,
"HERC-bisection IVarP" = HERC_IVarP_bisection),
list(list("prices" = stock_prices)),
lookback = T_trn, optimize_every = 30, rebalance_every = 1)
Backtesting 8 portfolios over 1 datasets (periodicity = daily data)...
Portfolio allocation of HERC portfolios and benchmarks:
Code
# barplot
data.frame(
"stocks" = names(stock_prices),
"1/N" = as.numeric(bt$`1/N`$data1$w_optimized[1, ]),
"Hierarchical 1/N" = as.numeric(bt$`Hierarchical 1/N`$data1$w_optimized[1, ]),
"HERC-dendrogram 1/N" = as.numeric(bt$`HERC-dendrogram 1/N`$data1$w_optimized[1, ]),
"HERC-bisection 1/N" = as.numeric(bt$`HERC-bisection 1/N`$data1$w_optimized[1, ]),
"IVarP" = as.numeric(bt$`IVarP`$data1$w_optimized[1, ]),
"HRP-bisection" = as.numeric(bt$`HRP-bisection`$data1$w_optimized[1, ]),
"HERC-dendrogram IVarP" = as.numeric(bt$`HERC-dendrogram IVarP`$data1$w_optimized[1, ]),
"HERC-bisection IVarP" = as.numeric(bt$`HERC-bisection IVarP`$data1$w_optimized[1, ]),
check.names = FALSE) |>
melt(id.vars = "stocks") |>
ggplot(aes(x = stocks, y = value, fill = variable)) +
geom_bar(stat = "identity", position = "dodge", color = "black", width = 0.8) +
labs(title = "Portfolio weights", y = "weight")
Backtest performance of HERC portfolios and benchmarks:
Code
<- backtestChartCumReturn(bt) +
p1 scale_x_date(date_breaks = "1 month", date_labels = "%b %Y", date_minor_breaks = "1 day") +
ggtitle("Cumulative P&L")
<- backtestChartDrawdown(bt) +
p2 scale_x_date(date_breaks = "1 month", date_labels = "%b %Y", date_minor_breaks = "1 day") +
ggtitle("Drawdown")
/ p2 + plot_layout(guides = "collect") p1
Multiple backtests
After overviewing several graph-based portfolios and observing the performance over single backtests, we now resort to an empirical evaluation based on multiple randomized backtests. In particular, we take a dataset of the S&P 500 stocks over the period 2015-2020 and generate 200 resamples each with \(N=50\) randomly selected stocks and a random period of two years. Then, for each resample, we perform a walk-forward backtest with a lookback window of 1 year, reoptimizing the portfolio every month. Nevertheless, more exhaustive backtests should be conducted with a wide variety of data before drawing any firm conclusion.
Splitting: bisection versus dendrogram
We start comparing the two splitting methods considered: direct bisection (after reordering the assets with the dendrogram) and following the natural splits of the dendrogram. In principle, one may expect that following the natural dendrogram should be more faithful to the structure in the actual data structure as opposed to bisection that somehow breaks the dendrogram. Interestingly, the empirical evaluation seems to suggest otherwise. One possible explanation is that bisection provides more balanced clusters while still using the reordering information provided by the dendrogram.
We first define the portfolios for the backtests:
Code
######### H1overN
<- function(dataset, ...) {
H1overN_bisection <- ncol(dataset$prices)
N <- diff(log(dataset$prices))[-1]
X <- design_H1overN_portfolio(X = X, splitting = "bisection", linkage = "Ward", distance = "corr-dist2")
w return(w)
}
<- function(dataset, ...) {
H1overN_dendrogram <- diff(log(dataset$prices))[-1]
X <- design_H1overN_portfolio(X = X, splitting = "dendrogram", linkage = "Ward", distance = "corr-dist2")
w return(w)
}
######### HRP
<- function(dataset, ...) {
HRP_bisection <- diff(log(dataset$prices))[-1]
X <- cov(X)
Sigma <- design_HRP_portfolio(Sigma, splitting = "bisection", linkage = "single")
w return(w)
}
<- function(dataset, ...) {
HRP_dendrogram <- diff(log(dataset$prices))[-1]
X <- cov(X)
Sigma <- design_HRP_portfolio(Sigma, splitting = "dendrogram", linkage = "single")
w return(w)
}
######### HERC_1overN
<- function(dataset, ...) {
HERC_1overN_dendrogram <- diff(log(dataset$prices))[-1]
X <- cov(X)
Sigma <- design_H1overN_portfolio(Sigma, splitting = "dendrogram", num_clusters = 10, linkage = "Ward", distance = "corr-dist2")
w return(w)
}
<- function(dataset, ...) {
HERC_1overN_bisection <- diff(log(dataset$prices))[-1]
X <- cov(X)
Sigma <- design_H1overN_portfolio(Sigma, splitting = "bisection", num_clusters = 10, linkage = "Ward", distance = "corr-dist2")
w return(w)
}
######### HERC_IVarP
<- function(dataset, ...) {
HERC_IVarP_dendrogram <- diff(log(dataset$prices))[-1]
X <- cov(X)
Sigma <- design_HRP_portfolio(Sigma, splitting = "dendrogram", num_clusters = 10,
w linkage = "Ward", distance = "corr-dist2", final = "1/N")
return(w)
}
<- function(dataset, ...) {
HERC_IVarP_bisection <- diff(log(dataset$prices))[-1]
X <- cov(X)
Sigma <- design_HRP_portfolio(Sigma, splitting = "bisection", num_clusters = 10,
w linkage = "Ward", distance = "corr-dist2", final = "1/N")
return(w)
}
Then we run the multiple randomized backtests over the resampled data:
Code
library(portfolioBacktest)
library(pob)
data(SP500_2015to2020)
<- SP500_2015to2020$stocks
stock_prices
# resample data
set.seed(42)
<- financialDataResample(list("prices" = stock_prices),
stock_prices_resampled num_datasets = 200, N_sample = 50, T_sample = 252*2)
200 datasets resampled (with N = 50 instruments and length T = 504) from the original data between 2015-01-05 and 2020-09-22.
Code
# perform multiple backtest on the resampled data
<- portfolioBacktest(portfolio_funs = list("1/N" = EWP,
bt "Hierarchical 1/N - bisection" = H1overN_bisection,
"Hierarchical 1/N - dendrogram" = H1overN_dendrogram,
"HERC 1/N - bisection" = HERC_1overN_bisection,
"HERC 1/N - dendrogram" = HERC_1overN_dendrogram,
"IVarP" = IVarP,
"HRP - bisection" = HRP_bisection,
"HRP - dendrogram" = HRP_dendrogram,
"HERC IVarP - bisection" = HERC_IVarP_bisection,
"HERC IVarP - dendrogram" = HERC_IVarP_dendrogram),
dataset_list = stock_prices_resampled,
lookback = 252, optimize_every = 20, rebalance_every = 5,
paral_datasets = 6,
cost = list(buy = 60e-4, sell = 60e-4),
return_portfolio = FALSE, return_returns = FALSE)
Backtesting 10 portfolios over 200 datasets (periodicity = daily data)...
Backtest performance of graph-based portfolios: bisection versus dendrogram splitting:
Code
<- backtestBoxPlot(bt, "Sharpe ratio") + coord_flip()
p1 <- backtestBoxPlot(bt, "max drawdown") + coord_flip()
p2
/ p2 p1
Graph estimation: simple versus sophisticated
Now we evaluate whether the more sophisticated graph estimation methods can bring any benefit to the portfolios. In principle, one may expect a positive answer, but the empirical results show that this is not so clear and further analysis is required.
We first define the portfolios for the backtests:
Code
############ H1overN
<- function(dataset, ...) {
H1overN <- diff(log(dataset$prices))[-1]
X <- design_H1overN_portfolio(X = X, splitting = "bisection", linkage = "Ward", distance = "corr-dist2")
w return(w)
}
<- function(dataset, ...) {
H1overN_regular_heavytail <- diff(log(dataset$prices))[-1]
X <- design_H1overN_portfolio(X = X, splitting = "bisection", linkage = "Ward", distance = "regular-heavytail")
w return(w)
}
<- function(dataset, ...) {
H1overN_kcomp_heavytail <- diff(log(dataset$prices))[-1]
X <- design_H1overN_portfolio(X = X, splitting = "bisection", linkage = "Ward", distance = "k-comp-heavytail", k = 5)
w return(w)
}
########### HRP
<- function(dataset, ...) {
HRP <- diff(log(dataset$prices))[-1]
X <- cov(X)
Sigma <- design_HRP_portfolio(Sigma, splitting = "bisection", linkage = "single", distance = "corr-dist2")
w return(w)
}
<- function(dataset, ...) {
HRP_regular_heavytail <- diff(log(dataset$prices))[-1]
X <- design_HRP_portfolio(X = X, splitting = "bisection", linkage = "single", distance = "regular-heavytail")
w return(w)
}
<- function(dataset, ...) {
HRP_kcomp_heavytail <- diff(log(dataset$prices))[-1]
X <- design_HRP_portfolio(X = X, splitting = "bisection", linkage = "single", distance = "k-comp-heavytail", k = 5)
w return(w)
}
######### HERC_1overN
<- function(dataset, ...) {
HERC_1overN <- diff(log(dataset$prices))[-1]
X <- cov(X)
Sigma <- design_H1overN_portfolio(Sigma, splitting = "bisection", num_clusters = 10, linkage = "Ward", distance = "corr-dist2")
w return(w)
}
<- function(dataset, ...) {
HERC_1overN_regular_heavytail <- diff(log(dataset$prices))[-1]
X <- design_H1overN_portfolio(X = X, splitting = "bisection", num_clusters = 10, linkage = "Ward", distance = "regular-heavytail")
w return(w)
}
<- function(dataset, ...) {
HERC_1overN_kcomp_heavytail <- diff(log(dataset$prices))[-1]
X <- design_H1overN_portfolio(X = X, splitting = "bisection", num_clusters = 10, linkage = "Ward", distance = "k-comp-heavytail", k = 5)
w return(w)
}
######### HERC_IVarP
<- function(dataset, ...) {
HERC_IVarP <- diff(log(dataset$prices))[-1]
X <- cov(X)
Sigma <- design_HRP_portfolio(Sigma, splitting = "bisection", num_clusters = 10,
w linkage = "Ward", distance = "corr-dist2", final = "1/N")
return(w)
}
<- function(dataset, ...) {
HERC_IVarP_regular_heavytail <- diff(log(dataset$prices))[-1]
X <- design_HRP_portfolio(X = X, splitting = "bisection", num_clusters = 10,
w linkage = "Ward", distance = "regular-heavytail", final = "1/N")
return(w)
}
<- function(dataset, ...) {
HERC_IVarP_kcomp_heavytail <- diff(log(dataset$prices))[-1]
X <- design_HRP_portfolio(X = X, splitting = "bisection", num_clusters = 10,
w linkage = "Ward", distance = "k-comp-heavytail", k = 5, final = "1/N")
return(w)
}
Then we run the multiple randomized backtests over the resampled data:
Code
library(portfolioBacktest)
library(pob)
data(SP500_2015to2020)
<- SP500_2015to2020$stocks
stock_prices
# resample data
set.seed(42)
<- financialDataResample(list("prices" = stock_prices),
stock_prices_resampled num_datasets = 120, N_sample = 50, T_sample = 252*2) #120
120 datasets resampled (with N = 50 instruments and length T = 504) from the original data between 2015-01-05 and 2020-09-22.
Code
# perform multiple backtest on the resampled data
<- portfolioBacktest(portfolio_funs = list("1/N" = EWP,
bt "Hierarchical 1/N (original)" = H1overN,
"Hierarchical 1/N (regular-heavy-tail graph)" = H1overN_regular_heavytail,
"Hierarchical 1/N (k-comp-heavy-tail graph)" = H1overN_kcomp_heavytail,
"IVarP" = IVarP,
"HRP (original)" = HRP,
"HRP (regular-heavy-tail graph)" = HRP_regular_heavytail,
"HRP (k-comp-heavy-tail graph)" = HRP_kcomp_heavytail),
dataset_list = stock_prices_resampled,
lookback = 252, optimize_every = 20, rebalance_every = 5,
paral_datasets = 6,
cost = list(buy = 60e-4, sell = 60e-4),
return_portfolio = FALSE, return_returns = FALSE)
Backtesting 8 portfolios over 120 datasets (periodicity = daily data)...
Backtest performance of graph-based portfolios: simple versus sophisticated graph learning methods:
Code
<- backtestBoxPlot(bt, "Sharpe ratio") + coord_flip()
p1 <- backtestBoxPlot(bt, "max drawdown") + coord_flip()
p2
/ p2 p1
Final comparison
Finally, we perform a comparison of the graph-based portfolios (using the simple graph-based distance-of-distance matrix), along with the benchmarks \(1/N\) portfolio and IVarP, namely:
- Hierarchical \(1/N\) portfolio
- HERC \(1/N\) portfolio
- HRP portfolio
- HERC IVarP
The empirical results show that the different methods do not appear significantly different and a more exhaustive comparison is needed before drawing any clear conclusion.
We first define the portfolios for the backtests:
Code
############ H1overN
<- function(dataset, ...) {
H1overN <- diff(log(dataset$prices))[-1]
X <- design_H1overN_portfolio(X = X, splitting = "bisection", linkage = "Ward", distance = "corr-dist2")
w return(w)
}
########### HRP
<- function(dataset, ...) {
HRP <- diff(log(dataset$prices))[-1]
X <- cov(X)
Sigma <- design_HRP_portfolio(Sigma, splitting = "bisection", linkage = "single", distance = "corr-dist2")
w return(w)
}
######### HERC_1overN
<- function(dataset, ...) {
HERC_1overN <- diff(log(dataset$prices))[-1]
X <- cov(X)
Sigma <- design_H1overN_portfolio(Sigma, splitting = "bisection", num_clusters = 10, linkage = "Ward", distance = "corr-dist2")
w return(w)
}
######### HERC_IVarP
<- function(dataset, ...) {
HERC_IVarP <- diff(log(dataset$prices))[-1]
X <- cov(X)
Sigma <- design_HRP_portfolio(Sigma, splitting = "bisection", num_clusters = 10,
w linkage = "Ward", distance = "corr-dist2", final = "1/N")
return(w)
}
Then we run the multiple randomized backtests over the resampled data:
Code
library(portfolioBacktest)
library(pob)
data(SP500_2015to2020)
<- SP500_2015to2020$stocks
stock_prices
# resample data
set.seed(42)
<- financialDataResample(list("prices" = stock_prices),
stock_prices_resampled num_datasets = 200, N_sample = 50, T_sample = 252*2)
200 datasets resampled (with N = 50 instruments and length T = 504) from the original data between 2015-01-05 and 2020-09-22.
Code
# perform multiple backtest on the resampled data
<- portfolioBacktest(portfolio_funs = list("1/N" = EWP,
bt "Hierarchical 1/N" = H1overN,
"HERC 1/N" = HERC_1overN,
"IVarP" = IVarP,
"HRP" = HRP,
"HERC IVarP" = HERC_IVarP_bisection),
dataset_list = stock_prices_resampled,
lookback = 252, optimize_every = 20, rebalance_every = 5,
paral_datasets = 6,
cost = list(buy = 60e-4, sell = 60e-4),
return_portfolio = FALSE, return_returns = FALSE)
Backtesting 6 portfolios over 200 datasets (periodicity = daily data)...
Backtest performance of graph-based portfolios: simple versus sophisticated graph learning methods:
Code
<- backtestSummary(bt)
summary_bt
summaryTable(summary_bt, type = "kable", digits = 2,
measures = c("Sharpe ratio", "annual return", "annual volatility", "Sortino ratio", "max drawdown"),
caption = "Comparison of selected graph-based portfolios: performance measures.") |>
::kable_styling() kableExtra
Portfolio | Sharpe ratio | annual return | annual volatility | Sortino ratio | max drawdown |
---|---|---|---|---|---|
1/N | 1.31 | 16% | 13% | 1.79 | 10% |
Hierarchical 1/N | 1.18 | 14% | 13% | 1.60 | 10% |
HERC 1/N | 1.28 | 16% | 13% | 1.77 | 10% |
IVarP | 1.26 | 14% | 12% | 1.73 | 10% |
HRP | 1.13 | 12% | 11% | 1.55 | 10% |
HERC IVarP | 1.19 | 14% | 12% | 1.61 | 10% |
Code
summaryBarPlot(summary_bt, measures = c("max drawdown", "annual volatility")) +
theme(legend.title = element_text()) + labs(fill = "Portfolio")
Code
<- backtestBoxPlot(bt, "Sharpe ratio") + coord_flip()
p1 <- backtestBoxPlot(bt, "max drawdown") + coord_flip()
p2
/ p2 p1