Portfolio Optimization
Chapter 11: Risk Parity Portfolios

R code

Published

October 17, 2024

R code examples for Chapter 11 of the book:

Daniel P. Palomar (2024). Portfolio Optimization: Theory and Application. Cambridge University Press.

Loading packages

The following packages are used in the examples:

# basic finance
library(xts)                    # to manipulate time series of stock data
library(pob)                    # book package with financial data
 
# plotting
library(ggplot2)                # for nice plots
library(reshape2)               # to reshape data
library(dplyr)                  # for data frame manipulation (e.g., mutate)
library(patchwork)              # for combining plots
library(scales)                 # for axes control

# optimization
library(alabama)
library(nloptr)
library(quadprog)
library(riskParityPortfolio)    # for risk parity portfolios
library(microbenchmark)         # for cpu computation

From dollar to risk diversification

Portfolio allocation and risk allocation for the \(1/N\) portfolio and risk parity portfolio:

library(pob)
library(riskParityPortfolio)

stock_prices <- SP500_2015to2020$stocks["2020::2020-09", c("AAPL", "NFLX", "TSCO", "MGM", "MSFT", "FB", "AMZN", "GOOGL")]
T <- nrow(stock_prices)
T_trn <- round(0.70*T)
X <- diff(log(stock_prices[1:T_trn, ]))[-1]
Sigma <- cov(X)
N <- ncol(X)


# portfolios
w_EWP <- rep(1/N, N)
w_RPP <- riskParityPortfolio(Sigma)$w

# barplots
df_w <- data.frame(
  "stocks"  = names(stock_prices),
  "1/N"     = w_EWP,
  "RPP"     = w_RPP,
  check.names = FALSE)

# Using package riskParityPortfolio:
# barplotPortfolioRisk(as.matrix(df_w[-1]), Sigma)

# We define our own barplot function for completeness:
barplot_w_and_RCC <- function(df_w, Sigma) {
  df_w$stocks <- factor(df_w$stocks, levels = unique(df_w$stocks))
  
  # get relative risk contribution
  df_rrc <- df_w
  df_rrc[-1] <- lapply(as.list(df_w[-1]), function(w) {
    rc <- as.vector(w * (Sigma %*% w))
    rc/sum(rc)
    })
  
  # plots
  p_w <- df_w |> 
    melt(id.vars = "stocks") |>
    ggplot(aes(x = stocks, y = value, fill = variable)) +
    geom_bar(stat = "identity", position = "dodge", color = "black", width = 0.8) + 
    labs(fill = "portfolios") +
    labs(title = "Portfolio weights", x = NULL, y = "weight")
  
  p_rrc <- df_rrc |> 
    melt(id.vars = "stocks") |>
    ggplot(aes(x = stocks, y = value, fill = variable)) +
    geom_bar(stat = "identity", position = "dodge", color = "black", width = 0.8) + 
    labs(fill = "portfolios") +
    labs(title = "Relative risk contribution", y = "risk")

  p_w / p_rrc + plot_layout(guides = "collect")
}

barplot_w_and_RCC(df_w, Sigma)  

Naive diagonal formulation

Portfolio allocation and risk contribution of the \(1/N\) portfolio and naive RPP:

library(pob)
library(riskParityPortfolio)

stock_prices <- SP500_2015to2020$stocks["2020::2020-09", c("AAPL", "NFLX", "TSCO", "MGM", "MSFT", "FB", "AMZN", "GOOGL")]
T <- nrow(stock_prices)
T_trn <- round(0.70*T)
X <- diff(log(stock_prices[1:T_trn, ]))[-1]
Sigma <- cov(X)
N <- ncol(X)


# portfolios
w_EWP <- rep(1/N, N)
#w_naive_RPP <- riskParityPortfolio(Sigma, formulation = "diag")$w
w_naive_RPP <- 1/sqrt(diag(Sigma))
w_naive_RPP <- w_naive_RPP/sum(w_naive_RPP)


# barplots
df_w <- data.frame(
  "stocks"    = names(stock_prices),
  "1/N"                = w_EWP,
  "Naive diagonal RPP" = w_naive_RPP,
  check.names = FALSE)

barplot_w_and_RCC(df_w, Sigma)

Vanilla convex formulations

Portfolio allocation and risk contribution of the vanilla convex RPP compared to benchmarks:

library(riskParityPortfolio)

stock_prices <- SP500_2015to2020$stocks["2020::2020-09", c("AAPL", "NFLX", "TSCO", "MGM", "MSFT", "FB", "AMZN", "GOOGL")]
T <- nrow(stock_prices)
T_trn <- round(0.70*T)
X <- diff(log(stock_prices[1:T_trn, ]))[-1]
Sigma <- cov(X)
N <- ncol(X)


# portfolios
w_EWP <- rep(1/N, N)
w_naive_RPP  <- riskParityPortfolio(Sigma, formulation = "diag")$w
w_convex_RPP <- riskParityPortfolio(Sigma)$w


# barplots
df_w <- data.frame(
  "stocks"    = names(stock_prices),
  "1/N"                = w_EWP,
  "Naive diagonal RPP" = w_naive_RPP,
  "Vanilla convex RPP" = w_convex_RPP,
  check.names = FALSE)

barplot_w_and_RCC(df_w, Sigma)

Algorithms

General-purpose nonlinear solver

We can solve this problem with the general-purpose nonlinear solver optim() in R:

# initial point
w <- rep(1/N, N)
x0 <- w/as.numeric(sqrt(w %*% Sigma %*% w))

# function definition
fn_convex <- function(x, Sigma) {
  N <- nrow(Sigma)
  return(0.5 * t(x) %*% Sigma %*% x - (1/N)*sum(log(x)))
}

# optimize with general-purpose solver
result <- optim(par = x0, fn = fn_convex, Sigma = Sigma, method = "BFGS")
x_general_solver <- result$par
w_general_solver_RPP <- x_general_solver/sum(x_general_solver)

# sanity check of the solution
b <- rep(1/N, N)
Sigma %*% x_general_solver - b/x_general_solver
               [,1]
AAPL  -7.391053e-06
NFLX  -1.951736e-05
TSCO  -9.158081e-06
MGM    9.466781e-06
MSFT   8.160602e-06
FB    -1.373207e-05
AMZN  -3.604614e-05
GOOGL -3.071543e-05

Newton’s method

Newton’s method for Spinu’s RPP formulation:

library(microbenchmark)
library(dplyr)

b <- rep(1/N, N)
x_opt <- w_convex_RPP / as.vector(sqrt(w_convex_RPP %*% Sigma %*% w_convex_RPP))
opt_value <- 0.5 * x_opt %*% Sigma %*% x_opt - b %*% log(x_opt)

#
# Newton Spinu algorithm  (1/N initial point)
#
num_iter <- 5L
w <- rep(1/N, N)
x <- w/as.numeric(sqrt(w %*% Sigma %*% w))
df <- data.frame("k"          = 0L,
                 "cpu time k" = 0,
                 "obj_value"  = 0.5 * x %*% Sigma %*% x - b %*% log(x),
                 "gap"        = 0.5 * x %*% Sigma %*% x - b %*% log(x) - opt_value,
                 check.names      = FALSE)

for (k in 1:num_iter) {
  cpu_time <- microbenchmark({
    grad <- Sigma %*% x - b/x
    Hessian <- Sigma + diag(b/x^2)
    x_new <- x - solve(Hessian, grad)
    }, unit = "microseconds", times = 10L)$time |> median()
  x <- as.numeric(x_new)

  df <- rbind(df, list("k"          = k,
                       "cpu time k" = cpu_time,
                       "obj_value"  = 0.5 * x %*% Sigma %*% x - b %*% log(x),
                       "gap"        = 0.5 * x %*% Sigma %*% x - b %*% log(x) - opt_value))
}
df <- df |> mutate("CPU time [ms]" = cumsum(`cpu time k`)/1e6)

p_gap_vs_iter <- df |>
  ggplot(aes(x = k, y = gap)) +
  geom_line(linewidth = 1.2, col = "blue") +
  scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x))) +
  scale_x_continuous(limits = c(0, num_iter), breaks = seq(0, num_iter, by = 1), minor_breaks = NULL) +
  coord_cartesian(ylim = c(1e-10, 1e1)) +
  ggtitle("Optimality gap versus iterations")

p_gap_vs_cpu_time <- df |>
  ggplot(aes(x = `CPU time [ms]`, y = gap, color = method)) +
  geom_line(linewidth = 1.2, col = "blue") +
  scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x))) +
  coord_cartesian(xlim = c(0, 0.25), ylim = c(1e-10, 1e1)) +
  ggtitle("Optimality gap versus CPU time")

p_gap_vs_iter / p_gap_vs_cpu_time + plot_layout(guides = "collect")

Cyclical coordinate descent algorithm

Cyclical coordinate descent algorithm for Spinu’s RPP formulation:

#
# Cyclical Spinu coordinate descent algorithm (1/N initial point)
#
num_iter <- 10L
w <- rep(1/N, N)
x <- w/as.numeric(sqrt(w %*% Sigma %*% w))
df <- data.frame("k"          = 0L,
                 "cpu time k" = 0,
                 "obj_value"  = 0.5 * x %*% Sigma %*% x - b %*% log(x),
                 "gap"        = 0.5 * x %*% Sigma %*% x - b %*% log(x) - opt_value,
                 check.names      = FALSE)

for (k in 1:num_iter) {
  cpu_time <- microbenchmark({
    x_new <- x
    #Sigma_xk <- as.numeric(Sigma %*% x_new)
    for (i in 1:N) {
      Sigma_xk_i <- as.numeric(x_new[-i] %*% Sigma[-i, i])
      #Sigma_xk_i <- Sigma_xk[i] - x_new[i] * Sigma[i, i]
      x_new[i] <- (- Sigma_xk_i + sqrt(Sigma_xk_i^2 + 4*Sigma[i, i]*b[i]))/(2*Sigma[i, i])
      #x_diff <- x_new[i] - x[i]
      #Sigma_xk <- Sigma_xk + Sigma[, i] * x_diff
    }
    }, unit = "microseconds", times = 10L)$time |> median()
  x <- as.numeric(x_new)

  df <- rbind(df, list("k"          = k,
                       "cpu time k" = cpu_time,
                       "obj_value"  = 0.5 * x %*% Sigma %*% x - b %*% log(x),
                       "gap"        = 0.5 * x %*% Sigma %*% x - b %*% log(x) - opt_value))
}
df <- df |> mutate("CPU time [ms]" = cumsum(`cpu time k`)/1e6)

p_gap_vs_iter <- df |>
  ggplot(aes(x = k, y = gap)) +
  geom_line(linewidth = 1.2, col = "blue") +
  scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x))) +
  scale_x_continuous(limits = c(0, num_iter), breaks = seq(0, num_iter, by = 1), minor_breaks = NULL) +
  coord_cartesian(ylim = c(1e-10, 1e1)) +
  ggtitle("Optimality gap versus iterations")

p_gap_vs_cpu_time <- df |>
  ggplot(aes(x = `CPU time [ms]`, y = gap, color = method)) +
  geom_line(linewidth = 1.2, col = "blue") +
  scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x))) +
  coord_cartesian(xlim = c(0, 50), ylim = c(1e-10, 1e1)) +
  ggtitle("Optimality gap versus CPU time")

p_gap_vs_iter / p_gap_vs_cpu_time + plot_layout(guides = "collect")

Numerical comparison

Convergence of Newton, MM, and SCA methods for Spinu’s RPP problem, comparing the original version with the improved one (i.e., using the correlation matrix instead of the covariance matrix and using the linear normalization step) (Palomar, 2024, Chapter 11). It seems that the best method is the improved SCA.

num_iter  <- 25L

sigma <- sqrt(diag(Sigma))
C <- cov2cor(Sigma)

#
# Newton Spinu algorithm
#
x <- sqrt(b)/sqrt(rowSums(Sigma))
df <- data.frame("k"          = 0L,
                 "cpu time k" = 0,
                 "obj_value"  = 0.5 * x %*% Sigma %*% x - b %*% log(x),
                 "gap"        = 0.5 * x %*% Sigma %*% x - b %*% log(x) - opt_value,
                 "method"     = "Newton",
                 check.names      = FALSE)
for (k in 1:num_iter) {
  cpu_time <- microbenchmark({
    grad <- Sigma %*% x - b/x
    Hessian <- Sigma + diag(b/x^2)
    x_new <- x - solve(Hessian, grad)
    }, unit = "microseconds", times = 10L)$time |> median()
  x <- as.numeric(x_new)

  df <- rbind(df, list("k"          = k, 
                       "cpu time k" = cpu_time, 
                       "obj_value"  = 0.5 * x %*% Sigma %*% x - b %*% log(x),
                       "gap"        = 0.5 * x %*% Sigma %*% x - b %*% log(x) - opt_value, 
                       "method"     = "Newton"))
}



#
# Newton Spinu algorithm (normalized and based on correlation matrix)
#
x <- sqrt(b)/sqrt(rowSums(C))
rowsum_C <- rowSums(C)
df <- rbind(df, list("k"          = 0L, 
                     "cpu time k" = 0, 
                     "obj_value"  = 0.5 * (x/sigma) %*% Sigma %*% (x/sigma) - b %*% log(x/sigma),
                     "gap"        = 0.5 * (x/sigma) %*% Sigma %*% (x/sigma) - b %*% log(x/sigma) - opt_value,
                     "method"     = "Newton - improved"))
for (k in 1:num_iter) {
  cpu_time <- microbenchmark({
    grad <- C %*% x - b/x
    Hessian <- C + diag(b/x^2)
    x_new <- x - solve(Hessian, grad)
    x_new <- x_new * sqrt(sum(b/x_new)/sum(rowsum_C * x_new))
    }, unit = "microseconds", times = 10L)$time |> median()
  x <- as.numeric(x_new)

  df <- rbind(df, list("k"          = k, 
                       "cpu time k" = cpu_time, 
                       "obj_value"  = 0.5 * (x/sigma) %*% Sigma %*% (x/sigma) - b %*% log(x/sigma),
                       "gap"        = 0.5 * (x/sigma) %*% Sigma %*% (x/sigma) - b %*% log(x/sigma) - opt_value,
                       "method"     = "Newton - improved"))
}



#
# Parallel Spinu MM
#
x <- sqrt(b)/sqrt(rowSums(Sigma))
df <- rbind(df, list("k"          = 0L, 
                     "cpu time k" = 0,
                     "obj_value"  = 0.5 * x %*% Sigma %*% x - b %*% log(x),
                     "gap"        = 0.5 * x %*% Sigma %*% x - b %*% log(x) - opt_value, 
                     "method"     = "MM"))
lmd_max <- eigen(Sigma)$values[1]
Sigma_lmd_max <- Sigma - lmd_max*diag(N)
for (k in 1:num_iter) {
  cpu_time <- microbenchmark({
    Sigma_lmd_max_xk <- Sigma_lmd_max %*% x
    x_new <- (-Sigma_lmd_max_xk + sqrt(Sigma_lmd_max_xk^2 + 4*lmd_max*b))/(2*lmd_max)
    }, unit = "microseconds", times = 10L)$time |> median()
  x <- as.numeric(x_new)

  df <- rbind(df, list("k"          = k, 
                       "cpu time k" = cpu_time,
                       "obj_value"  = 0.5 * x %*% Sigma %*% x - b %*% log(x),
                       "gap"        = 0.5 * x %*% Sigma %*% x - b %*% log(x) - opt_value, 
                       "method"     = "MM"))
}


#
# Parallel Spinu MM (normalized and based on correlation matrix)
#
x <- sqrt(b)/sqrt(rowSums(C))
rowsum_C <- rowSums(C)
df <- rbind(df, list("k"          = 0L, 
                     "cpu time k" = 0,
                     "obj_value"  = 0.5 * (x/sigma) %*% Sigma %*% (x/sigma) - b %*% log(x/sigma),
                     "gap"        = 0.5 * (x/sigma) %*% Sigma %*% (x/sigma) - b %*% log(x/sigma) - opt_value, 
                     "method"     = "MM - improved"))
lmd_max <- eigen(C)$values[1]
C_lmd_max <- C - lmd_max*diag(N)
for (k in 1:num_iter) {
  cpu_time <- microbenchmark({
    C_lmd_max_xk <- C_lmd_max %*% x
    x_new <- (-C_lmd_max_xk + sqrt(C_lmd_max_xk^2 + 4*lmd_max*b))/(2*lmd_max)
    x_new <- x_new * sqrt(sum(b/x_new)/sum(rowsum_C * x_new))
    }, unit = "microseconds", times = 10L)$time |> median()
  x <- as.numeric(x_new)

  df <- rbind(df, list("k"          = k, 
                       "cpu time k" = cpu_time,
                       "obj_value"  = 0.5 * (x/sigma) %*% Sigma %*% (x/sigma) - b %*% log(x/sigma),
                       "gap"        = 0.5 * (x/sigma) %*% Sigma %*% (x/sigma) - b %*% log(x/sigma) - opt_value,
                       "method"     = "MM - improved"))
}


#
# Parallel Spinu SCA
#
x <- sqrt(b)/sqrt(rowSums(Sigma))
gamma <- 1   #(1, 0.1), (0.9, 0.01)
eps <- 0.1
df <- rbind(df, list("k"          = 0L, 
                     "cpu time k" = 0,
                     "obj_value"  = 0.5 * x %*% Sigma %*% x - b %*% log(x),
                     "gap"        = 0.5 * x %*% Sigma %*% x - b %*% log(x) - opt_value, 
                     "method"     = "SCA"))
Sigma_Diag_Sigma <- Sigma - diag(diag(Sigma))
for (k in 1:num_iter) {
  cpu_time <- microbenchmark({
    Sigma_Diag_Sigma_xk <- Sigma_Diag_Sigma %*% x
    x_hat <- (-Sigma_Diag_Sigma_xk + sqrt(Sigma_Diag_Sigma_xk^2 + 4*diag(Sigma)*b))/(2*diag(Sigma))
    x_new <- gamma*x_hat + (1 - gamma)*x
    }, unit = "microseconds", times = 10L)$time |> median()
  x <- as.numeric(x_new)
  gamma <- gamma * (1 - eps*gamma)

  df <- rbind(df, list("k"          = k, 
                       "cpu time k" = cpu_time,
                       "obj_value"  = 0.5 * x %*% Sigma %*% x - b %*% log(x),
                       "gap"        = 0.5 * x %*% Sigma %*% x - b %*% log(x) - opt_value, 
                       "method"     = "SCA"))
}


#
# Parallel Spinu SCA (normalized and based on correlation matrix)
#
x <- sqrt(b)/sqrt(rowSums(C))
rowsum_C <- rowSums(C)
gamma <- 1   #(1, 0.1), (0.9, 0.01)
eps <- 0.1
df <- rbind(df, list("k"          = 0L, 
                     "cpu time k" = 0,
                     "obj_value"  = 0.5 * (x/sigma) %*% Sigma %*% (x/sigma) - b %*% log(x/sigma),
                     "gap"        = 0.5 * (x/sigma) %*% Sigma %*% (x/sigma) - b %*% log(x/sigma) - opt_value,
                     "method"     = "SCA - improved"))
C_minus_I <- C - diag(N)
for (k in 1:num_iter) {
  cpu_time <- microbenchmark({
    C_minus_I_xk <- C_minus_I %*% x
    x_hat <- (-C_minus_I_xk + sqrt(C_minus_I_xk^2 + 4*b))/2
    #x_hat <- x_hat /sqrt(as.numeric(t(x_hat) %*% C %*% x_hat))
    x_hat <- x_hat * sqrt(sum(b/x_hat)/sum(rowsum_C * x_hat))  # my version
    x_new <- gamma*x_hat + (1 - gamma)*x
    }, unit = "microseconds", times = 10L)$time |> median()
  x <- as.numeric(x_new)
  gamma <- gamma * (1 - eps*gamma)

  df <- rbind(df, list("k"          = k, 
                       "cpu time k" = cpu_time,
                       "obj_value"  = 0.5 * (x/sigma) %*% Sigma %*% (x/sigma) - b %*% log(x/sigma),
                       "gap"        = 0.5 * (x/sigma) %*% Sigma %*% (x/sigma) - b %*% log(x/sigma) - opt_value,
                       "method"     = "SCA - improved"))
}



# process data frame
df$method <- factor(df$method, levels = unique(df$method))
df <- df |>
  group_by(method) |>
  mutate("CPU time [ms]" = cumsum(`cpu time k`)/1e6) |>
  ungroup()


# plots
p_gap_vs_iter <- df |>
  ggplot(aes(x = k, y = gap, color = method)) +
  geom_line(linewidth = 1.2) +
  scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x))) +
  scale_x_continuous(limits = c(0, num_iter), breaks = seq(0, num_iter, by = 1), minor_breaks = NULL) +
  coord_cartesian(ylim = c(1e-10, 1e-2)) +
  labs(color = "method")
  ggtitle("Optimality gap versus iterations")
$title
[1] "Optimality gap versus iterations"

attr(,"class")
[1] "labels"
p_gap_vs_cpu_time <- df |>
  ggplot(aes(x = `CPU time [ms]`, y = gap, color = method)) +
  geom_line(linewidth = 1.2) +
  scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x))) +
  coord_cartesian(xlim = c(0, 0.25), ylim = c(1e-10, 1e-2)) +
  labs(color = "method")
  ggtitle("Optimality gap versus CPU time")
$title
[1] "Optimality gap versus CPU time"

attr(,"class")
[1] "labels"
p_gap_vs_iter / p_gap_vs_cpu_time + plot_layout(guides = "collect")

General nonconvex formulations

Portfolio allocation and risk contribution of general nonconvex RPP (with \(w_i \\leq 0.15\)) compared to benchmarks (\(1/N\) portfolio, naive diagonal RPP, and vanilla convex RPP):

library(riskParityPortfolio)

stock_prices <- SP500_2015to2020$stocks["2020::2020-09", c("AAPL", "NFLX", "TSCO", "MGM", "MSFT", "FB", "AMZN", "GOOGL")]
T <- nrow(stock_prices)
T_trn <- round(0.70*T)
X <- diff(log(stock_prices[1:T_trn, ]))[-1]
Sigma <- cov(X)
N <- ncol(X)

# portfolios
w_EWP <- rep(1/N, N)
w_naive_RPP  <- riskParityPortfolio(Sigma, formulation = "diag")$w
w_convex_RPP <- riskParityPortfolio(Sigma)$w
w_nonconvex_RPP <- riskParityPortfolio(Sigma, w_ub = 0.15)$w


# barplots
df_w <- data.frame(
  "stocks"    = names(stock_prices),
  "1/N"       = w_EWP,
  "Naive diagonal RPP" = w_naive_RPP,
  "Vanilla convex RPP" = w_convex_RPP,
  "General nonconvex RPP" = w_nonconvex_RPP,
  check.names = FALSE)

barplot_w_and_RCC(df_w, Sigma)

Algorithms

Converge comparison for different algorithms to solve the nonconvex RPP formulation: \[ \begin{array}{ll} \underset{\w}{\textm{minimize}} & \begin{aligned}\sum_{i=1}^N \left(\frac{w_i\left(\bSigma\w\right)_i}{\w^\T\bSigma\w} - b_i\right)^2\end{aligned}\\ \textm{subject to} & \w \in \mathcal{W}. \end{array} \]

library(riskParityPortfolio)
library(microbenchmark)
library(dplyr)
library(alabama)
library(nloptr)
library(quadprog)

N <- 200
Sigma <- cov(diff(log(SP500_2015to2020$stocks[, 1:N]))[-1])
b <- rep(1/N, N)
w_ub <- 0.008  # RPP with upper bound!!
w_init <- rep(1/N, N)

# problem definition
R      <- riskParityPortfolio:::R_rc_over_var_vs_b
R_grad <- riskParityPortfolio:::R_grad_rc_over_var_vs_b


num_iter  <- 20L
num_times <- 10L  # to compute the cpu time



#
# General nonlinear solver (alabama)
#
heq     <- function(w, ...) sum(w) - 1                         # sum(w) - 1 = 0
heq.jac <- function(w, ...) matrix(1, 1, N)
hin     <- function(w, ...) return(c(w, w_ub - w))             # w >= 0 and w <= ub (ub - w >= 0)
hin.jac <- function(w, ...) return(rbind(diag(N), -diag(N)))
w <- w_init
df <- data.frame("k"             = 0L,
                 "cpu time k"    = NA,
                 "CPU time [ms]" = 0,
                 "obj_value"     = R(w, Sigma, b),
                 "method"        = "nonlinear solver (alabama)",
                 check.names     = FALSE)
for (k in 1:num_iter) {
  cpu_time <- microbenchmark({
    res <- alabama::constrOptim.nl(par = w_init, 
                                   fn  = R,    gr = R_grad,
                                   hin = hin,  hin.jac = hin.jac,
                                   heq = heq,  heq.jac = heq.jac,
                                   control.outer = list(trace = FALSE, itmax = k),
                                   Sigma = Sigma, b = b)
    w <- res$par
  }, unit = "microseconds", times = num_times)$time |> median()
  df <- rbind(df, list("k"             = res$outer.iterations, 
                       "cpu time k"    = NA,
                       "CPU time [ms]" = cpu_time/1e6, 
                       "obj_value"     = R(w, Sigma, b),
                       "method"        = "nonlinear solver (alabama)"))
}



#
# General nonlinear solver (nloptr)
#
w <- w_init
df <- rbind(df, list("k"             = 0L,
                     "cpu time k"    = NA,
                     "CPU time [ms]" = 0,
                     "obj_value"     = R(w, Sigma, b),
                     "method"        = "nonlinear solver (nloptr)"))
for (k in 1:num_iter) {
  cpu_time <- microbenchmark({
    res <- nloptr::slsqp(x0 = w_init,
                         fn = R,      gr = R_grad,
                         hin = hin,   hinjac = hin.jac,
                         heq = heq,   heqjac = heq.jac,
                         control = list(maxeval = k),
                         Sigma = Sigma, b = b)
    w <- res$par
  }, unit = "microseconds", times = num_times)$time |> median()
  df <- rbind(df, list("k"             = res$iter,
                       "cpu time k"    = NA,
                       "CPU time [ms]" = cpu_time/1e6, 
                       "obj_value"     = R(w, Sigma, b),
                       "method"        = "nonlinear solver (nloptr)"))
}



#
# SCA
#
gamma <- 0.9
zeta <- 1e-7
# constraints (A' * b >= b0)
meq <- 1
Amat <- t(rbind(1, -diag(N), diag(N)))
bvec <- c(1, -rep(w_ub, N), rep(0, N))
# intermediate variables
tau <- .05 * sum(diag(Sigma)) / (2*N)
tauI <- tau*diag(N)
g <- riskParityPortfolio:::g_rc_over_var_vs_b
A <- riskParityPortfolio:::A_rc_over_var_vs_b

wk <- w_init
df <- rbind(df, list("k"             = 0L,
                     "cpu time k"    = 0,
                     "CPU time [ms]" = NA, 
                     "obj_value"     = R(wk, Sigma, b),
                     "method"        = "SCA"))
for (k in 1:num_iter) {
  cpu_time <- microbenchmark({
    # auxiliary quantities
    Sigma_wk <- Sigma %*% wk
    rk <- wk * Sigma_wk
    Jk <- A(wk, Sigma, b, Sigma_w = Sigma_wk, r = rk)
    gk <- g(wk, Sigma, b, r = rk)
    Qk <- 2 * crossprod(Jk) + tauI
    qk <- 2 * t(Jk) %*% gk - Qk %*% wk
    w_hat <- quadprog::solve.QP(Dmat = Qk, dvec = -qk, Amat = Amat, bvec = bvec, meq = meq)$solution
    w_new <- wk + gamma * (w_hat - wk)
  }, unit = "microseconds", times = num_times)$time |> median()
  wk <- w_new
  gamma <- gamma * (1 - zeta*gamma)

  df <- rbind(df, list("k"             = k, 
                       "cpu time k"    = cpu_time,
                       "CPU time [ms]" = NA,
                       "obj_value"     = R(wk, Sigma, b),
                       "method"        = "SCA"))
}


# process data frame
df$method <- factor(df$method, levels = unique(df$method))
df <- df |>
  group_by(method) |>
  mutate("CPU time [ms]" = ifelse(is.na(`CPU time [ms]`), cumsum(`cpu time k`)/1e6, `CPU time [ms]`)) |>
  ungroup()

# add optimality gap using the actual minimum value
df <- df |>
  group_by(method) |>
  mutate("gap" = obj_value - min(df$obj_value)) |>
  ungroup()


# plot
p_gap_vs_iter <- df |>
  ggplot(aes(x = k, y = gap, color = method)) +
  geom_line(linewidth = 1.2) +
  scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x))) +
  scale_x_continuous(limits = c(0, num_iter), breaks = seq(0, num_iter, by = 1), minor_breaks = NULL) +
  #coord_cartesian(ylim = c(1e-10, 1e-2)) +
  labs(color = "method")
  ggtitle("Optimality gap versus iterations") + theme(axis.title.x = element_text(face = "italic"))
NULL
p_gap_vs_cpu_time <- df |>
  ggplot(aes(x = `CPU time [ms]`, y = gap, color = method)) +
  geom_line(linewidth = 1.2) +
  scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x))) +
  coord_cartesian(xlim = c(0, 250)) +
  labs(color = "method")
  ggtitle("Optimality gap versus CPU time")
$title
[1] "Optimality gap versus CPU time"

attr(,"class")
[1] "labels"
p_gap_vs_iter / p_gap_vs_cpu_time + plot_layout(guides = "collect")

References

Palomar, D. P. (2024). Portfolio optimization: Theory and application. Cambridge University Press.