Python Code for Portfolio Optimization Chapter 7 – Modern Portfolio Theory
Daniel P. Palomar (2025). Portfolio Optimization: Theory and Application. Cambridge University Press.
Last update: February 19, 2026
Contributors:
Libraries¶
The following libraries are used in the examples:
# Core numerical computing
import numpy as np
import pandas as pd
from typing import Dict, Tuple
# For financial data
import yfinance as yf # Loading financial data
import empyrical as emp # Performance metrics
# Book data (pip install "git+https://github.com/dppalomar/pob.git#subdirectory=python")
from pob_python import SP500_stocks_2015to2020, SP500_index_2015to2020
# Plotting
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
sns.set_theme(style="darkgrid")
import plotly.io as pio
pio.renderers.default = "notebook"
pio.templates.default = "plotly_white"
# Optimization
import cvxpy as cp # interface for convex optimization solvers
from scipy.optimize import minimize, LinearConstraint # for optimization
from scipy.linalg import cholesky
$\def\bm#1{\boldsymbol{#1}}$ $\def\textm#1{\textsf{#1}}$ $\def\T{{\mkern-2mu\raise-1mu\mathsf{T}}}$ $\def\R{\mathbb{R}}$ $\def\E{\rm I\kern-.2em E}$ $\def\w{\bm{w}}$ $\def\bmu{\bm{\mu}}$ $\def\bSigma{\bm{\Sigma}}$
Helper Functions¶
# Create wrapper to backtest multiple backtests (currently not available in skfolio, but soon will be)
import time
from datetime import timedelta
def batch_cross_val_predict(optimizers, X, cv, **kwargs) -> list:
"""Run cross_val_predict on multiple optimizers with timing information"""
bt_list = []
n_splits = cv.get_n_splits(X)
total_start_time = time.time()
print(f"\n🚀 Starting batch backtesting with {len(optimizers)} portfolio designs...")
print(f"📊 Cross-validation: {n_splits} splits")
print("=" * 70)
for i, optim in enumerate(optimizers, 1):
# Handle missing or None portfolio_params (fixes the original error)
if hasattr(optim, 'portfolio_params') and optim.portfolio_params is not None:
name = optim.portfolio_params.get('name', f'Portfolio {i}')
else:
name = f'Portfolio {i} ({type(optim).__name__})'
print(f"\n[{i}/{len(optimizers)}] 🏃 Backtesting '{name}'...")
# Start timing for this specific backtest
start_time = time.time()
try:
bt = cross_val_predict(
optim, X, cv=cv,
portfolio_params=getattr(optim, 'portfolio_params', {}),
**kwargs
)
# Calculate elapsed time
elapsed_time = time.time() - start_time
elapsed_str = str(timedelta(seconds=int(elapsed_time)))
bt_list.append(bt)
print(f"✅ Completed '{name}' in {elapsed_str}")
except Exception as e:
elapsed_time = time.time() - start_time
elapsed_str = str(timedelta(seconds=int(elapsed_time)))
print(f"❌ Failed '{name}' after {elapsed_str}: {str(e)}")
raise # Re-raise the exception to maintain original behavior
# Calculate total elapsed time
total_elapsed = time.time() - total_start_time
total_elapsed_str = str(timedelta(seconds=int(total_elapsed)))
print("=" * 70)
print(f"🎉 Batch backtesting completed!")
print(f"⏱️ Total time: {total_elapsed_str}")
print(f"📈 Successfully processed {len(bt_list)} portfolios")
return bt_list
# Create wrapper to plot portfolio compositions as a barplot
def plot_composition_sidebyside(population, asset_names=None):
"""
Plot portfolio compositions side-by-side for a Population of portfolios.
Parameters
----------
population : Population
A skfolio Population object containing multiple portfolios.
asset_names : list of str, optional
List of asset names in the desired order. If provided, all assets
will be displayed (with zero weights for missing assets).
Returns
-------
fig : plotly.graph_objects.Figure
The plotly figure object.
"""
import plotly.graph_objects as go
# Determine the asset order
if asset_names is None:
asset_names = population[0].composition.index.tolist()
fig = go.Figure()
# Add a bar trace for each portfolio
for portfolio in population:
df = portfolio.composition # shape: (n_assets, 1)
# Reindex to include all assets in the specified order, filling missing with 0
weights = df.iloc[:, 0].reindex(asset_names, fill_value=0)
fig.add_trace(go.Bar(
x=asset_names, # asset names in specified order
y=weights, # weights (0 for missing assets)
name=portfolio.name # legend label (portfolio name)
))
fig.update_layout(
title="Portfolio Compositions",
xaxis_title="Assets",
yaxis_title="Weight",
barmode='group', # bars side-by-side (not stacked)
xaxis={'categoryorder': 'array', # enforce asset order
'categoryarray': asset_names}
)
return fig
# Create a class to be able to specify portfolios direclty via a CVXPY code
from skfolio.optimization import ConvexOptimization
class Portfolio_via_CVXPY(ConvexOptimization):
"""Portfolio based on custom CVXPY function."""
def __init__(self, cvxpy_fun=None, **kwargs):
super().__init__(**kwargs)
self.cvxpy_fun = cvxpy_fun
def fit(self, X, y=None):
"""Fit the custom optimization problem using the provided function."""
if self.cvxpy_fun is None:
raise ValueError("cvxpy_fun must be provided at initialization")
# Call the custom function to compute weights
weights = self.cvxpy_fun(X)
# Store results
self.weights_ = np.asarray(weights)
return self
Mean-Variance Portfolio (MVP)¶
Vanilla MVP¶
We explore the mean-variance portfolio (MVP), $$ \begin{array}{ll} \underset{\w}{\textm{maximize}} & \w^\T\bmu - \frac{\lambda}{2}\w^\T\bSigma\w\\ \textm{subject to} & \bm{1}^\T\w=1, \quad \w\ge\bm{0}, \end{array} $$ with different values of the hyper-parameter $\lambda$.
# Get data
stock_prices = SP500_stocks_2015to2020[
["AAPL", "AMZN", "AMD", "GM", "GOOGL", "MGM", "MSFT", "QCOM", "TSCO", "UPS"]
].loc["2019":]
T = stock_prices.shape[0]
T_trn = round(0.70*T)
stock_prices.head()
| AAPL | AMZN | AMD | GM | GOOGL | MGM | MSFT | QCOM | TSCO | UPS | |
|---|---|---|---|---|---|---|---|---|---|---|
| Date | ||||||||||
| 2019-01-02 | 38.629 | 1539.13 | 18.83 | 31.8934 | 1054.68 | 24.5435 | 98.8602 | 54.2052 | 80.3520 | 91.445 |
| 2019-01-03 | 34.781 | 1500.28 | 17.05 | 30.5755 | 1025.47 | 24.0660 | 95.2233 | 52.5998 | 78.8374 | 88.849 |
| 2019-01-04 | 36.266 | 1575.39 | 19.00 | 31.5995 | 1078.07 | 25.1086 | 99.6521 | 53.4497 | 80.4594 | 91.944 |
| 2019-01-07 | 36.185 | 1629.51 | 20.57 | 32.5760 | 1075.92 | 25.8296 | 99.7792 | 53.2986 | 81.6418 | 91.633 |
| 2019-01-08 | 36.875 | 1656.58 | 20.75 | 33.0026 | 1085.37 | 26.5116 | 100.5027 | 52.8359 | 81.5930 | 91.643 |
#
# Define portfolios
#
def EWP(X: pd.DataFrame) -> np.ndarray:
N = X.shape[1]
return np.repeat(1/N, N)
def design_MVP(mu: np.ndarray, Sigma: np.ndarray, lmd: float = 1) -> np.ndarray:
N = len(Sigma)
w = cp.Variable(N)
prob = cp.Problem(cp.Maximize(mu @ w - (lmd/2) * cp.quad_form(w, Sigma)),
[cp.sum(w) == 1, w >= 0])
prob.solve()
return w.value
def MVP_lmd1(X_lin: pd.DataFrame) -> np.ndarray:
X_log = np.log(1 + X_lin)
mu = X_log.mean().values
Sigma = X_log.cov().values
return design_MVP(mu, Sigma, lmd=1)
def MVP_lmd4(X_lin: pd.DataFrame) -> np.ndarray:
X_log = np.log(1 + X_lin)
mu = X_log.mean().values
Sigma = X_log.cov().values
return design_MVP(mu, Sigma, lmd=4)
def MVP_lmd16(X_lin: pd.DataFrame) -> np.ndarray:
X_log = np.log(1 + X_lin)
mu = X_log.mean().values
Sigma = X_log.cov().values
return design_MVP(mu, Sigma, lmd=16)
def MVP_lmd64(X_lin: pd.DataFrame) -> np.ndarray:
X_log = np.log(1 + X_lin)
mu = X_log.mean().values
Sigma = X_log.cov().values
return design_MVP(mu, Sigma, lmd=64)
from skfolio import Population
from skfolio.model_selection import cross_val_predict, WalkForward
from skfolio.preprocessing import prices_to_returns
# Wrap each portfolio within skfolio
portf_EWP = Portfolio_via_CVXPY(
cvxpy_fun=EWP,
portfolio_params=dict(name="1/N")
)
portf_MVP_lmd1 = Portfolio_via_CVXPY(
cvxpy_fun=MVP_lmd1,
portfolio_params=dict(name="MVP (lmd = 1)")
)
portf_MVP_lmd4 = Portfolio_via_CVXPY(
cvxpy_fun=MVP_lmd4,
portfolio_params=dict(name="MVP (lmd = 4)")
)
portf_MVP_lmd16 = Portfolio_via_CVXPY(
cvxpy_fun=MVP_lmd16,
portfolio_params=dict(name="MVP (lmd = 16)")
)
portf_MVP_lmd64 = Portfolio_via_CVXPY(
cvxpy_fun=MVP_lmd64,
portfolio_params=dict(name="MVP (lmd = 64)")
)
all_formulations = [
portf_EWP,
portf_MVP_lmd1,
portf_MVP_lmd4,
portf_MVP_lmd16,
portf_MVP_lmd64,
]
n_portfolios = len(all_formulations)
# WalkForward backtest
bt_list = batch_cross_val_predict(
all_formulations,
X=prices_to_returns(stock_prices), #or: stock_prices.pct_change(fill_method=None)
cv=WalkForward(test_size=30, train_size=T_trn, purged_size=1),
)
bt_population = Population(bt_list)
🚀 Starting batch backtesting with 5 portfolio designs... 📊 Cross-validation: 4 splits ====================================================================== [1/5] 🏃 Backtesting '1/N'... ✅ Completed '1/N' in 0:00:00 [2/5] 🏃 Backtesting 'MVP (lmd = 1)'... ✅ Completed 'MVP (lmd = 1)' in 0:00:00 [3/5] 🏃 Backtesting 'MVP (lmd = 4)'... ✅ Completed 'MVP (lmd = 4)' in 0:00:00 [4/5] 🏃 Backtesting 'MVP (lmd = 16)'... ✅ Completed 'MVP (lmd = 16)' in 0:00:00 [5/5] 🏃 Backtesting 'MVP (lmd = 64)'... ✅ Completed 'MVP (lmd = 64)' in 0:00:00 ====================================================================== 🎉 Batch backtesting completed! ⏱️ Total time: 0:00:00 📈 Successfully processed 5 portfolios
# Extract initial portfolio composition of each portfolio model
initial_portfolios = Population([
setattr(mpp[0], 'name', mpp.name) or mpp[0]
for mpp in bt_population
])
# Plot side-by-side barplot
plot_composition_sidebyside(initial_portfolios, asset_names=stock_prices.columns)
# Summary
bt_population.summary().loc[[
'Annualized Sharpe Ratio',
'CVaR at 95%',
'MAX Drawdown',
'Annualized Mean',
'Annualized Standard Deviation'
]].T
| Annualized Sharpe Ratio | CVaR at 95% | MAX Drawdown | Annualized Mean | Annualized Standard Deviation | |
|---|---|---|---|---|---|
| 1/N | 3.98 | 4.37% | 9.11% | 142.32% | 35.74% |
| MVP (lmd = 1) | 2.70 | 6.65% | 15.33% | 162.77% | 60.32% |
| MVP (lmd = 4) | 3.42 | 5.39% | 15.40% | 143.44% | 41.97% |
| MVP (lmd = 16) | 3.82 | 3.85% | 10.72% | 120.08% | 31.41% |
| MVP (lmd = 64) | 4.24 | 3.49% | 9.19% | 124.13% | 29.29% |
# Plot NAV
bt_population.set_portfolio_params(compounded=True)
fig = bt_population.plot_cumulative_returns()
fig.update_layout(title="Wealth", xaxis_title=None, yaxis_title="Net Asset Value (NAV)")
fig
# Plot drawdowns
bt_population.plot_drawdowns()
MVP with diversification heuristics¶
We now consider the MVP with two diversification heuristics:
- upper bound: $\w\leq0.25\times\bm{1}$; and
- diversification constraint: $\|\w\|_2^2 \leq 2/N$.
#
# Define portfolios
#
def GMVP(X_lin: pd.DataFrame) -> np.ndarray:
# Estimate parameters
X_log = np.log(1 + X_lin)
Sigma = X_log.cov().values
# Optimize portfolio
N = len(Sigma)
w = cp.Variable(N)
prob = cp.Problem(cp.Minimize(cp.quad_form(w, Sigma)),
[cp.sum(w) == 1, w >= 0])
prob.solve()
return w.value
def MVP_vanilla(X_lin: pd.DataFrame) -> np.ndarray:
# Estimate parameters
X_log = np.log(1 + X_lin)
mu = X_log.mean().values
Sigma = X_log.cov().values
# Optimize portfolio
lmd = 1
N = len(Sigma)
w = cp.Variable(N)
prob = cp.Problem(cp.Maximize(mu.T @ w - (lmd/2) * cp.quad_form(w, Sigma)),
[cp.sum(w) == 1, w >= 0])
prob.solve()
return w.value
def MVP_ub(X_lin: pd.DataFrame) -> np.ndarray:
# Estimate parameters
X_log = np.log(1 + X_lin)
mu = X_log.mean().values
Sigma = X_log.cov().values
# Optimize portfolio
N = len(Sigma)
lmd = 1
ub = 0.25
w = cp.Variable(N)
prob = cp.Problem(cp.Maximize(mu.T @ w - (lmd/2) * cp.quad_form(w, Sigma)),
[cp.sum(w) == 1, w >= 0, w <= ub])
prob.solve()
return w.value
def MVP_diversification(X_lin: pd.DataFrame) -> np.ndarray:
# Estimate parameters
X_log = np.log(1 + X_lin)
mu = X_log.mean().values
Sigma = X_log.cov().values
# Optimize portfolio
N = len(Sigma)
lmd = 1
D = 2/N
w = cp.Variable(N)
prob = cp.Problem(cp.Maximize(mu.T @ w - (lmd/2) * cp.quad_form(w, Sigma)),
[cp.sum(w) == 1, w >= 0, cp.sum_squares(w) <= D])
prob.solve()
return w.value
# Wrap each portfolio within skfolio
portf_EWP = Portfolio_via_CVXPY(
cvxpy_fun=EWP,
portfolio_params=dict(name="1/N")
)
portf_GMVP = Portfolio_via_CVXPY(
cvxpy_fun=GMVP,
portfolio_params=dict(name="GMVP")
)
portf_MVP_vanilla = Portfolio_via_CVXPY(
cvxpy_fun=MVP_vanilla,
portfolio_params=dict(name="MVP")
)
portf_MVP_ub = Portfolio_via_CVXPY(
cvxpy_fun=MVP_ub,
portfolio_params=dict(name="MVP with upper bound")
)
portf_MVP_diversification = Portfolio_via_CVXPY(
cvxpy_fun=MVP_diversification,
portfolio_params=dict(name="MVP with diversific. const.")
)
all_formulations = [
portf_EWP,
portf_GMVP,
portf_MVP_vanilla,
portf_MVP_ub,
portf_MVP_diversification,
]
n_portfolios = len(all_formulations)
# WalkForward backtest
bt_list = batch_cross_val_predict(
all_formulations,
X=prices_to_returns(stock_prices), #or: stock_prices.pct_change(fill_method=None)
cv=WalkForward(test_size=30, train_size=T_trn, purged_size=1),
)
bt_population = Population(bt_list)
🚀 Starting batch backtesting with 5 portfolio designs... 📊 Cross-validation: 4 splits ====================================================================== [1/5] 🏃 Backtesting '1/N'... ✅ Completed '1/N' in 0:00:00 [2/5] 🏃 Backtesting 'GMVP'... ✅ Completed 'GMVP' in 0:00:00 [3/5] 🏃 Backtesting 'MVP'... ✅ Completed 'MVP' in 0:00:00 [4/5] 🏃 Backtesting 'MVP with upper bound'... ✅ Completed 'MVP with upper bound' in 0:00:00 [5/5] 🏃 Backtesting 'MVP with diversific. const.'... ✅ Completed 'MVP with diversific. const.' in 0:00:00 ====================================================================== 🎉 Batch backtesting completed! ⏱️ Total time: 0:00:00 📈 Successfully processed 5 portfolios
# Extract initial portfolio composition of each portfolio model
initial_portfolios = Population([
setattr(mpp[0], 'name', mpp.name) or mpp[0]
for mpp in bt_population
])
# Plot side-by-side barplot
plot_composition_sidebyside(initial_portfolios, asset_names=stock_prices.columns)
# Summary
bt_population.summary().loc[[
'Annualized Sharpe Ratio',
'CVaR at 95%',
'MAX Drawdown',
'Annualized Mean',
'Annualized Standard Deviation'
]].T
| Annualized Sharpe Ratio | CVaR at 95% | MAX Drawdown | Annualized Mean | Annualized Standard Deviation | |
|---|---|---|---|---|---|
| 1/N | 3.98 | 4.37% | 9.11% | 142.32% | 35.74% |
| GMVP | 4.32 | 3.46% | 9.10% | 125.75% | 29.10% |
| MVP | 2.70 | 6.65% | 15.33% | 162.77% | 60.32% |
| MVP with upper bound | 3.38 | 4.96% | 13.36% | 129.65% | 38.34% |
| MVP with diversific. const. | 3.31 | 4.91% | 12.91% | 130.03% | 39.26% |
# Plot NAV
fig = bt_population.plot_cumulative_returns()
fig.update_layout(title="Wealth", xaxis_title=None, yaxis_title="Net Asset Value (NAV)")
fig
# Plot drawdowns
bt_population.plot_drawdowns()
Maximum Sharpe Ratio Portfolio (MSRP)¶
Recall the maximum Sharpe ratio portfolio (MSRP) formulation: $$ \begin{array}{ll} \underset{\w}{\textm{maximize}} & \dfrac{\w^\T\bmu - r_\textm{f}}{\sqrt{\w^\T\bSigma\w}}\\ \textm{subject to} & \begin{array}{l} \bm{1}^\T\w=1, \quad \w\ge\bm{0}.\end{array} \end{array} $$
We will solve it numerically via different methods:
- general-purpose nonlinear solver
- bisection method
- Dinkelbach method
- Schaible transform
First, let's obtain the mean vector $\bmu$ and covariance matrix $\bSigma$ from the market data:
stock_prices = SP500_stocks_2015to2020[
["AAPL", "AMZN", "AMD", "GM", "GOOGL", "MGM", "MSFT", "QCOM", "TSCO", "UPS"]
].loc["2019":]
X_lin = stock_prices.pct_change().dropna()
X_log = np.log(1 + X_lin)
T, N = X_lin.shape
mu = X_log.mean().values
Sigma = X_log.cov().values
MSRP via general-purpose nonlinear solver¶
We will solve this problem with the general-purpose nonlinear solver scipy.optimize.minimize in Python:
# Define the nonconvex objective function
def fn_SR(w):
return (w @ mu) / np.sqrt(w @ Sigma @ w)
def neg_fn_SR(w):
return -fn_SR(w)
# Initial point
N = len(mu)
w0 = np.ones(N) / N
# Equality constraint: sum(w) = 1
def constraint_eq(w):
return np.sum(w) - 1
# Optimization
res = minimize(neg_fn_SR, w0, method='SLSQP',
bounds=[(0, None) for _ in range(N)], # w >= 0
constraints={'type': 'eq', 'fun': constraint_eq})
w_nonlinear_solver = res.x
print(res)
message: Optimization terminated successfully
success: True
status: 0
fun: -0.10712133871542401
x: [ 5.622e-01 1.628e-01 1.909e-01 1.203e-17 1.059e-16
0.000e+00 0.000e+00 2.444e-19 7.543e-02 8.767e-03]
nit: 14
jac: [-9.832e-05 2.212e-04 2.336e-04 8.025e-02 4.231e-02
1.223e-01 1.348e-02 1.547e-02 -2.779e-04 -4.957e-04]
nfev: 154
njev: 14
multipliers: [-8.909e-05]
MSRP via bisection method¶
We are going to solve the nonconvex problem $$ \begin{array}{ll} \underset{\w,t}{\textm{maximize}} & t\\ \textm{subject to} & t \leq \dfrac{\w^\T\bmu}{\sqrt{\w^\T\bSigma\w}}\\ & \bm{1}^\T\w=1, \quad \w\geq\bm{0}. \end{array} $$ via bisection on $t$ with the following (convex) SOCP problem for a given $t$: $$ \begin{array}{ll} \underset{\;}{\textm{find}} & \w\\ \textm{subject to} & t \left\Vert \bSigma^{1/2}\w\right\Vert_{2}\leq\w^\T\bmu\\ & \bm{1}^\T\w=1, \quad \w\geq\bm{0}. \end{array} $$
# Square-root of matrix Sigma
Sigma_12 = cholesky(Sigma)
print(np.max(np.abs(Sigma_12.T @ Sigma_12 - Sigma))) # sanity check
2.168404344971009e-19
# Create function for MVP (each iteration of bisection)
import warnings
def SOCP_bisection(t):
w = cp.Variable(N)
prob = cp.Problem(cp.Maximize(0),
constraints=[t * cp.norm(Sigma_12 @ w, 2) <= mu.T @ w,
sum(w) == 1,
w >= 0])
# Solve the problem (ignore annoying warnings):
with warnings.catch_warnings():
warnings.simplefilter("ignore")
prob.solve()
return {"status": prob.status, "w": w.value}
# Now run the bisection algorithm
t_lb = 0 # for sure the problem is feasible in this case
t_ub = 10 # a tighter upper bound could be chosen (10 is a conservative choice)
while t_ub - t_lb > 1e-3:
t = (t_ub + t_lb) / 2 # midpoint
print(f"Current interval = ({t_lb:.3f}, {t_ub:.3f}) Trying midpoint t = {t:.4f}")
if 'infeasible' in SOCP_bisection(t)["status"]:
t_ub = t
else:
t_lb = t
w_bisection = SOCP_bisection(t_lb)["w"]
Current interval = (0.000, 10.000) Trying midpoint t = 5.0000 Current interval = (0.000, 5.000) Trying midpoint t = 2.5000 Current interval = (0.000, 2.500) Trying midpoint t = 1.2500 Current interval = (0.000, 1.250) Trying midpoint t = 0.6250 Current interval = (0.000, 0.625) Trying midpoint t = 0.3125 Current interval = (0.000, 0.312) Trying midpoint t = 0.1562 Current interval = (0.000, 0.156) Trying midpoint t = 0.0781 Current interval = (0.078, 0.156) Trying midpoint t = 0.1172 Current interval = (0.078, 0.117) Trying midpoint t = 0.0977 Current interval = (0.098, 0.117) Trying midpoint t = 0.1074 Current interval = (0.098, 0.107) Trying midpoint t = 0.1025 Current interval = (0.103, 0.107) Trying midpoint t = 0.1050 Current interval = (0.105, 0.107) Trying midpoint t = 0.1062 Current interval = (0.106, 0.107) Trying midpoint t = 0.1068
# Comparison between two solutions
w_df = pd.DataFrame({
'nonlinear_solver': w_nonlinear_solver,
'bisection': w_bisection,
})
print(w_df.round(3))
nonlinear_solver bisection 0 0.562 0.537 1 0.163 0.152 2 0.191 0.185 3 0.000 0.000 4 0.000 0.001 5 0.000 0.000 6 0.000 0.003 7 0.000 0.002 8 0.075 0.081 9 0.009 0.039
# Sharpe ratio of two solutions
sharpe_ratios_df = pd.DataFrame({
'Method': ['nonlinear_solver', 'bisection'],
'Sharpe Ratio': [fn_SR(w_nonlinear_solver), fn_SR(w_bisection)]
})
print(sharpe_ratios_df.round(3))
Method Sharpe Ratio 0 nonlinear_solver 0.107 1 bisection 0.107
MSRP via Dinkelbach method¶
We are going to solve the nonconvex problem $$ \begin{array}{ll} \underset{\w}{\textm{maximize}} & \dfrac{\w^\T\bmu}{\sqrt{\w^\T\bSigma\w}}\\ \textm{subject to} & \begin{array}{l} \bm{1}^\T\w=1, \quad \w\ge\bm{0}.\end{array} \end{array} $$ by iteratively solving the (convex) SOCP problem for a given $y^{(k)}$: $$ \begin{array}{ll} \underset{\w}{\textm{maximize}} & \w^\T\bmu - y^{(k)} \left\Vert \bSigma^{1/2}\w\right\Vert_{2}\\ \textm{subject to} & \begin{array}{l} \bm{1}^\T\w=1, \quad \w\ge\bm{0}.\end{array} \end{array} $$ where $$y^{(k)} = \frac{\mathbf{w}^{(k)\T}\bmu}{\sqrt{\w^{(k)\T}\bSigma\w^{(k)}}}.$$
# Create function for MVP (each iteration of Dinkelbach)
def SOCP_Dinkelbach(y):
w = cp.Variable(Sigma.shape[0])
prob = cp.Problem(cp.Maximize(mu.T @ w - y * cp.norm(Sigma_12 @ w, 2)),
constraints=[cp.sum(w) == 1,
w >= 0])
prob.solve()
return w.value
# Initial point (has to satisfy w_k.T @ mu >= 0)
i_max = np.argmax(mu)
w_k = np.zeros(N)
w_k[i_max] = 1
# Now the iterative Dinkelbach algorithm
k = 1
while k == 1 or np.max(np.abs(w_k - w_prev)) > 1e-6:
w_prev = w_k
y_k = (w_k.T @ mu) / np.sqrt(w_k.T @ Sigma @ w_k)
w_k = SOCP_Dinkelbach(y_k)
k += 1
w_Dinkelbach = w_k
print(f"Number of iterations: {k-1}")
Number of iterations: 5
# Comparison among three solutions
w_df = pd.DataFrame({
'nonlinear_solver': w_nonlinear_solver,
'bisection': w_bisection,
'Dinkelbach': w_Dinkelbach
})
print(w_df.round(3))
nonlinear_solver bisection Dinkelbach 0 0.562 0.537 0.561 1 0.163 0.152 0.156 2 0.191 0.185 0.188 3 0.000 0.000 0.000 4 0.000 0.001 0.000 5 0.000 0.000 0.000 6 0.000 0.003 0.000 7 0.000 0.002 0.000 8 0.075 0.081 0.079 9 0.009 0.039 0.015
# Sharpe ratio of three solutions
sharpe_ratios_df = pd.DataFrame({
'Method': ['nonlinear_solver', 'bisection', 'Dinkelbach'],
'Sharpe Ratio': [fn_SR(w_nonlinear_solver), fn_SR(w_bisection), fn_SR(w_Dinkelbach)]
})
print(sharpe_ratios_df.round(3))
Method Sharpe Ratio 0 nonlinear_solver 0.107 1 bisection 0.107 2 Dinkelbach 0.107
MSRP via Schaible transform¶
The maximum Sharpe ratio portfolio (MSRP) is the nonconvex problem $$ \begin{array}{ll} \underset{\w}{\textm{maximize}} & \dfrac{\w^\T\bmu}{\sqrt{\w^\T\bSigma\w}}\\ \textm{subject to} & \begin{array}{l} \bm{1}^\T\w=1, \quad \w\ge\bm{0}.\end{array} \end{array} $$ that can be rewritten in convex form as $$ \begin{array}{ll} \underset{\w}{\textm{minimize}} & \tilde{\w}^\T\bSigma\tilde{\w}\\ \textm{subject to} & \tilde{\w}^\T\bmu = 1\\ & \tilde{\w}\ge\bm{0} \end{array} $$ and then $\w = \tilde{\w}/(\bm{1}^\T\tilde{\w})$.
This is a quadratic problem (QP) and we can conveniently use CVXR (although one is advised to use a specific QP solver like quadprog for speed and stability):
# create function for MSRP in convex form
def MSRP_convex(mu, Sigma):
w_ = cp.Variable(Sigma.shape[0])
prob = cp.Problem(cp.Minimize(cp.quad_form(w_, Sigma)),
constraints=[w_ >= 0, mu.T @ w_ == 1])
prob.solve()
w = w_.value / np.sum(w_.value)
return w
# This function can now be used as
w_MSRP = MSRP_convex(mu, Sigma)
# Comparison among solutions
comparison = pd.DataFrame({
'w_nonlinear_solver': w_nonlinear_solver,
'w_bisection': w_bisection,
'w_Dinkelbach': w_Dinkelbach,
'w_MSRP': w_MSRP
})
print(comparison.round(3))
w_nonlinear_solver w_bisection w_Dinkelbach w_MSRP 0 0.562 0.537 0.561 0.561 1 0.163 0.152 0.156 0.156 2 0.191 0.185 0.188 0.188 3 0.000 0.000 0.000 -0.000 4 0.000 0.001 0.000 0.000 5 0.000 0.000 0.000 0.000 6 0.000 0.003 0.000 -0.000 7 0.000 0.002 0.000 -0.000 8 0.075 0.081 0.079 0.079 9 0.009 0.039 0.015 0.015
# Sharpe ratio of different solutions
sharpe_ratios_df = pd.DataFrame({
'Method': ['nonlinear_solver', 'bisection', 'Dinkelbach', 'Schaible'],
'Sharpe Ratio': [fn_SR(w_nonlinear_solver), fn_SR(w_bisection), fn_SR(w_Dinkelbach), fn_SR(w_MSRP)]
})
print(sharpe_ratios_df.round(3))
Method Sharpe Ratio 0 nonlinear_solver 0.107 1 bisection 0.107 2 Dinkelbach 0.107 3 Schaible 0.107
Conclusion: All methods produce the optimal solution.
Numerical experiments¶
We now compare the following portfolios:
global minimum variance portfolio (GMVP): $$ \begin{array}{ll} \underset{\w}{\textm{minimize}} & \w^\T\bSigma\w\\ \textm{subject to} & \bm{1}^\T\w=1, \quad \w\ge\bm{0}; \end{array} $$
mean-variance portfolio (MVP): $$ \begin{array}{ll} \underset{\w}{\textm{maximize}} & \w^\T\bmu - \frac{\lambda}{2}\w^\T\bSigma\w\\ \textm{subject to} & \bm{1}^\T\w=1, \quad \w\ge\bm{0}; \end{array} $$
maximum Sharpe ratio portfolio (MSRP): $$ \begin{array}{ll} \underset{\w}{\textm{maximize}} & \dfrac{\w^\T\bmu - r_\textm{f}}{\sqrt{\w^\T\bSigma\w}}\\ \textm{subject to} & \begin{array}{l} \bm{1}^\T\w=1, \quad \w\ge\bm{0}.\end{array} \end{array} $$
# Get data
stock_prices = SP500_stocks_2015to2020[
["AAPL", "AMZN", "AMD", "GM", "GOOGL", "MGM", "MSFT", "QCOM", "TSCO", "UPS"]
].loc["2019":]
T = stock_prices.shape[0]
T_trn = round(0.70*T)
#
# Define portfolios
#
def GMVP(X_lin: pd.DataFrame) -> np.ndarray:
# Estimate parameters
X_log = np.log(1 + X_lin)
Sigma = X_log.cov().values
# Optimize portfolio
N = len(Sigma)
w = cp.Variable(N)
prob = cp.Problem(cp.Minimize(cp.quad_form(w, Sigma)),
[cp.sum(w) == 1, w >= 0])
prob.solve()
return w.value
def MVP(X_lin: pd.DataFrame) -> np.ndarray:
# Estimate parameters
X_log = np.log(1 + X_lin)
mu = X_log.mean().values
Sigma = X_log.cov().values
# Optimize portfolio
lmd = 1
N = len(Sigma)
w = cp.Variable(N)
prob = cp.Problem(cp.Maximize(mu.T @ w - (lmd/2) * cp.quad_form(w, Sigma)),
[cp.sum(w) == 1, w >= 0])
prob.solve()
return w.value
def MSRP(X_lin: pd.DataFrame) -> np.ndarray:
# Estimate parameters
X_log = np.log(1 + X_lin)
mu = X_log.mean().values
Sigma = X_log.cov().values
# Optimize portfolio
w = MSRP_convex(mu, Sigma)
return w
from skfolio.optimization import MeanRisk, ObjectiveFunction
from skfolio import RiskMeasure
# Wrap each portfolio within skfolio
portf_GMVP = Portfolio_via_CVXPY(
cvxpy_fun=GMVP,
portfolio_params=dict(name="GMVP")
)
portf_GMVP_skfolio = MeanRisk(
objective_function=ObjectiveFunction.MINIMIZE_RISK,
risk_measure=RiskMeasure.VARIANCE,
portfolio_params=dict(name="GMVP (skfolio)")
)
portf_MVP = Portfolio_via_CVXPY(
cvxpy_fun=MVP,
portfolio_params=dict(name="MVP")
)
portf_MVP_skfolio = MeanRisk(
objective_function=ObjectiveFunction.MAXIMIZE_UTILITY, # uses mean - λ·var
risk_measure=RiskMeasure.VARIANCE,
risk_aversion=1/2,
portfolio_params=dict(name="MVP (skfolio)")
)
portf_MSRP = Portfolio_via_CVXPY(
cvxpy_fun=MSRP,
portfolio_params=dict(name="MSRP")
)
portf_MSRP_skfolio = MeanRisk(
objective_function=ObjectiveFunction.MAXIMIZE_RATIO, # uses mean / var^2
risk_measure=RiskMeasure.VARIANCE,
portfolio_params=dict(name="MSRP (skfolio)")
)
portf_MSRP_bis_skfolio = MeanRisk(
objective_function=ObjectiveFunction.MAXIMIZE_RATIO, # uses mean / var
risk_measure=RiskMeasure.STANDARD_DEVIATION,
portfolio_params=dict(name="MSRP bis (skfolio)")
)
all_formulations = [
portf_GMVP,
portf_GMVP_skfolio,
portf_MVP,
portf_MVP_skfolio,
portf_MSRP,
portf_MSRP_skfolio,
portf_MSRP_bis_skfolio,
]
n_portfolios = len(all_formulations)
# WalkForward backtest
bt_list = batch_cross_val_predict(
all_formulations,
X=prices_to_returns(stock_prices), #or: stock_prices.pct_change(fill_method=None)
cv=WalkForward(test_size=30, train_size=T_trn, purged_size=1),
)
bt_population = Population(bt_list)
🚀 Starting batch backtesting with 7 portfolio designs... 📊 Cross-validation: 4 splits ====================================================================== [1/7] 🏃 Backtesting 'GMVP'... ✅ Completed 'GMVP' in 0:00:00 [2/7] 🏃 Backtesting 'GMVP (skfolio)'... ✅ Completed 'GMVP (skfolio)' in 0:00:00 [3/7] 🏃 Backtesting 'MVP'... ✅ Completed 'MVP' in 0:00:00 [4/7] 🏃 Backtesting 'MVP (skfolio)'... ✅ Completed 'MVP (skfolio)' in 0:00:00 [5/7] 🏃 Backtesting 'MSRP'... ✅ Completed 'MSRP' in 0:00:00 [6/7] 🏃 Backtesting 'MSRP (skfolio)'... ✅ Completed 'MSRP (skfolio)' in 0:00:00 [7/7] 🏃 Backtesting 'MSRP bis (skfolio)'... ✅ Completed 'MSRP bis (skfolio)' in 0:00:00 ====================================================================== 🎉 Batch backtesting completed! ⏱️ Total time: 0:00:00 📈 Successfully processed 7 portfolios
# Extract initial portfolio composition of each portfolio model
initial_portfolios = Population([
setattr(mpp[0], 'name', mpp.name) or mpp[0]
for mpp in bt_population
])
# Plot side-by-side barplot
plot_composition_sidebyside(initial_portfolios, asset_names=stock_prices.columns)
# Summary
bt_population.summary().loc[[
'Annualized Sharpe Ratio',
'CVaR at 95%',
'MAX Drawdown',
'Annualized Mean',
'Annualized Standard Deviation'
]].T
| Annualized Sharpe Ratio | CVaR at 95% | MAX Drawdown | Annualized Mean | Annualized Standard Deviation | |
|---|---|---|---|---|---|
| GMVP | 4.32 | 3.46% | 9.10% | 125.75% | 29.10% |
| GMVP (skfolio) | 4.34 | 3.43% | 9.08% | 126.14% | 29.04% |
| MVP | 2.70 | 6.65% | 15.33% | 162.77% | 60.32% |
| MVP (skfolio) | 2.68 | 6.65% | 15.33% | 171.61% | 63.95% |
| MSRP | 3.40 | 5.26% | 14.89% | 142.09% | 41.76% |
| MSRP (skfolio) | 3.39 | 5.38% | 15.11% | 146.71% | 43.28% |
| MSRP bis (skfolio) | 3.39 | 5.38% | 15.11% | 146.71% | 43.28% |
# Plot NAV
fig = bt_population.plot_cumulative_returns()
fig.update_layout(title="Wealth", xaxis_title=None, yaxis_title="Net Asset Value (NAV)")
fig
# Plot drawdowns
bt_population.plot_drawdowns()
Universal Algorithm for Portfolio Optimization¶
MSRP¶
We consider two methods for the resolution of the MSRP:
Via the Schaible transform, i.e., solving $$ \begin{array}{ll} \underset{\bm{y}}{\textm{minimize}} & \bm{y}^\T\bSigma\bm{y}\\ \textm{subject to} & \bm{y}^\T\left(\bmu - r_\textm{f}\bm{1}\right) = 1\\ & \bm{y}\ge\bm{0}, \end{array} $$ with $\w = \bm{y}/\left(\bm{1}^\T\bm{y}\right)$.
Via the universal iterative SQP-MVP algorithm, i.e., we iteratively obtain $\w^{k+1}$, for $k=0,1,\dots,$, by solving $$ \begin{array}{ll} \underset{\w}{\textm{maximize}} & \w^\T\bmu - \dfrac{\lambda^k}{2}\w^\T\bSigma\w\\ \textm{subject to} & \bm{1}^\T\w=1, \quad \w\ge\bm{0}, \end{array} $$ with $$ \lambda^k = \dfrac{(\w^k)^\T\bmu - r_\textm{f}}{(\w^k)^\T\bSigma\w^k}. $$
# Prepare data
stock_prices = SP500_stocks_2015to2020[
["AAPL", "AMZN", "AMD", "GM", "GOOGL", "MGM", "MSFT", "QCOM", "TSCO", "UPS"]
].loc["2019":]
X_lin = stock_prices.pct_change().dropna()
X_log = np.log(1 + X_lin)
T, N = X_lin.shape
mu = X_log.mean().values
Sigma = X_log.cov().values
# Using Schaible transform + CVX to get the optimal value
w_ = cp.Variable(N)
prob = cp.Problem(cp.Minimize(cp.quad_form(w_, Sigma)),
constraints=[w_ >= 0, mu.T @ w_ == 1])
prob.solve()
w_cvx = w_.value / np.sum(w_.value)
obj_cvx = (w_cvx.T @ mu) / np.sqrt(w_cvx.T @ Sigma @ w_cvx)
# Using the SQP-MVP algorithm
w = np.ones(N) / N
obj_sqp = [(w.T @ mu) / np.sqrt(w.T @ Sigma @ w)]
k = 0
for k in range(21):
lmd_k = (w.T @ mu) / (w.T @ Sigma @ w)
w_prev = w
w = cp.Variable(N)
prob = cp.Problem(cp.Maximize(mu.T @ w - (lmd_k/2) * cp.quad_form(w, Sigma)),
constraints=[cp.sum(w) == 1, w >= 0])
prob.solve()
w = w.value
obj_sqp.append((w.T @ mu) / np.sqrt(w.T @ Sigma @ w))
if np.max(np.abs(w - w_prev)) < 1e-4:
break
# Plot
df = pd.DataFrame({
"k": range(len(obj_sqp)),
"SQP iterative method": obj_sqp
})
plt.figure(figsize=(10, 6))
plt.axhline(y=obj_cvx, color='r', linestyle='-', linewidth=1.5)
plt.plot(df["k"], df["SQP iterative method"], color='b', linewidth=1.5, marker='o', markersize=2.5)
plt.title("Convergence")
plt.ylabel("objective value")
plt.xlabel("iteration")
plt.legend(["optimal value", "SQP iterative method"])
plt.grid(True)
plt.show()
Mean-volatility portfolio¶
We now consider two methods for the resolution of the mean-volatility portfolio:
Via an SOCP solver, i.e., solving $$ \begin{array}{ll} \underset{\w}{\textm{maximize}} & \w^\T\bmu - \kappa\sqrt{\w^\T\bSigma\w}\\ \textm{subject to} & \bm{1}^\T\w=1, \quad \w\ge\bm{0}, \end{array} $$
Via the universal iterative SQP-MVP algorithm, i.e., we iteratively obtain $\w^{k+1}$, for $k=0,1,\dots,$, by solving $$ \begin{array}{ll} \underset{\w}{\textm{maximize}} & \w^\T\bmu - \dfrac{\lambda^k}{2}\w^\T\bSigma\w\\ \textm{subject to} & \bm{1}^\T\w=1, \quad \w\ge\bm{0}, \end{array} $$ with $$ \lambda^k = \kappa/\sqrt{(\w^k)^\T\bSigma\w^k}. $$
kappa = 1.0
# Using CVX to get the optimal value
Sigma_12 = cholesky(Sigma)
w = cp.Variable(N)
prob = cp.Problem(cp.Maximize(mu.T @ w - kappa * cp.norm(Sigma_12 @ w, 2)),
constraints=[cp.sum(w) == 1, w >= 0])
result = prob.solve()
w_cvx = w.value
obj_cvx = mu.T @ w_cvx - kappa * np.sqrt(w_cvx.T @ Sigma @ w_cvx)
# Using the SQP-MVP algorithm
w = np.ones(N) / N
obj_sqp = [w.T @ mu - np.sqrt(w.T @ Sigma @ w)]
for k in range(21):
lmd_k = kappa / np.sqrt(w.T @ Sigma @ w)
w_prev = w
w = cp.Variable(N)
prob = cp.Problem(cp.Maximize(mu.T @ w - (lmd_k/2) * cp.quad_form(w, Sigma)),
constraints=[cp.sum(w) == 1, w >= 0])
prob.solve()
w = w.value
obj_sqp.append(w.T @ mu - kappa * np.sqrt(w.T @ Sigma @ w))
if np.max(np.abs(w - w_prev)) < 1e-4:
break
# Plot
df = pd.DataFrame({
"k": range(len(obj_sqp)),
"SQP iterative method": obj_sqp
})
plt.figure(figsize=(10, 6))
plt.axhline(y=obj_cvx, color='r', linestyle='-', linewidth=1.5)
plt.plot(df["k"], df["SQP iterative method"], color='b', linewidth=1.5, marker='o', markersize=2.5)
plt.title("Convergence")
plt.ylabel("objective value")
plt.xlabel("iteration")
plt.legend(["optimal value", "SQP iterative method"])
plt.grid(True)
plt.show()