Python Code for Portfolio Optimization Chapter 8 – Portfolio Backtesting
Daniel P. Palomar (2025). Portfolio Optimization: Theory and Application. Cambridge University Press.
Last update: March 18, 2026
Contributors:
- Daniel Palomar
- Claude Opus 4.6 (Anthropic, via Perplexity)
Libraries¶
The following libraries are used in the examples:
# Core numerical computing
import numpy as np
import pandas as pd
from collections.abc import Callable
# For financial data
import yfinance as yf # Loading financial data
import empyrical as emp # Performance metrics
# Book data (pip install "git+https://github.com/dppalomar/pob.git#subdirectory=python")
from pob_python import SP500_stocks_2015to2020, SP500_index_2015to2020
# Plotting
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
sns.set_theme(style="darkgrid")
import plotly.io as pio
pio.renderers.default = "notebook"
pio.templates.default = "plotly_white"
# Optimization
import cvxpy as cp # interface for convex optimization solvers
$\def\bm#1{\boldsymbol{#1}}$ $\def\textm#1{\textsf{#1}}$ $\def\T{{\mkern-2mu\raise-1mu\mathsf{T}}}$ $\def\R{\mathbb{R}}$ $\def\E{\rm I\kern-.2em E}$ $\def\w{\bm{w}}$ $\def\bmu{\bm{\mu}}$ $\def\bSigma{\bm{\Sigma}}$
Market data and portfolios¶
For illustrative purposes, we simply choose a few stocks from the S&P 500 index during 2015-2020.
# Select a 10-stock subset of the S&P 500 and keep data from 2018 onwards
stock_prices = SP500_stocks_2015to2020[
    ["AAPL", "AMZN", "AMD", "GM", "GOOGL", "MGM", "MSFT", "QCOM", "TSCO", "UPS"]
].loc["2018":]

# Display first and last rows
print("First few rows of stock prices:")
display(stock_prices.head())
print("\nLast few rows of stock prices:")
display(stock_prices.tail())

# Plot log-prices (log scale makes relative growth comparable across stocks)
plt.figure(figsize=(12, 6))
log_prices = np.log(stock_prices)
for col in log_prices.columns:
    plt.plot(log_prices.index, log_prices[col], linewidth=1, label=col)
plt.title('Log-prices')
plt.legend(title='stocks')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
First few rows of stock prices:
| AAPL | AMZN | AMD | GM | GOOGL | MGM | MSFT | QCOM | TSCO | UPS | |
|---|---|---|---|---|---|---|---|---|---|---|
| Date | ||||||||||
| 2018-01-02 | 41.514 | 1189.01 | 10.98 | 38.0722 | 1073.21 | 32.1285 | 82.5993 | 59.2277 | 72.8300 | 112.662 |
| 2018-01-03 | 41.506 | 1204.20 | 11.55 | 39.0012 | 1091.52 | 31.9559 | 82.9837 | 59.8999 | 73.3785 | 115.158 |
| 2018-01-04 | 41.699 | 1209.59 | 12.12 | 40.2035 | 1095.76 | 32.2723 | 83.7141 | 59.9817 | 74.6969 | 115.906 |
| 2018-01-05 | 42.174 | 1229.14 | 11.88 | 40.0851 | 1110.29 | 32.4928 | 84.7520 | 60.3814 | 76.4484 | 116.261 |
| 2018-01-08 | 42.017 | 1246.87 | 12.28 | 40.2764 | 1114.21 | 31.7354 | 84.8385 | 60.1997 | 76.2655 | 117.673 |
Last few rows of stock prices:
| AAPL | AMZN | AMD | GM | GOOGL | MGM | MSFT | QCOM | TSCO | UPS | |
|---|---|---|---|---|---|---|---|---|---|---|
| Date | ||||||||||
| 2020-09-16 | 112.13 | 3078.10 | 76.66 | 31.79 | 1512.09 | 23.01 | 205.05 | 114.56 | 138.250 | 159.87 |
| 2020-09-17 | 110.34 | 3008.73 | 76.55 | 31.92 | 1487.04 | 22.52 | 202.91 | 114.88 | 138.150 | 159.75 |
| 2020-09-18 | 106.84 | 2954.91 | 74.93 | 31.50 | 1451.09 | 22.02 | 200.39 | 110.69 | 138.110 | 159.66 |
| 2020-09-21 | 110.08 | 2960.47 | 77.94 | 30.00 | 1430.14 | 21.09 | 202.54 | 111.92 | 137.555 | 161.06 |
| 2020-09-22 | 111.81 | 3128.99 | 77.70 | 29.44 | 1459.82 | 21.62 | 207.42 | 113.82 | 141.610 | 161.89 |
Also for illustrative purposes, we choose the following three simple portfolios:
- $1/N$ portfolio: $$ \w = \frac{1}{N}\bm{1}; $$
- Global minimum variance portfolio (GMVP): $$ \begin{array}{ll} \underset{\w}{\textm{minimize}} & \w^\T\bSigma\w\\ \textm{subject to} & \bm{1}^\T\w=1, \quad \w\ge\bm{0}; \end{array} $$
- Inverse volatility portfolio (IVP): $$ \w = \frac{\bm{\sigma}^{-1}}{\bm{1}^\T\bm{\sigma}^{-1}}. $$
# Define portfolio functions
def one_over_N(X: pd.DataFrame) -> np.ndarray:
    """Equally weighted (1/N) portfolio: the same weight for every asset."""
    num_assets = X.shape[1]
    return np.full(num_assets, 1.0 / num_assets)
def design_GMVP(Sigma: np.ndarray) -> np.ndarray:
    """Design the (long-only) Global Minimum Variance Portfolio.

    Solves  minimize  w' Sigma w  subject to  1'w = 1, w >= 0  via CVXPY.

    Parameters
    ----------
    Sigma : np.ndarray
        N x N covariance matrix (should be positive semidefinite).

    Returns
    -------
    np.ndarray
        Optimal weight vector of length N.

    Raises
    ------
    RuntimeError
        If the solver does not produce a solution (e.g. infeasible or
        numerically failed problem).
    """
    N = Sigma.shape[0]
    w = cp.Variable(N)
    prob = cp.Problem(cp.Minimize(cp.quad_form(w, Sigma)),
                      [cp.sum(w) == 1, w >= 0])
    prob.solve()
    # Guard against silent failure: w.value is None when the solve fails,
    # which would otherwise propagate None/NaNs into the backtest.
    if w.value is None:
        raise RuntimeError(f"GMVP optimization failed with status '{prob.status}'.")
    return w.value
def GMVP(X: pd.DataFrame) -> np.ndarray:
    """Global Minimum Variance Portfolio computed from linear returns.

    Linear returns are first mapped to log-returns, whose sample
    covariance matrix is then passed to the GMVP designer.
    """
    log_rets = np.log(1 + X)  # covariance is estimated on log-returns
    return design_GMVP(log_rets.cov().values)
def design_IVP(sigma: np.ndarray) -> np.ndarray:
    """Inverse Volatility Portfolio: weights proportional to 1/sigma, normalized to sum to one."""
    inv_vol = 1.0 / sigma
    return inv_vol / inv_vol.sum()
def IVP(X: pd.DataFrame) -> np.ndarray:
    """Inverse Volatility Portfolio from linear returns (volatilities estimated on log-returns)."""
    vol = np.log(1 + X).std()
    return design_IVP(vol.values)
Helper functions¶
The following helper functions are used throughout the notebook for backtesting, performance measurement, and visualization.
Base-Python helpers¶
def backtest(
    portfolio_funcs: dict[str, Callable[[pd.DataFrame], np.ndarray]],
    prices: pd.DataFrame,
    lookback: int,
    rebalance_every: int = 1,
    optimize_every: int = 1,
    cost_bps: float = 0,
    verbose: bool = True
) -> tuple[pd.DataFrame, dict[str, pd.DataFrame]]:
    """
    Backtest portfolio strategies over historical data.

    At each time step from `lookback` to T, the function:
    1. Re-optimizes target weights every `optimize_every` periods.
    2. Rebalances to target weights every `rebalance_every` periods,
       computing turnover-based transaction costs.
    3. Computes the portfolio return and drifts the weights.

    Parameters
    ----------
    portfolio_funcs : dict
        Strategy names -> functions. Each function receives a DataFrame
        of linear returns and returns a 1-D weight array.
    prices : pd.DataFrame
        T x N asset price DataFrame.
    lookback : int
        Number of past return periods used for estimation.
    rebalance_every : int
        Periods between rebalancing to target weights.
    optimize_every : int
        Periods between re-optimizing target weights.
        Must be a multiple of `rebalance_every`.
    cost_bps : float
        One-way transaction cost in basis points applied to turnover.
    verbose : bool
        If True, print progress messages.

    Returns
    -------
    portf_rets_df : pd.DataFrame
        Portfolio returns (one column per strategy).
    ws : dict
        Strategy names -> DataFrames of weights over time.
    """
    if optimize_every % rebalance_every != 0:
        raise ValueError(
            f"optimize_every ({optimize_every}) must be a multiple of "
            f"rebalance_every ({rebalance_every})."
        )
    N = prices.shape[1]
    X = prices.pct_change().dropna()  # linear returns
    portf_rets = {}
    ws = {}
    for portfolio_name, portfolio_func in portfolio_funcs.items():
        if verbose:
            print(f"Backtesting portfolio '{portfolio_name}'...")
        # Start from an all-cash position: the first rebalance buys the full
        # portfolio, so the whole notional is counted as turnover (and cost).
        current_w = np.zeros(N)
        target_w = np.zeros(N)
        w = pd.DataFrame(index=X.index, columns=X.columns)
        portf_ret = pd.Series(index=X.index, dtype=float)
        portf_ret.name = portfolio_name
        for t in range(lookback, len(X)):
            if (t - lookback) % rebalance_every == 0:
                # Re-optimize target weights if necessary
                if (t - lookback) % optimize_every == 0:
                    # Estimate on the rolling window of the past `lookback` returns
                    target_w = portfolio_func(X.iloc[t - lookback:t])
                # Rebalance to target weights
                turnover = np.abs(target_w - current_w).sum()
                current_w = target_w.copy()
                transaction_cost = turnover * cost_bps / 1e4
            else:
                transaction_cost = 0.0
            # Store weights
            w.iloc[t] = current_w
            # Period return
            period_return = (X.iloc[t] * current_w).sum()
            portf_ret.iloc[t] = period_return - transaction_cost
            # Drift weights based on asset performance
            current_w = current_w * (1 + X.iloc[t])
            current_w = current_w / current_w.sum()
        # Drop the warm-up window (positional slicing)
        portf_rets[portfolio_name] = portf_ret[lookback:]
        ws[portfolio_name] = w[lookback:]
    portf_rets_df = pd.DataFrame(portf_rets)
    return portf_rets_df, ws
def calculate_performance_metrics(returns: pd.DataFrame) -> dict[str, float]:
    """Compute annualized return, annualized volatility, Sharpe ratio, and
    maximum drawdown for each column of daily returns.

    Assumes 252 trading days per year and a zero risk-free rate.
    """
    risk_free_rate = 0.0
    trading_days = 252
    mean_ret = returns.mean()
    std_ret = returns.std()
    ann_factor = np.sqrt(trading_days)

    def _max_drawdown(r):
        # Largest relative drop of compounded wealth from its running peak.
        wealth = (1 + r).cumprod()
        peak = wealth.cummax()
        return ((wealth - peak) / peak).min()

    return {
        'Annualized Return': (1 + mean_ret) ** trading_days - 1,
        'Annualized Volatility': std_ret * ann_factor,
        'Sharpe Ratio': (mean_ret - risk_free_rate) / std_ret * ann_factor,
        'Maximum Drawdown': returns.apply(_max_drawdown)
    }
def print_table_performance_metrics(returns: pd.DataFrame):
    """Render the performance-metrics table with two-decimal formatting."""
    metrics_df = pd.DataFrame(calculate_performance_metrics(returns))
    display(metrics_df.style.format("{:.2f}"))
def plot_cum_return(returns: pd.DataFrame) -> None:
    """Plot the compounded cumulative return of each portfolio."""
    wealth_minus_one = (1 + returns).cumprod() - 1
    fig, ax = plt.subplots(figsize=(12, 6))
    wealth_minus_one.plot(ax=ax)
    ax.set_xlabel(None)
    ax.set_title('Cumulative Return')
    ax.legend(title="portfolio")
    plt.show()
def plot_drawdown(returns: pd.DataFrame) -> None:
    """Plot the drawdown (decline from the running wealth peak) over time."""
    wealth = (1 + returns).cumprod()
    peak = wealth.cummax()
    dd = (wealth - peak) / peak
    fig, ax = plt.subplots(figsize=(12, 6))
    dd.plot(ax=ax)
    ax.set_xlabel(None)
    ax.set_title('Drawdown')
    ax.legend(title="portfolio")
    plt.show()
def plot_weights_over_time(weights: pd.DataFrame, portfolio_name: str):
    """Stacked-area chart of the portfolio weight allocation over time."""
    w = weights.copy()
    w.index = pd.to_datetime(w.index)
    # Weights may be stored as objects (from DataFrame.iloc assignment);
    # coerce to numeric and zero out anything unparseable.
    w = w.apply(pd.to_numeric, errors='coerce').fillna(0)

    fig, ax = plt.subplots(figsize=(14, 6))
    palette = plt.colormaps['tab20'](np.linspace(0, 1, len(w.columns)))
    stack_base = np.zeros(len(w.index))
    for color, asset in zip(palette, w.columns):
        layer_top = stack_base + w[asset].values
        ax.fill_between(w.index, stack_base, layer_top, label=asset, color=color)
        stack_base = layer_top

    ax.set_title(f"Weight allocation over time for portfolio {portfolio_name}")
    ax.set_ylabel("weight")
    ax.set_ylim(0, 1.0)
    ax.legend(title="stocks", bbox_to_anchor=(1.05, 1), loc='upper left')
    ax.xaxis.set_major_locator(mdates.MonthLocator(interval=2))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    ax.xaxis.set_minor_locator(mdates.WeekdayLocator())
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()
skfolio helpers¶
The following helper class and functions are used for backtesting via skfolio.
from skfolio.optimization import ConvexOptimization, EqualWeighted, MeanRisk, ObjectiveFunction
from skfolio import Population, RiskMeasure
from skfolio.model_selection import cross_val_predict, WalkForward
from skfolio.preprocessing import prices_to_returns
import plotly.graph_objects as go
import time
from datetime import timedelta
class Portfolio_via_CVXPY(ConvexOptimization):
    """Wraps a custom portfolio function into a skfolio-compatible estimator.

    The wrapped callable receives the return DataFrame passed to ``fit`` and
    must return a 1-D array of portfolio weights.
    """

    def __init__(self, cvxpy_fun=None, **kwargs):
        # Per the scikit-learn estimator contract (which skfolio follows),
        # __init__ only stores parameters verbatim; validation is deferred
        # to fit() so clone()/get_params() keep working.
        super().__init__(**kwargs)
        self.cvxpy_fun = cvxpy_fun

    def fit(self, X, y=None):
        # Delegate the weight computation to the user-supplied function and
        # store the result in the skfolio-standard `weights_` attribute.
        if self.cvxpy_fun is None:
            raise ValueError("cvxpy_fun must be provided at initialization")
        self.weights_ = np.asarray(self.cvxpy_fun(X))
        return self
def batch_cross_val_predict(optimizers, X, cv, **kwargs) -> list:
    """Backtest several optimizers via cross_val_predict, reporting timings."""
    results = []
    n_splits = cv.get_n_splits(X)
    t_batch = time.time()
    print(f"\nStarting batch backtesting with {len(optimizers)} portfolios...")
    print(f"Cross-validation: {n_splits} splits")
    print("=" * 60)
    for idx, optimizer in enumerate(optimizers, 1):
        # Fall back to a positional label when no name was configured.
        params = optimizer.portfolio_params or {}
        name = params.get("name", f"Portfolio {idx}")
        print(f"\n[{idx}/{len(optimizers)}] Backtesting '{name}'...")
        t_one = time.time()
        prediction = cross_val_predict(
            optimizer, X, cv=cv,
            portfolio_params=getattr(optimizer, "portfolio_params", {}),
            **kwargs
        )
        results.append(prediction)
        duration = str(timedelta(seconds=int(time.time() - t_one)))
        print(f" Completed '{name}' in {duration}")
    total_duration = str(timedelta(seconds=int(time.time() - t_batch)))
    print("=" * 60)
    print(f"Batch backtesting completed! Total time: {total_duration}")
    return results
def plot_composition_sidebyside(population, asset_names=None):
    """Grouped bar chart comparing the compositions of several portfolios."""
    if asset_names is None:
        asset_names = population[0].composition.index.tolist()
    fig = go.Figure()
    for ptf in population:
        comp = ptf.composition
        # Align every portfolio on the same asset axis; missing assets get 0.
        bar_weights = comp.iloc[:, 0].reindex(asset_names, fill_value=0)
        fig.add_trace(go.Bar(x=asset_names, y=bar_weights, name=ptf.name))
    fig.update_layout(
        title="Portfolio Compositions",
        xaxis_title="Assets", yaxis_title="Weight",
        barmode="group",
        xaxis={"categoryorder": "array", "categoryarray": asset_names}
    )
    return fig
Vanilla backtesting¶
Vanilla backtesting refers to simply dividing the available data into training data (for the portfolio design) and test data (for the portfolio assessment).
Via base Python¶
Daily rebalancing ignoring transaction costs¶
If we assume daily rebalancing (with daily data) and ignore transaction costs, the backtest is straightforward: simply multiply the matrix of assets' linear returns by the portfolio vector.
# Compute returns
X_lin = stock_prices.pct_change().dropna()    # linear (simple) returns
X_log = np.log(stock_prices).diff().dropna()  # log-returns
# or equivalently: X_log = np.log(1 + X_lin)

# Split data into training and test (50/50)
T, N = X_lin.shape
T_trn = round(0.50 * T)
X_lin_trn = X_lin.iloc[:T_trn]
X_lin_tst = X_lin.iloc[T_trn:]
X_log_trn = X_log.iloc[:T_trn]
X_log_tst = X_log.iloc[T_trn:]

# Estimate mu and Sigma with training data (log-returns only)
mu = X_log_trn.mean().values
Sigma = X_log_trn.cov().values

# Design portfolios (weights determined by training data alone)
w_one_over_N = np.repeat(1/N, N)
w_GMVP = design_GMVP(Sigma)
w_IVP = design_IVP(np.sqrt(np.diag(Sigma)))

# Backtest portfolios with test data (assuming daily rebalancing and no transaction cost):
# with daily rebalancing, the return series is just returns-matrix @ weight-vector
ret_one_over_N = X_lin_tst @ w_one_over_N
ret_GMVP = X_lin_tst @ w_GMVP
ret_IVP = X_lin_tst @ w_IVP

# Compute cumulative returns (compounded wealth minus initial capital)
wealth_one_over_N = (1 + ret_one_over_N).cumprod() - 1
wealth_GMVP = (1 + ret_GMVP).cumprod() - 1
wealth_IVP = (1 + ret_IVP).cumprod() - 1

# Plot cumulative returns
plt.figure(figsize=(12, 6))
wealth_one_over_N.plot(label='1/N', linewidth=1)
wealth_GMVP.plot(label='GMVP', linewidth=1)
wealth_IVP.plot(label='IVP', linewidth=1)
plt.title('Cumulative returns')
plt.legend(title='Portfolios')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
We can now conveniently reproduce the same backtest using the helper function backtest() defined above. With rebalance_every=1 and optimize_every=10000, the portfolio is optimized once at the start and then rebalanced to those fixed target weights every day:
# Backtest with daily rebalancing and no transaction costs via helper function
bt_ret_daily, bt_w_daily = backtest(
    {
        "1/N": one_over_N,
        "GMVP": GMVP,
        "IVP": IVP
    },
    stock_prices,
    lookback=T_trn,
    rebalance_every=1,
    optimize_every=10000,  # optimize only once at the beginning
    cost_bps=0
)

# Sanity check - should match the manual calculation above
print("Sanity check (results match manual calculation):")
print(f" 1/N: {np.allclose(ret_one_over_N.values, bt_ret_daily['1/N'].values, rtol=1e-5)}")
print(f" GMVP: {np.allclose(ret_GMVP.values, bt_ret_daily['GMVP'].values, rtol=1e-5)}")
print(f" IVP: {np.allclose(ret_IVP.values, bt_ret_daily['IVP'].values, rtol=1e-5)}")
Backtesting portfolio '1/N'... Backtesting portfolio 'GMVP'... Backtesting portfolio 'IVP'... Sanity check (results match manual calculation): 1/N: True GMVP: True IVP: True
# Report the daily-rebalanced backtest: wealth curve, metrics table, drawdowns
# Cumulative returns
plot_cum_return(bt_ret_daily)
# Performance metrics
print("Performance metrics (daily rebalancing, no transaction costs):")
print_table_performance_metrics(bt_ret_daily)
# Drawdowns
plot_drawdown(bt_ret_daily)
Performance metrics (daily rebalancing, no transaction costs):
| Annualized Return | Annualized Volatility | Sharpe Ratio | Maximum Drawdown | |
|---|---|---|---|---|
| 1/N | 0.51 | 0.35 | 1.20 | -0.35 |
| GMVP | 0.39 | 0.31 | 1.06 | -0.30 |
| IVP | 0.48 | 0.33 | 1.17 | -0.33 |
Realistic rebalancing including transaction costs¶
We can now easily include transaction costs (say, 60 bps) and reduce the rebalancing frequency to every 5 days (weekly) instead of daily:
# Rebalance every 5 days with 60 bps transaction costs
bt_ret_realistic, bt_w_realistic = backtest(
    {
        "1/N": one_over_N,
        "GMVP": GMVP,
        "IVP": IVP
    },
    stock_prices,
    lookback=T_trn,
    rebalance_every=5,
    optimize_every=10000,  # optimize only once at the beginning
    cost_bps=60
)

# Performance metrics
print("Performance metrics (weekly rebalancing, 60 bps transaction costs):")
print_table_performance_metrics(bt_ret_realistic)

# Cumulative returns and drawdowns
plot_cum_return(bt_ret_realistic)
plot_drawdown(bt_ret_realistic)
Backtesting portfolio '1/N'... Backtesting portfolio 'GMVP'... Backtesting portfolio 'IVP'... Performance metrics (weekly rebalancing, 60 bps transaction costs):
| Annualized Return | Annualized Volatility | Sharpe Ratio | Maximum Drawdown | |
|---|---|---|---|---|
| 1/N | 0.49 | 0.34 | 1.15 | -0.34 |
| GMVP | 0.37 | 0.31 | 1.01 | -0.30 |
| IVP | 0.45 | 0.33 | 1.12 | -0.33 |
Via skfolio¶
We reproduce the vanilla backtest using skfolio. First, a naive daily-rebalanced backtest as a sanity check:
# Convert prices to linear returns in skfolio's expected format
X_skfolio = prices_to_returns(stock_prices)

# Naive in-sample backtest of the equal-weighted portfolio (sanity check)
model_ewp = EqualWeighted()
model_ewp.fit(X_skfolio)
bt_ewp_naive = model_ewp.predict(X_skfolio)
print("Weights:", np.round(model_ewp.weights_, 4))
bt_ewp_naive.returns_df.head()
Weights: [0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1]
2018-01-03 0.014627 2018-01-04 0.013772 2018-01-05 0.007047 2018-01-08 0.003713 2018-01-09 -0.003522 Name: returns, dtype: float64
bt_ewp_naive.plot_cumulative_returns()
Now we perform a vanilla backtest with weekly rebalancing (every 5 days) and 60 bps transaction costs for the three portfolios:
# Wrap our portfolio functions for skfolio
portf_EWP = Portfolio_via_CVXPY(
    cvxpy_fun=one_over_N,
    portfolio_params=dict(name="1/N")
)
portf_GMVP = Portfolio_via_CVXPY(
    cvxpy_fun=GMVP,
    portfolio_params=dict(name="GMVP")
)
portf_IVP = Portfolio_via_CVXPY(
    cvxpy_fun=IVP,
    portfolio_params=dict(name="IVP")
)
all_formulations = [portf_EWP, portf_GMVP, portf_IVP]

# Vanilla backtest: rebalance every 5 days, optimize once (fixed train_size)
bt_list_vanilla = batch_cross_val_predict(
    all_formulations,
    X=X_skfolio,
    cv=WalkForward(test_size=5, train_size=T_trn),
)
bt_pop_vanilla = Population(bt_list_vanilla)
Starting batch backtesting with 3 portfolios... Cross-validation: 68 splits ============================================================ [1/3] Backtesting '1/N'... Completed '1/N' in 0:00:00 [2/3] Backtesting 'GMVP'... Completed 'GMVP' in 0:00:00 [3/3] Backtesting 'IVP'... Completed 'IVP' in 0:00:00 ============================================================ Batch backtesting completed! Total time: 0:00:00
pd.concat([p.returns_df for p in bt_pop_vanilla], axis=1).head(8)
| returns | returns | returns | |
|---|---|---|---|
| 2019-05-15 | 0.011418 | 0.009658 | 0.011886 |
| 2019-05-16 | 0.005786 | 0.002054 | 0.005829 |
| 2019-05-17 | -0.012718 | -0.009141 | -0.011855 |
| 2019-05-20 | -0.018609 | -0.010979 | -0.017017 |
| 2019-05-21 | 0.012484 | 0.012503 | 0.011755 |
| 2019-05-22 | -0.018468 | -0.023969 | -0.018084 |
| 2019-05-23 | -0.016648 | -0.011901 | -0.015110 |
| 2019-05-24 | -0.003409 | -0.005301 | -0.003556 |
# Summary table restricted to a few key performance metrics
bt_pop_vanilla.summary().loc[[
    'Annualized Sharpe Ratio',
    'CVaR at 95%',
    'MAX Drawdown',
    'Annualized Mean',
    'Annualized Standard Deviation'
]].T
| Annualized Sharpe Ratio | CVaR at 95% | MAX Drawdown | Annualized Mean | Annualized Standard Deviation | |
|---|---|---|---|---|---|
| 1/N | 1.21 | 5.55% | 39.22% | 42.03% | 34.61% |
| GMVP | 1.19 | 4.49% | 29.85% | 34.74% | 29.14% |
| IVP | 1.16 | 5.32% | 37.10% | 38.26% | 32.91% |
# Plot compounded wealth and drawdowns of the vanilla backtest
bt_pop_vanilla.set_portfolio_params(compounded=True)
fig = bt_pop_vanilla.plot_cumulative_returns()
fig.update_layout(title="Wealth (vanilla backtest via skfolio)", xaxis_title=None, yaxis_title="Net Asset Value (NAV)")
fig
bt_pop_vanilla.plot_drawdowns()
Walk-forward backtesting¶
Rather than keeping the portfolio fixed during all the test data, we can perform a walk-forward backtest by re-optimizing the portfolio every month (approximately 20 days) on a rolling-window basis.
Via base Python¶
# Re-optimize every 20 days, rebalance every 20 days, with 60 bps transaction costs
# (rolling-window walk-forward: estimation window slides with time)
bt_ret_wf, bt_w_wf = backtest(
    {
        "1/N": one_over_N,
        "GMVP": GMVP,
        "IVP": IVP
    },
    stock_prices,
    lookback=T_trn,
    rebalance_every=20,
    optimize_every=20,
    cost_bps=60
)
Backtesting portfolio '1/N'... Backtesting portfolio 'GMVP'... Backtesting portfolio 'IVP'...
# Report the walk-forward backtest: metrics table, wealth curve, drawdowns
# Performance metrics
print("Performance metrics (walk-forward, monthly reoptimization, 60 bps):")
print_table_performance_metrics(bt_ret_wf)
# Cumulative returns and drawdowns
plot_cum_return(bt_ret_wf)
plot_drawdown(bt_ret_wf)
Performance metrics (walk-forward, monthly reoptimization, 60 bps):
| Annualized Return | Annualized Volatility | Sharpe Ratio | Maximum Drawdown | |
|---|---|---|---|---|
| 1/N | 0.48 | 0.34 | 1.16 | -0.33 |
| GMVP | 0.37 | 0.30 | 1.06 | -0.29 |
| IVP | 0.44 | 0.32 | 1.12 | -0.32 |
Since the portfolios are re-optimized over time, we can plot the evolution of the weight allocations:
# Stacked-area charts of the time-varying weight allocations
plot_weights_over_time(bt_w_wf["1/N"], portfolio_name="1/N")
plot_weights_over_time(bt_w_wf["GMVP"], portfolio_name="GMVP")
plot_weights_over_time(bt_w_wf["IVP"], portfolio_name="IVP")
Via skfolio¶
We reproduce the walk-forward backtest using skfolio. With test_size=20, the portfolio is reoptimized and rebalanced every 20 days:
# Walk-forward backtest: reoptimize and rebalance every 20 days
# (purged_size=1 leaves a 1-day gap between each train and test window)
bt_list_wf = batch_cross_val_predict(
    all_formulations,
    X=X_skfolio,
    cv=WalkForward(test_size=20, train_size=T_trn, purged_size=1),
)
bt_pop_wf = Population(bt_list_wf)
Starting batch backtesting with 3 portfolios... Cross-validation: 17 splits ============================================================ [1/3] Backtesting '1/N'... Completed '1/N' in 0:00:00 [2/3] Backtesting 'GMVP'... Completed 'GMVP' in 0:00:00 [3/3] Backtesting 'IVP'... Completed 'IVP' in 0:00:00 ============================================================ Batch backtesting completed! Total time: 0:00:00
pd.concat([p.returns_df for p in bt_pop_wf], axis=1).head(8)
| returns | returns | returns | |
|---|---|---|---|
| 2019-05-16 | 0.005786 | 0.002054 | 0.005829 |
| 2019-05-17 | -0.012718 | -0.009141 | -0.011855 |
| 2019-05-20 | -0.018609 | -0.010979 | -0.017017 |
| 2019-05-21 | 0.012484 | 0.012503 | 0.011755 |
| 2019-05-22 | -0.018468 | -0.024107 | -0.018170 |
| 2019-05-23 | -0.016648 | -0.011986 | -0.015101 |
| 2019-05-24 | -0.003409 | -0.005392 | -0.003596 |
| 2019-05-28 | 0.004439 | -0.010546 | -0.000983 |
# Summary table restricted to a few key performance metrics
bt_pop_wf.summary().loc[[
    'Annualized Sharpe Ratio',
    'CVaR at 95%',
    'MAX Drawdown',
    'Annualized Mean',
    'Annualized Standard Deviation'
]].T
| Annualized Sharpe Ratio | CVaR at 95% | MAX Drawdown | Annualized Mean | Annualized Standard Deviation | |
|---|---|---|---|---|---|
| 1/N | 1.15 | 5.55% | 39.22% | 39.84% | 34.64% |
| GMVP | 1.03 | 4.60% | 31.80% | 31.06% | 30.10% |
| IVP | 1.08 | 5.38% | 38.11% | 36.11% | 33.33% |
# Extract the last-window portfolio of each strategy (renamed to the strategy
# name) and compare the final compositions side by side
last_portfolios = Population([
    setattr(mpp[-1], 'name', mpp.name) or mpp[-1]  # setattr returns None, so `or` yields the portfolio
    for mpp in bt_pop_wf
])
plot_composition_sidebyside(last_portfolios, asset_names=stock_prices.columns)

# Compounded wealth over the walk-forward test period
bt_pop_wf.set_portfolio_params(compounded=True)
fig = bt_pop_wf.plot_cumulative_returns()
fig.update_layout(title="Wealth (walk-forward via skfolio)", xaxis_title=None, yaxis_title="Net Asset Value (NAV)")
fig

# Weight evolution per strategy
for mpp in bt_pop_wf:
    print(f"\nWeights for: {mpp.name}")
    display(mpp.plot_weights_per_observation())
Weights for: 1/N
Weights for: GMVP
Weights for: IVP
bt_pop_wf.plot_drawdowns()
Multiple randomized backtesting¶
Rather than running a single backtest, we can introduce randomness in the data and perform multiple backtests.
Via base Python¶
First, we resample the data 200 times from the full S&P 500 universe over 2017-2020. Each time we randomly select $N=8$ stocks and a random contiguous period of two years.
def financial_data_resample(
prices: pd.DataFrame,
num_datasets: int = 200,
N_sample: int = 8,
T_sample: int = 252 * 2
) -> list[pd.DataFrame]:
"""
Resample financial data by randomly selecting stocks and time windows.
Parameters
----------
prices : pd.DataFrame
Full price DataFrame.
num_datasets : int
Number of resampled datasets.
N_sample : int
Number of stocks per dataset.
T_sample : int
Length of each time window.
Returns
-------
list of pd.DataFrame
"""
resampled = []
for _ in range(num_datasets):
selected_stocks = np.random.choice(prices.columns, size=N_sample, replace=False)
max_start = max(len(prices) - T_sample, 1)
start_idx = np.random.randint(0, max_start)
subset = prices.iloc[start_idx:start_idx + T_sample][selected_stocks]
resampled.append(subset)
return resampled
# Use the full S&P 500 universe from 2017 onwards as the resampling pool
stock_prices_2017 = SP500_stocks_2015to2020.loc["2017":]
stock_prices_resampled = financial_data_resample(
    stock_prices_2017,
    num_datasets=200,
    N_sample=8,
    T_sample=252 * 2
)

# Quick sanity check
print(f"Number of resampled datasets: {len(stock_prices_resampled)}")
print(f"Shape of first dataset: {stock_prices_resampled[0].shape}")
display(stock_prices_resampled[0].head())
Number of resampled datasets: 200 Shape of first dataset: (504, 8)
| AON | DTE | TXT | ETN | NEM | BF.B | HD | GL | |
|---|---|---|---|---|---|---|---|---|
| Date | ||||||||
| 2018-01-17 | 132.8703 | 94.7915 | 58.8927 | 76.0889 | 36.8911 | 51.8825 | 186.852 | 90.1474 |
| 2018-01-18 | 131.4880 | 94.2269 | 58.5344 | 76.0525 | 36.6947 | 51.4490 | 185.459 | 89.7850 |
| 2018-01-19 | 131.5854 | 93.9355 | 58.6240 | 76.4075 | 36.9098 | 51.3349 | 188.264 | 90.2650 |
| 2018-01-22 | 132.3641 | 94.1267 | 58.8131 | 76.7806 | 37.0874 | 51.4414 | 191.191 | 90.8821 |
| 2018-01-23 | 133.7658 | 95.3197 | 58.8230 | 77.4724 | 37.7046 | 52.1867 | 191.602 | 90.8723 |
Then, we perform the multiple backtests (this will take some time):
# Function to run multiple backtests
def run_multiple_backtests(
    portfolio_funcs: dict[str, Callable[[pd.DataFrame], np.ndarray]],
    price_datasets: list[pd.DataFrame],
    lookback: int = 252,
    optimize_every: int = 20,
    rebalance_every: int = 5,
    cost_bps: float = 60
) -> dict[str, list[pd.Series]]:
    """
    Run backtests on multiple resampled datasets.

    Parameters
    ----------
    portfolio_funcs : dict
        Strategy names -> functions mapping a returns DataFrame to weights.
    price_datasets : list of pd.DataFrame
        Resampled price datasets (e.g. from `financial_data_resample`).
    lookback, optimize_every, rebalance_every, cost_bps
        Passed through to `backtest` for every dataset.

    Returns
    -------
    dict
        Portfolio names -> list of return Series (one per dataset).
    """
    results = {name: [] for name in portfolio_funcs}
    for i, prices in enumerate(price_datasets):
        # Progress indicator every 50 datasets (this loop can be slow)
        if i % 50 == 0:
            print(f"Processing dataset {i + 1}/{len(price_datasets)}...")
        bt_ret, _ = backtest(
            portfolio_funcs, prices,
            lookback=lookback,
            optimize_every=optimize_every,
            rebalance_every=rebalance_every,
            cost_bps=cost_bps,
            verbose=False
        )
        for name in portfolio_funcs:
            results[name].append(bt_ret[name])
    return results
# Run the three strategies on all 200 resampled datasets (takes a while)
multiple_bt_results = run_multiple_backtests(
    {
        "1/N": one_over_N,
        "GMVP": GMVP,
        "IVP": IVP
    },
    stock_prices_resampled,
    lookback=252,
    optimize_every=20,
    rebalance_every=5,
    cost_bps=60
)
Processing dataset 1/200... Processing dataset 51/200... Processing dataset 101/200... Processing dataset 151/200...
Now we can show the averaged results across all 200 backtests in a table:
def summarize_multiple_backtests(results: dict[str, list[pd.Series]]) -> pd.DataFrame:
    """Average each performance metric over all backtests, per portfolio."""
    rows = {}
    for name, returns_list in results.items():
        # One metrics dict per resampled dataset.
        per_run = [calculate_performance_metrics(pd.DataFrame(r)) for r in returns_list]
        rows[name] = {
            metric: np.mean([run[metric] for run in per_run])
            for metric in per_run[0]
        }
    return pd.DataFrame(rows).T
# Average the performance metrics over the 200 backtests and display
summary_table = summarize_multiple_backtests(multiple_bt_results)
display(summary_table.style.format("{:.4f}"))
| Annualized Return | Annualized Volatility | Sharpe Ratio | Maximum Drawdown | |
|---|---|---|---|---|
| 1/N | 0.0960 | 0.2352 | 0.4654 | -0.2467 |
| GMVP | 0.1025 | 0.2046 | 0.6192 | -0.2134 |
| IVP | 0.0968 | 0.2234 | 0.5098 | -0.2366 |
And we can also show the results in the form of bar plots:
# Create bar plots for selected metrics.
# NOTE: the original called plt.figure(figsize=...) before DataFrame.plot(),
# but DataFrame.plot(kind='bar') creates its own figure, which left a stray
# empty 1200x600 figure behind (visible as "<Figure ... with 0 Axes>").
# Passing figsize directly to .plot() fixes that.
ax = summary_table[["Maximum Drawdown", "Annualized Volatility"]].abs().plot(
    kind='bar', figsize=(12, 6)
)
ax.set_title("Risk Metrics Comparison")
ax.set_ylabel("Value")
plt.xticks(rotation=0)
plt.tight_layout()
plt.show()
<Figure size 1200x600 with 0 Axes>
Via skfolio¶
The library skfolio provides MultipleRandomizedCV, which is based on the "Multiple Randomized Backtests" methodology of Palomar (2025). It repeatedly samples distinct asset subsets and contiguous time windows, then applies a walk-forward split to each subsample.
from skfolio.model_selection import MultipleRandomizedCV
from skfolio.measures import RatioMeasure

# Inner walk-forward: reoptimize every 20 days
walk_forward = WalkForward(test_size=20, train_size=252)

# Multiple randomized CV: 200 subsamples, 8 assets, 2-year windows
cv_mc = MultipleRandomizedCV(
    walk_forward=walk_forward,
    n_subsamples=200,
    asset_subset_size=8,
    window_size=252 * 2,
    random_state=42,  # fixed seed for reproducibility
)

# Use the broader 2017-2020 data, converted to returns
X_skfolio_2017 = prices_to_returns(SP500_stocks_2015to2020.loc["2017":])

# Run multiple randomized backtests for each portfolio
pred_EWP_mc = cross_val_predict(
    Portfolio_via_CVXPY(cvxpy_fun=one_over_N),
    X_skfolio_2017, cv=cv_mc,
    portfolio_params={"tag": "1/N"},
)
pred_GMVP_mc = cross_val_predict(
    Portfolio_via_CVXPY(cvxpy_fun=GMVP),
    X_skfolio_2017, cv=cv_mc,
    portfolio_params={"tag": "GMVP"},
)
pred_IVP_mc = cross_val_predict(
    Portfolio_via_CVXPY(cvxpy_fun=IVP),
    X_skfolio_2017, cv=cv_mc,
    portfolio_params={"tag": "IVP"},
)

# Combine and plot the distribution of annualized Sharpe ratios
population_mc = pred_EWP_mc + pred_GMVP_mc + pred_IVP_mc
population_mc.plot_distribution(
    measure_list=[RatioMeasure.ANNUALIZED_SHARPE_RATIO],
    tag_list=["1/N", "GMVP", "IVP"],
)
# Report mean and dispersion of the Sharpe ratio across the 200 backtests
for pred in [pred_EWP_mc, pred_GMVP_mc, pred_IVP_mc]:
    tag = pred[0].tag
    mean_sr = pred.measures_mean(measure=RatioMeasure.ANNUALIZED_SHARPE_RATIO)
    std_sr = pred.measures_std(measure=RatioMeasure.ANNUALIZED_SHARPE_RATIO)
    print(f"{tag}\n{'=' * len(tag)}")
    print(f"Average Sharpe Ratio: {mean_sr:0.4f}")
    print(f"Sharpe Ratio Std Dev: {std_sr:0.4f}\n")
1/N === Average Sharpe Ratio: 0.5783 Sharpe Ratio Std Dev: 0.7058 GMVP ==== Average Sharpe Ratio: 0.7932 Sharpe Ratio Std Dev: 0.8371 IVP === Average Sharpe Ratio: 0.6433 Sharpe Ratio Std Dev: 0.7393
# Boxplot of the CVaR ratio per strategy across all randomized backtests
population_mc.boxplot_measure(
    measure=RatioMeasure.CVAR_RATIO,
    tag_list=["1/N", "GMVP", "IVP"],
)

# A few sample wealth paths for the GMVP strategy
fig = pred_GMVP_mc[:10].plot_cumulative_returns(use_tag_in_legend=False)
fig.update_layout(title="GMVP: 10 sample paths from randomized backtesting")
fig
pred_GMVP_mc[:2].plot_composition(display_sub_ptf_name=False)