Python Code for Portfolio Optimization
Chapter 8 – Portfolio Backtesting

Daniel P. Palomar (2025). Portfolio Optimization: Theory and Application. Cambridge University Press.

Last update: March 18, 2026

Contributors:


Libraries

The following libraries are used in the examples:

# Core numerical computing
import numpy as np
import pandas as pd
from collections.abc import Callable

# For financial data
import yfinance as yf       # Loading financial data
import empyrical as emp     # Performance metrics

# Book data (pip install "git+https://github.com/dppalomar/pob.git#subdirectory=python")
from pob_python import SP500_stocks_2015to2020, SP500_index_2015to2020

# Plotting
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
sns.set_theme(style="darkgrid")
import plotly.io as pio
pio.renderers.default = "notebook"
pio.templates.default = "plotly_white"

# Optimization
import cvxpy as cp                  # interface for convex optimization solvers

$\def\bm#1{\boldsymbol{#1}}$ $\def\textm#1{\textsf{#1}}$ $\def\T{{\mkern-2mu\raise-1mu\mathsf{T}}}$ $\def\R{\mathbb{R}}$ $\def\E{\rm I\kern-.2em E}$ $\def\w{\bm{w}}$ $\def\bmu{\bm{\mu}}$ $\def\bSigma{\bm{\Sigma}}$

Market data and portfolios

For illustrative purposes, we simply choose a few stocks from the S&P 500 index during 2015-2020.

# Select 10 illustrative S&P 500 stocks and keep data from 2018 onwards.
stock_prices = SP500_stocks_2015to2020[
    ["AAPL", "AMZN", "AMD", "GM", "GOOGL", "MGM", "MSFT", "QCOM", "TSCO", "UPS"]
].loc["2018":]

# Display first and last rows
print("First few rows of stock prices:")
display(stock_prices.head())
print("\nLast few rows of stock prices:")
display(stock_prices.tail())

# Plot log-prices (log scale makes growth rates comparable across stocks)
plt.figure(figsize=(12, 6))
log_prices = np.log(stock_prices)
for col in log_prices.columns:
    plt.plot(log_prices.index, log_prices[col], linewidth=1, label=col)
plt.title('Log-prices')
plt.legend(title='stocks')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
First few rows of stock prices:
AAPL AMZN AMD GM GOOGL MGM MSFT QCOM TSCO UPS
Date
2018-01-02 41.514 1189.01 10.98 38.0722 1073.21 32.1285 82.5993 59.2277 72.8300 112.662
2018-01-03 41.506 1204.20 11.55 39.0012 1091.52 31.9559 82.9837 59.8999 73.3785 115.158
2018-01-04 41.699 1209.59 12.12 40.2035 1095.76 32.2723 83.7141 59.9817 74.6969 115.906
2018-01-05 42.174 1229.14 11.88 40.0851 1110.29 32.4928 84.7520 60.3814 76.4484 116.261
2018-01-08 42.017 1246.87 12.28 40.2764 1114.21 31.7354 84.8385 60.1997 76.2655 117.673
Last few rows of stock prices:
AAPL AMZN AMD GM GOOGL MGM MSFT QCOM TSCO UPS
Date
2020-09-16 112.13 3078.10 76.66 31.79 1512.09 23.01 205.05 114.56 138.250 159.87
2020-09-17 110.34 3008.73 76.55 31.92 1487.04 22.52 202.91 114.88 138.150 159.75
2020-09-18 106.84 2954.91 74.93 31.50 1451.09 22.02 200.39 110.69 138.110 159.66
2020-09-21 110.08 2960.47 77.94 30.00 1430.14 21.09 202.54 111.92 137.555 161.06
2020-09-22 111.81 3128.99 77.70 29.44 1459.82 21.62 207.42 113.82 141.610 161.89

Also for illustrative purposes, we choose the following three simple portfolios:

  • $1/N$ portfolio: $$ \w = \frac{1}{N}\bm{1}; $$
  • Global minimum variance portfolio (GMVP): $$ \begin{array}{ll} \underset{\w}{\textm{minimize}} & \w^\T\bSigma\w\\ \textm{subject to} & \bm{1}^\T\w=1, \quad \w\ge\bm{0}; \end{array} $$
  • Inverse volatility portfolio (IVP): $$ \w = \frac{\bm{\sigma}^{-1}}{\bm{1}^\T\bm{\sigma}^{-1}}. $$
# Define portfolio functions

def one_over_N(X: pd.DataFrame) -> np.ndarray:
    """Return the equal-weight (1/N) portfolio for the assets in X."""
    num_assets = X.shape[1]
    return np.full(num_assets, 1.0 / num_assets)

def design_GMVP(Sigma: np.ndarray) -> np.ndarray:
    """Solve the long-only global minimum variance portfolio (GMVP).

    Minimizes w' Sigma w subject to sum(w) == 1 and w >= 0, via CVXPY.
    """
    n = Sigma.shape[0]
    w = cp.Variable(n)
    objective = cp.Minimize(cp.quad_form(w, Sigma))
    constraints = [cp.sum(w) == 1, w >= 0]
    cp.Problem(objective, constraints).solve()
    return w.value

def GMVP(X: pd.DataFrame) -> np.ndarray:
    """Global minimum variance portfolio from a window of linear returns.

    The covariance matrix is estimated on log-returns, obtained from the
    linear returns as log(1 + X).
    """
    Sigma = np.log(1 + X).cov().values  # covariance of log-returns
    return design_GMVP(Sigma)

def design_IVP(sigma: np.ndarray) -> np.ndarray:
    """Return the inverse-volatility portfolio given asset volatilities."""
    inv_vol = 1.0 / sigma
    return inv_vol / inv_vol.sum()

def IVP(X: pd.DataFrame) -> np.ndarray:
    """Inverse-volatility portfolio from a window of linear returns.

    Volatilities are estimated on log-returns, obtained as log(1 + X).
    """
    vol = np.log(1 + X).std()  # per-asset volatility of log-returns
    return design_IVP(vol.values)

Helper functions

The following helper functions are used throughout the notebook for backtesting, performance measurement, and visualization.

Base-Python helpers

def backtest(
    portfolio_funcs: dict[str, Callable[[pd.DataFrame], np.ndarray]],
    prices: pd.DataFrame,
    lookback: int,
    rebalance_every: int = 1,
    optimize_every: int = 1,
    cost_bps: float = 0,
    verbose: bool = True
) -> tuple[pd.DataFrame, dict[str, pd.DataFrame]]:
    """
    Backtest portfolio strategies over historical data.

    At each time step from `lookback` to T, the function:
      1. Re-optimizes target weights every `optimize_every` periods.
      2. Rebalances to target weights every `rebalance_every` periods,
         computing turnover-based transaction costs.
      3. Computes the portfolio return and drifts the weights.

    Parameters
    ----------
    portfolio_funcs : dict
        Strategy names -> functions. Each function receives a DataFrame
        of linear returns and returns a 1-D weight array.
    prices : pd.DataFrame
        T x N asset price DataFrame.
    lookback : int
        Number of past return periods used for estimation.
    rebalance_every : int
        Periods between rebalancing to target weights.
    optimize_every : int
        Periods between re-optimizing target weights.
        Must be a multiple of `rebalance_every`.
    cost_bps : float
        One-way transaction cost in basis points applied to turnover.
    verbose : bool
        If True, print progress messages.

    Returns
    -------
    portf_rets_df : pd.DataFrame
        Portfolio returns (one column per strategy).
    ws : dict
        Strategy names -> DataFrames of weights over time.
    """
    if optimize_every % rebalance_every != 0:
        raise ValueError(
            f"optimize_every ({optimize_every}) must be a multiple of "
            f"rebalance_every ({rebalance_every})."
        )

    N = prices.shape[1]
    # Linear returns; dropna removes the leading NaN row from pct_change.
    X = prices.pct_change().dropna()

    portf_rets = {}
    ws = {}

    for portfolio_name, portfolio_func in portfolio_funcs.items():
        if verbose:
            print(f"Backtesting portfolio '{portfolio_name}'...")

        # Start from an all-zero (cash) position, so the first rebalance
        # pays transaction costs for the full initial allocation.
        current_w = np.zeros(N)
        target_w = np.zeros(N)
        w = pd.DataFrame(index=X.index, columns=X.columns)
        portf_ret = pd.Series(index=X.index, dtype=float)
        portf_ret.name = portfolio_name

        for t in range(lookback, len(X)):
            if (t - lookback) % rebalance_every == 0:
                # Re-optimize target weights if necessary
                if (t - lookback) % optimize_every == 0:
                    # Rolling window of the most recent `lookback` returns.
                    target_w = portfolio_func(X.iloc[t - lookback:t])

                # Rebalance to target weights
                # Turnover = L1 distance between drifted and target weights.
                turnover = np.abs(target_w - current_w).sum()
                current_w = target_w.copy()
                transaction_cost = turnover * cost_bps / 1e4
            else:
                transaction_cost = 0.0

            # Store weights
            w.iloc[t] = current_w

            # Period return
            period_return = (X.iloc[t] * current_w).sum()
            portf_ret.iloc[t] = period_return - transaction_cost

            # Drift weights based on asset performance
            current_w = current_w * (1 + X.iloc[t])
            current_w = current_w / current_w.sum()

        # Drop the warm-up period (positional slicing).
        portf_rets[portfolio_name] = portf_ret[lookback:]
        ws[portfolio_name] = w[lookback:]

    portf_rets_df = pd.DataFrame(portf_rets)
    return portf_rets_df, ws
def calculate_performance_metrics(returns: pd.DataFrame) -> dict[str, pd.Series]:
    """Calculate annualized return, volatility, Sharpe ratio, and max drawdown.

    Parameters
    ----------
    returns : pd.DataFrame
        Daily returns, one column per portfolio/strategy.

    Returns
    -------
    dict
        Metric name -> pd.Series with one value per column of `returns`.
    """
    risk_free_rate = 0.0   # daily risk-free rate used in the Sharpe ratio
    trading_days = 252     # trading days per year, for annualization

    # Geometric annualization of the mean daily return.
    annualized_return = (1 + returns.mean())**trading_days - 1
    annualized_volatility = returns.std() * np.sqrt(trading_days)
    sharpe_ratio = (returns.mean() - risk_free_rate) / returns.std() * np.sqrt(trading_days)

    def _max_drawdown(r: pd.Series) -> float:
        """Most negative peak-to-trough decline of the compounded wealth path."""
        cum = (1 + r).cumprod()
        return ((cum - cum.cummax()) / cum.cummax()).min()

    max_drawdown = returns.apply(_max_drawdown)

    return {
        'Annualized Return': annualized_return,
        'Annualized Volatility': annualized_volatility,
        'Sharpe Ratio': sharpe_ratio,
        'Maximum Drawdown': max_drawdown
    }


def print_table_performance_metrics(returns: pd.DataFrame):
    """Render the performance metrics of each portfolio as a styled table."""
    metrics = calculate_performance_metrics(returns)
    table = pd.DataFrame(metrics)
    display(table.style.format("{:.2f}"))
def plot_cum_return(returns: pd.DataFrame) -> None:
    """Plot the compounded cumulative return of each portfolio."""
    cum_rets = (1 + returns).cumprod() - 1
    _, ax = plt.subplots(figsize=(12, 6))
    cum_rets.plot(ax=ax)
    ax.set_title('Cumulative Return')
    ax.set_xlabel(None)
    ax.legend(title="portfolio")
    plt.show()


def plot_drawdown(returns: pd.DataFrame) -> None:
    """Plot the drawdown (decline from running peak) of each portfolio."""
    wealth = (1 + returns).cumprod()
    peak = wealth.cummax()
    dd = (wealth - peak) / peak
    _, ax = plt.subplots(figsize=(12, 6))
    dd.plot(ax=ax)
    ax.set_title('Drawdown')
    ax.set_xlabel(None)
    ax.legend(title="portfolio")
    plt.show()
def plot_weights_over_time(weights: pd.DataFrame, portfolio_name: str):
    """Plot the evolution of portfolio weights over time as a stacked area chart."""
    w = weights.copy()
    w.index = pd.to_datetime(w.index)
    # Coerce to numeric and treat any unparseable entries as zero weight.
    w = w.apply(pd.to_numeric, errors='coerce').fillna(0)

    fig, ax = plt.subplots(figsize=(14, 6))
    palette = plt.colormaps['tab20'](np.linspace(0, 1, len(w.columns)))
    baseline = np.zeros(len(w.index))

    # Stack each asset's weight band on top of the previous ones.
    for color, asset in zip(palette, w.columns):
        band = w[asset].values
        ax.fill_between(w.index, baseline, baseline + band,
                        label=asset, color=color)
        baseline = baseline + band

    ax.set_title(f"Weight allocation over time for portfolio {portfolio_name}")
    ax.set_ylabel("weight")
    ax.set_ylim(0, 1.0)
    ax.legend(title="stocks", bbox_to_anchor=(1.05, 1), loc='upper left')
    ax.xaxis.set_major_locator(mdates.MonthLocator(interval=2))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    ax.xaxis.set_minor_locator(mdates.WeekdayLocator())
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

skfolio helpers

The following helper class and functions are used for backtesting via skfolio.

from skfolio.optimization import ConvexOptimization, EqualWeighted, MeanRisk, ObjectiveFunction
from skfolio import Population, RiskMeasure
from skfolio.model_selection import cross_val_predict, WalkForward
from skfolio.preprocessing import prices_to_returns
import plotly.graph_objects as go
import time
from datetime import timedelta


class Portfolio_via_CVXPY(ConvexOptimization):
    """Wraps a custom portfolio function into a skfolio-compatible estimator.

    The wrapped callable receives the training return data `X` and must
    return a 1-D array-like of portfolio weights.
    """

    def __init__(self, cvxpy_fun=None, **kwargs):
        # Extra kwargs (e.g. portfolio_params) are forwarded to the skfolio base.
        super().__init__(**kwargs)
        self.cvxpy_fun = cvxpy_fun

    def fit(self, X, y=None):
        """Compute and store the portfolio weights for the training data X."""
        if self.cvxpy_fun is None:
            raise ValueError("cvxpy_fun must be provided at initialization")
        # Trailing underscore follows the scikit-learn fitted-attribute convention.
        self.weights_ = np.asarray(self.cvxpy_fun(X))
        return self


def batch_cross_val_predict(optimizers, X, cv, **kwargs) -> list:
    """Run cross_val_predict on multiple optimizers with timing information."""
    results = []
    n_splits = cv.get_n_splits(X)
    overall_start = time.time()

    print(f"\nStarting batch backtesting with {len(optimizers)} portfolios...")
    print(f"Cross-validation: {n_splits} splits")
    print("=" * 60)

    for i, optimizer in enumerate(optimizers, 1):
        # Fall back to a generic label when no name was given.
        name = (optimizer.portfolio_params or {}).get("name", f"Portfolio {i}")
        print(f"\n[{i}/{len(optimizers)}] Backtesting '{name}'...")
        t0 = time.time()
        prediction = cross_val_predict(
            optimizer, X, cv=cv,
            portfolio_params=getattr(optimizer, "portfolio_params", {}),
            **kwargs
        )
        results.append(prediction)
        elapsed = str(timedelta(seconds=int(time.time() - t0)))
        print(f"  Completed '{name}' in {elapsed}")

    total_elapsed = str(timedelta(seconds=int(time.time() - overall_start)))
    print("=" * 60)
    print(f"Batch backtesting completed! Total time: {total_elapsed}")
    return results


def plot_composition_sidebyside(population, asset_names=None):
    """Plot portfolio compositions side-by-side as a grouped bar chart."""
    if asset_names is None:
        # Default to the asset order of the first portfolio.
        asset_names = population[0].composition.index.tolist()

    fig = go.Figure()
    for ptf in population:
        # Align every portfolio to the same asset axis, filling gaps with 0.
        aligned = ptf.composition.iloc[:, 0].reindex(asset_names, fill_value=0)
        fig.add_trace(go.Bar(x=asset_names, y=aligned, name=ptf.name))

    fig.update_layout(
        title="Portfolio Compositions",
        xaxis_title="Assets", yaxis_title="Weight",
        barmode="group",
        xaxis={"categoryorder": "array", "categoryarray": asset_names}
    )
    return fig

Vanilla backtesting

Vanilla backtesting refers to simply dividing the available data into training data (for the portfolio design) and test data (for the portfolio assessment).

Via base Python

Daily rebalancing ignoring transaction costs

If we assume daily rebalancing (with daily data) and ignore transaction costs, the backtest is straightforward: simply multiply the matrix of assets' linear returns by the portfolio vector.

# Compute returns (log-returns for estimation, linear returns for backtesting)
X_lin = stock_prices.pct_change().dropna()
X_log = np.log(stock_prices).diff().dropna()
# or equivalently: X_log = np.log(1 + X_lin)

# Split data into training and test (50/50)
T, N = X_lin.shape
T_trn = round(0.50 * T)
X_lin_trn = X_lin.iloc[:T_trn]
X_lin_tst = X_lin.iloc[T_trn:]
X_log_trn = X_log.iloc[:T_trn]
X_log_tst = X_log.iloc[T_trn:]

# Estimate mu and Sigma with training data
mu = X_log_trn.mean().values
Sigma = X_log_trn.cov().values

# Design portfolios
w_one_over_N = np.repeat(1/N, N)
w_GMVP = design_GMVP(Sigma)
w_IVP = design_IVP(np.sqrt(np.diag(Sigma)))
# Backtest portfolios with test data (assuming daily rebalancing and no transaction cost)
# With daily rebalancing, the fixed weights apply to every day's linear returns.
ret_one_over_N = X_lin_tst @ w_one_over_N
ret_GMVP = X_lin_tst @ w_GMVP
ret_IVP = X_lin_tst @ w_IVP

# Compute cumulative returns
wealth_one_over_N = (1 + ret_one_over_N).cumprod() - 1
wealth_GMVP = (1 + ret_GMVP).cumprod() - 1
wealth_IVP = (1 + ret_IVP).cumprod() - 1
# Plot cumulative returns
plt.figure(figsize=(12, 6))
wealth_one_over_N.plot(label='1/N', linewidth=1)
wealth_GMVP.plot(label='GMVP', linewidth=1)
wealth_IVP.plot(label='IVP', linewidth=1)
plt.title('Cumulative returns')
plt.legend(title='Portfolios')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

We can now conveniently reproduce the same backtest using the helper function backtest() defined above. With rebalance_every=1 and optimize_every=10000, the portfolio is optimized once at the start and then rebalanced to those fixed target weights every day:

# Backtest with daily rebalancing and no transaction costs via helper function
bt_ret_daily, bt_w_daily = backtest(
    {
        "1/N": one_over_N,
        "GMVP": GMVP,
        "IVP": IVP
    },
    stock_prices,
    lookback=T_trn,
    rebalance_every=1,
    optimize_every=10000,  # optimize only once at the beginning
    cost_bps=0
)

# Sanity check - should match the manual calculation above
print("Sanity check (results match manual calculation):")
print(f"  1/N:  {np.allclose(ret_one_over_N.values, bt_ret_daily['1/N'].values, rtol=1e-5)}")
print(f"  GMVP: {np.allclose(ret_GMVP.values, bt_ret_daily['GMVP'].values, rtol=1e-5)}")
print(f"  IVP:  {np.allclose(ret_IVP.values, bt_ret_daily['IVP'].values, rtol=1e-5)}")
Backtesting portfolio '1/N'...
Backtesting portfolio 'GMVP'...
Backtesting portfolio 'IVP'...
Sanity check (results match manual calculation):
  1/N:  True
  GMVP: True
  IVP:  True
# Cumulative returns
plot_cum_return(bt_ret_daily)

# Performance metrics
print("Performance metrics (daily rebalancing, no transaction costs):")
print_table_performance_metrics(bt_ret_daily)

# Drawdowns
plot_drawdown(bt_ret_daily)
Performance metrics (daily rebalancing, no transaction costs):
  Annualized Return Annualized Volatility Sharpe Ratio Maximum Drawdown
1/N 0.51 0.35 1.20 -0.35
GMVP 0.39 0.31 1.06 -0.30
IVP 0.48 0.33 1.17 -0.33

Realistic rebalancing including transaction costs

We can now easily include transaction costs (say, 60 bps) and reduce the rebalancing frequency to every 5 days (weekly) instead of daily:

# Rebalance every 5 days with 60 bps transaction costs
bt_ret_realistic, bt_w_realistic = backtest(
    {
        "1/N": one_over_N,
        "GMVP": GMVP,
        "IVP": IVP
    },
    stock_prices,
    lookback=T_trn,
    rebalance_every=5,
    optimize_every=10000,  # optimize only once at the beginning
    cost_bps=60
)

# Performance metrics
print("Performance metrics (weekly rebalancing, 60 bps transaction costs):")
print_table_performance_metrics(bt_ret_realistic)

# Cumulative returns and drawdowns
plot_cum_return(bt_ret_realistic)
plot_drawdown(bt_ret_realistic)
Backtesting portfolio '1/N'...
Backtesting portfolio 'GMVP'...
Backtesting portfolio 'IVP'...
Performance metrics (weekly rebalancing, 60 bps transaction costs):
  Annualized Return Annualized Volatility Sharpe Ratio Maximum Drawdown
1/N 0.49 0.34 1.15 -0.34
GMVP 0.37 0.31 1.01 -0.30
IVP 0.45 0.33 1.12 -0.33

Via skfolio

We reproduce the vanilla backtest using skfolio. First, a naive daily-rebalanced backtest as a sanity check:

X_skfolio = prices_to_returns(stock_prices)

model_ewp = EqualWeighted()
model_ewp.fit(X_skfolio)
bt_ewp_naive = model_ewp.predict(X_skfolio)

print("Weights:", np.round(model_ewp.weights_, 4))
bt_ewp_naive.returns_df.head()
Weights: [0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1]
2018-01-03    0.014627
2018-01-04    0.013772
2018-01-05    0.007047
2018-01-08    0.003713
2018-01-09   -0.003522
Name: returns, dtype: float64
bt_ewp_naive.plot_cumulative_returns()

Now we perform a vanilla backtest with weekly rebalancing (every 5 days) and 60 bps transaction costs for the three portfolios:

# Wrap our portfolio functions for skfolio
portf_EWP = Portfolio_via_CVXPY(
    cvxpy_fun=one_over_N,
    portfolio_params=dict(name="1/N")
)
portf_GMVP = Portfolio_via_CVXPY(
    cvxpy_fun=GMVP,
    portfolio_params=dict(name="GMVP")
)
portf_IVP = Portfolio_via_CVXPY(
    cvxpy_fun=IVP,
    portfolio_params=dict(name="IVP")
)

all_formulations = [portf_EWP, portf_GMVP, portf_IVP]

# Vanilla backtest: rebalance every 5 days, optimize once (fixed train_size)
bt_list_vanilla = batch_cross_val_predict(
    all_formulations,
    X=X_skfolio,
    cv=WalkForward(test_size=5, train_size=T_trn),
)
bt_pop_vanilla = Population(bt_list_vanilla)
Starting batch backtesting with 3 portfolios...
Cross-validation: 68 splits
============================================================

[1/3] Backtesting '1/N'...
  Completed '1/N' in 0:00:00

[2/3] Backtesting 'GMVP'...
  Completed 'GMVP' in 0:00:00

[3/3] Backtesting 'IVP'...
  Completed 'IVP' in 0:00:00
============================================================
Batch backtesting completed! Total time: 0:00:00
pd.concat([p.returns_df for p in bt_pop_vanilla], axis=1).head(8)
returns returns returns
2019-05-15 0.011418 0.009658 0.011886
2019-05-16 0.005786 0.002054 0.005829
2019-05-17 -0.012718 -0.009141 -0.011855
2019-05-20 -0.018609 -0.010979 -0.017017
2019-05-21 0.012484 0.012503 0.011755
2019-05-22 -0.018468 -0.023969 -0.018084
2019-05-23 -0.016648 -0.011901 -0.015110
2019-05-24 -0.003409 -0.005301 -0.003556
bt_pop_vanilla.summary().loc[[
    'Annualized Sharpe Ratio',
    'CVaR at 95%',
    'MAX Drawdown',
    'Annualized Mean',
    'Annualized Standard Deviation'
]].T
Annualized Sharpe Ratio CVaR at 95% MAX Drawdown Annualized Mean Annualized Standard Deviation
1/N 1.21 5.55% 39.22% 42.03% 34.61%
GMVP 1.19 4.49% 29.85% 34.74% 29.14%
IVP 1.16 5.32% 37.10% 38.26% 32.91%
bt_pop_vanilla.set_portfolio_params(compounded=True)
fig = bt_pop_vanilla.plot_cumulative_returns()
fig.update_layout(title="Wealth (vanilla backtest via skfolio)", xaxis_title=None, yaxis_title="Net Asset Value (NAV)")
fig
bt_pop_vanilla.plot_drawdowns()

Walk-forward backtesting

Rather than keeping the portfolio fixed during all the test data, we can perform a walk-forward backtest by re-optimizing the portfolio every month (approximately 20 days) on a rolling-window basis.

Via base Python

# Re-optimize every 20 days, rebalance every 20 days, with 60 bps transaction costs
bt_ret_wf, bt_w_wf = backtest(
    {
        "1/N": one_over_N,
        "GMVP": GMVP,
        "IVP": IVP
    },
    stock_prices,
    lookback=T_trn,
    rebalance_every=20,
    optimize_every=20,
    cost_bps=60
)
Backtesting portfolio '1/N'...
Backtesting portfolio 'GMVP'...
Backtesting portfolio 'IVP'...
# Performance metrics
print("Performance metrics (walk-forward, monthly reoptimization, 60 bps):")
print_table_performance_metrics(bt_ret_wf)

# Cumulative returns and drawdowns
plot_cum_return(bt_ret_wf)
plot_drawdown(bt_ret_wf)
Performance metrics (walk-forward, monthly reoptimization, 60 bps):
  Annualized Return Annualized Volatility Sharpe Ratio Maximum Drawdown
1/N 0.48 0.34 1.16 -0.33
GMVP 0.37 0.30 1.06 -0.29
IVP 0.44 0.32 1.12 -0.32

Since the portfolios are re-optimized over time, we can plot the evolution of the weight allocations:

plot_weights_over_time(bt_w_wf["1/N"], portfolio_name="1/N")
plot_weights_over_time(bt_w_wf["GMVP"], portfolio_name="GMVP")
plot_weights_over_time(bt_w_wf["IVP"], portfolio_name="IVP")

Via skfolio

We reproduce the walk-forward backtest using skfolio. With test_size=20, the portfolio is reoptimized and rebalanced every 20 days:

bt_list_wf = batch_cross_val_predict(
    all_formulations,
    X=X_skfolio,
    cv=WalkForward(test_size=20, train_size=T_trn, purged_size=1),
)
bt_pop_wf = Population(bt_list_wf)
Starting batch backtesting with 3 portfolios...
Cross-validation: 17 splits
============================================================

[1/3] Backtesting '1/N'...
  Completed '1/N' in 0:00:00

[2/3] Backtesting 'GMVP'...
  Completed 'GMVP' in 0:00:00

[3/3] Backtesting 'IVP'...
  Completed 'IVP' in 0:00:00
============================================================
Batch backtesting completed! Total time: 0:00:00
pd.concat([p.returns_df for p in bt_pop_wf], axis=1).head(8)
returns returns returns
2019-05-16 0.005786 0.002054 0.005829
2019-05-17 -0.012718 -0.009141 -0.011855
2019-05-20 -0.018609 -0.010979 -0.017017
2019-05-21 0.012484 0.012503 0.011755
2019-05-22 -0.018468 -0.024107 -0.018170
2019-05-23 -0.016648 -0.011986 -0.015101
2019-05-24 -0.003409 -0.005392 -0.003596
2019-05-28 0.004439 -0.010546 -0.000983
bt_pop_wf.summary().loc[[
    'Annualized Sharpe Ratio',
    'CVaR at 95%',
    'MAX Drawdown',
    'Annualized Mean',
    'Annualized Standard Deviation'
]].T
Annualized Sharpe Ratio CVaR at 95% MAX Drawdown Annualized Mean Annualized Standard Deviation
1/N 1.15 5.55% 39.22% 39.84% 34.64%
GMVP 1.03 4.60% 31.80% 31.06% 30.10%
IVP 1.08 5.38% 38.11% 36.11% 33.33%
last_portfolios = Population([
    setattr(mpp[-1], 'name', mpp.name) or mpp[-1]
    for mpp in bt_pop_wf
])
plot_composition_sidebyside(last_portfolios, asset_names=stock_prices.columns)
bt_pop_wf.set_portfolio_params(compounded=True)
fig = bt_pop_wf.plot_cumulative_returns()
fig.update_layout(title="Wealth (walk-forward via skfolio)", xaxis_title=None, yaxis_title="Net Asset Value (NAV)")
fig
for mpp in bt_pop_wf:
    print(f"\nWeights for: {mpp.name}")
    display(mpp.plot_weights_per_observation())
Weights for: 1/N
Weights for: GMVP
Weights for: IVP
bt_pop_wf.plot_drawdowns()

Multiple randomized backtesting

Rather than running a single backtest, we can introduce randomness in the data and perform multiple backtests.

Via base Python

First, we resample the original 2017-2020 stock data 200 times: each time we randomly select $N=8$ stocks and a random contiguous period of two years.

def financial_data_resample(
    prices: pd.DataFrame,
    num_datasets: int = 200,
    N_sample: int = 8,
    T_sample: int = 252 * 2
) -> list[pd.DataFrame]:
    """
    Resample financial data by randomly selecting stocks and time windows.

    Each dataset is built by (i) drawing `N_sample` distinct columns at
    random (without replacement) and (ii) drawing a random contiguous
    window of `T_sample` rows.

    Parameters
    ----------
    prices : pd.DataFrame
        Full price DataFrame.
    num_datasets : int
        Number of resampled datasets.
    N_sample : int
        Number of stocks per dataset.
    T_sample : int
        Length of each time window. If `prices` has fewer rows, the full
        history is used.

    Returns
    -------
    list of pd.DataFrame
    """
    resampled = []
    for _ in range(num_datasets):
        selected_stocks = np.random.choice(prices.columns, size=N_sample, replace=False)
        # Valid window starts are 0 .. len-T_sample inclusive. np.random.randint
        # has an EXCLUSIVE upper bound, so add 1 — otherwise the last possible
        # window would never be sampled (off-by-one in the original).
        max_start = max(len(prices) - T_sample, 0)
        start_idx = np.random.randint(0, max_start + 1)
        subset = prices.iloc[start_idx:start_idx + T_sample][selected_stocks]
        resampled.append(subset)
    return resampled
stock_prices_2017 = SP500_stocks_2015to2020.loc["2017":]

stock_prices_resampled = financial_data_resample(
    stock_prices_2017,
    num_datasets=200,
    N_sample=8,
    T_sample=252 * 2
)

# Quick sanity check
print(f"Number of resampled datasets: {len(stock_prices_resampled)}")
print(f"Shape of first dataset: {stock_prices_resampled[0].shape}")
display(stock_prices_resampled[0].head())
Number of resampled datasets: 200
Shape of first dataset: (504, 8)
AON DTE TXT ETN NEM BF.B HD GL
Date
2018-01-17 132.8703 94.7915 58.8927 76.0889 36.8911 51.8825 186.852 90.1474
2018-01-18 131.4880 94.2269 58.5344 76.0525 36.6947 51.4490 185.459 89.7850
2018-01-19 131.5854 93.9355 58.6240 76.4075 36.9098 51.3349 188.264 90.2650
2018-01-22 132.3641 94.1267 58.8131 76.7806 37.0874 51.4414 191.191 90.8821
2018-01-23 133.7658 95.3197 58.8230 77.4724 37.7046 52.1867 191.602 90.8723

Then, we perform the multiple backtests (this will take some time):

# Function to run multiple backtests
def run_multiple_backtests(
    portfolio_funcs: dict[str, callable],
    price_datasets: list[pd.DataFrame],
    lookback: int = 252,
    optimize_every: int = 20,
    rebalance_every: int = 5,
    cost_bps: float = 60
) -> dict[str, list[pd.DataFrame]]:
    """
    Run backtests on multiple resampled datasets.

    Returns
    -------
    dict
        Portfolio names -> list of return Series (one per dataset).
    """
    collected = {name: [] for name in portfolio_funcs}
    n_datasets = len(price_datasets)

    for dataset_idx, prices in enumerate(price_datasets):
        # Progress message every 50 datasets.
        if dataset_idx % 50 == 0:
            print(f"Processing dataset {dataset_idx + 1}/{n_datasets}...")

        returns_df, _ = backtest(
            portfolio_funcs, prices,
            lookback=lookback,
            optimize_every=optimize_every,
            rebalance_every=rebalance_every,
            cost_bps=cost_bps,
            verbose=False
        )

        for name in portfolio_funcs:
            collected[name].append(returns_df[name])

    return collected
multiple_bt_results = run_multiple_backtests(
    {
        "1/N": one_over_N,
        "GMVP": GMVP,
        "IVP": IVP
    },
    stock_prices_resampled,
    lookback=252,
    optimize_every=20,
    rebalance_every=5,
    cost_bps=60
)
Processing dataset 1/200...
Processing dataset 51/200...
Processing dataset 101/200...
Processing dataset 151/200...

Now we can show the averaged results across all 200 backtests in a table:

def summarize_multiple_backtests(results: dict[str, list[pd.Series]]) -> pd.DataFrame:
    """Compute mean performance metrics across multiple backtests."""
    rows = {}
    for name, returns_list in results.items():
        # One metrics dict per resampled dataset.
        per_dataset = [
            calculate_performance_metrics(pd.DataFrame(r))
            for r in returns_list
        ]
        # Average each metric over all datasets.
        rows[name] = {
            metric: np.mean([metrics[metric] for metrics in per_dataset])
            for metric in per_dataset[0]
        }
    return pd.DataFrame(rows).T


summary_table = summarize_multiple_backtests(multiple_bt_results)
display(summary_table.style.format("{:.4f}"))
  Annualized Return Annualized Volatility Sharpe Ratio Maximum Drawdown
1/N 0.0960 0.2352 0.4654 -0.2467
GMVP 0.1025 0.2046 0.6192 -0.2134
IVP 0.0968 0.2234 0.5098 -0.2366

And we can also show the results in the form of bar plots:

# Create bar plots for selected metrics.
# Pass figsize directly to .plot(): pandas opens its own figure, so a prior
# plt.figure(...) call would just leave a stray empty figure behind.
summary_table[["Maximum Drawdown", "Annualized Volatility"]].abs().plot(kind='bar', figsize=(12, 6))
plt.title("Risk Metrics Comparison")
plt.ylabel("Value")
plt.xticks(rotation=0)
plt.tight_layout()
plt.show()
<Figure size 1200x600 with 0 Axes>

Via skfolio

The library skfolio provides MultipleRandomizedCV, which is based on the "Multiple Randomized Backtests" methodology of Palomar (2025). It repeatedly samples distinct asset subsets and contiguous time windows, then applies a walk-forward split to each subsample.

from skfolio.model_selection import MultipleRandomizedCV
from skfolio.measures import RatioMeasure

# Inner walk-forward: reoptimize every 20 days
walk_forward = WalkForward(test_size=20, train_size=252)

# Multiple randomized CV: 200 subsamples, 8 assets, 2-year windows
cv_mc = MultipleRandomizedCV(
    walk_forward=walk_forward,
    n_subsamples=200,
    asset_subset_size=8,
    window_size=252 * 2,
    random_state=42,
)
# Use the broader 2017-2020 data, converted to returns
X_skfolio_2017 = prices_to_returns(SP500_stocks_2015to2020.loc["2017":])

# Run multiple randomized backtests for each portfolio
pred_EWP_mc = cross_val_predict(
    Portfolio_via_CVXPY(cvxpy_fun=one_over_N),
    X_skfolio_2017, cv=cv_mc,
    portfolio_params={"tag": "1/N"},
)
pred_GMVP_mc = cross_val_predict(
    Portfolio_via_CVXPY(cvxpy_fun=GMVP),
    X_skfolio_2017, cv=cv_mc,
    portfolio_params={"tag": "GMVP"},
)
pred_IVP_mc = cross_val_predict(
    Portfolio_via_CVXPY(cvxpy_fun=IVP),
    X_skfolio_2017, cv=cv_mc,
    portfolio_params={"tag": "IVP"},
)

population_mc = pred_EWP_mc + pred_GMVP_mc + pred_IVP_mc
population_mc.plot_distribution(
    measure_list=[RatioMeasure.ANNUALIZED_SHARPE_RATIO],
    tag_list=["1/N", "GMVP", "IVP"],
)
for pred in [pred_EWP_mc, pred_GMVP_mc, pred_IVP_mc]:
    tag = pred[0].tag
    mean_sr = pred.measures_mean(measure=RatioMeasure.ANNUALIZED_SHARPE_RATIO)
    std_sr = pred.measures_std(measure=RatioMeasure.ANNUALIZED_SHARPE_RATIO)
    print(f"{tag}\n{'=' * len(tag)}")
    print(f"Average Sharpe Ratio: {mean_sr:0.4f}")
    print(f"Sharpe Ratio Std Dev: {std_sr:0.4f}\n")
1/N
===
Average Sharpe Ratio: 0.5783
Sharpe Ratio Std Dev: 0.7058

GMVP
====
Average Sharpe Ratio: 0.7932
Sharpe Ratio Std Dev: 0.8371

IVP
===
Average Sharpe Ratio: 0.6433
Sharpe Ratio Std Dev: 0.7393

population_mc.boxplot_measure(
    measure=RatioMeasure.CVAR_RATIO,
    tag_list=["1/N", "GMVP", "IVP"],
)
fig = pred_GMVP_mc[:10].plot_cumulative_returns(use_tag_in_legend=False)
fig.update_layout(title="GMVP: 10 sample paths from randomized backtesting")
fig
pred_GMVP_mc[:2].plot_composition(display_sub_ptf_name=False)