Python Code for Portfolio Optimization Chapter 6 – Portfolio Basics

Daniel P. Palomar (2025). Portfolio Optimization: Theory and Application. Cambridge University Press.

Last update: March 3, 2025

Contributors:


Packages

The following packages are used in the examples:

# Core numerical computing
import numpy as np
import pandas as pd

# For financial data
import yfinance as yf       # Lodaing financial data
import empyrical as emp     # Performance metrics

# Book data (pip install "git+https://github.com/dppalomar/pob.git#subdirectory=python")
from pob_python import SP500_stocks_2015to2020, SP500_index_2015to2020

# Plotting
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
sns.set_theme(style="darkgrid")

# Optimization
import cvxpy as cp                  # interface for convex optimization solvers
import riskparityportfolio as rpp
from scipy.optimize import minimize, LinearConstraint   # for optimization

Some math definitions for the equations:

$\def\bm#1{\boldsymbol{#1}}$ $\def\textm#1{\textsf{#1}}$ $\def\T{{\mkern-2mu\raise-1mu\mathsf{T}}}}$ $\def\R{\mathbb{R}}$ $\def\E{\rm I\kern-.2em E}$ $\def\w{\bm{w}}$ $\def\bmu{\bm{\mu}}}$ $\def\bSigma{\bm{\Sigma}}}$

Preliminaries

We start with basic aspects such as data loading and plotting.

Loading market data

Loading market data could be conveniently done with the package yfinance:

stocks = yf.download(["AAPL", "AMD", "ADI", "A", "MOH", "CVS", "APD", "AA", "CF"], start='2021-10-01', end='2021-12-31')
stocks[['Adj Close']].head()
Price Adj Close
Ticker A AA AAPL ADI AMD APD CF CVS MOH
Date

However, for convenience we will use stock data and cryptocurrency data from the official portfolio optimization book package pob_python:

from pob_python import SP500_stocks_2015to2020, cryptos_2017to2021_hourly

# stock S&P500 market data
SP500_stocks_2015to2020.iloc[:, :5].head()
A AAL AAP AAPL ABBV
Date
2015-01-05 37.7523 51.0467 154.217 24.239 50.8722
2015-01-06 37.1642 50.2556 154.108 24.241 50.6204
2015-01-07 37.6574 50.2272 157.420 24.581 52.6663
2015-01-08 38.7862 50.8430 158.800 25.526 53.2171
2015-01-09 38.5016 49.2891 157.991 25.553 51.7614
SP500_stocks_2015to2020[["AMD", "MGM", "AAPL"]].head()
AMD MGM AAPL
Date
2015-01-05 2.66 19.3206 24.239
2015-01-06 2.63 18.5833 24.241
2015-01-07 2.58 19.3112 24.581
2015-01-08 2.61 19.5853 25.526
2015-01-09 2.63 19.3868 25.553
# crypto data
cryptos_2017to2021_hourly.iloc[:, :5].head()
BTC ETH ADA DOT XRP
Date
2021-01-07 09:00:00 37485.61 1204.525106 0.330998 10.005659 0.305133
2021-01-07 10:00:00 37040.38 1183.403101 0.308546 9.677910 0.323733
2021-01-07 11:00:00 37806.57 1201.001309 0.311904 9.823281 0.357990
2021-01-07 12:00:00 37936.25 1227.161815 0.317906 9.966991 0.336457
2021-01-07 13:00:00 38154.69 1218.126633 0.318592 9.882065 0.328512

Plotting data

# Plot stock prices
stock_prices = SP500_stocks_2015to2020[["AMD", "MGM", "AAPL"]].loc["2019-10":]

fig, ax = plt.subplots(figsize=(12, 6))
stock_prices.plot(ax=ax)
ax.set_title('Prices of stocks')
ax.set_xlabel(None)
ax.legend(title="Stocks")
plt.show()
No description has been provided for this image
stock_returns = stock_prices.pct_change()

# Plot returns
fig, ax = plt.subplots(figsize=(12, 6))
stock_returns.plot(ax=ax)
ax.set_title('Returns of stocks')
ax.set_xlabel(None)
ax.legend(title="Stocks")
plt.show()
No description has been provided for this image
# Compute drawdowns
cumulative_returns = (1 + stock_returns).cumprod()
running_max = cumulative_returns.cummax()
drawdowns = (cumulative_returns - running_max) / running_max

# Plot drawdowns
fig, ax = plt.subplots(figsize=(12, 6))
drawdowns.plot(ax=ax)
ax.set_title('Drawdown of stocks')
ax.set_xlabel(None)
ax.legend(title="Stocks")
plt.show()
No description has been provided for this image

Backtesting

We now consider how to perform backtests and explore rebalancing aspects.

Naive backtesting

For illustrative purposes we now backtest the $1/N$ portfolio:

stock_prices = SP500_stocks_2015to2020[["AMD", "MGM", "AAPL", "AMZN", "TSCO"]].loc["2019-10":]
T, N = stock_prices.shape

# linear returns
X = stock_prices / stock_prices.shift(1) - 1
X = X.dropna()

# portfolio
w = np.repeat(1/N, N)

# naive backtest (assuming daily rebalancing and no transaction cost)
portf_ret = X @ w
portf_ret.head()
Date
2019-10-02   -0.013131
2019-10-03    0.012374
2019-10-04    0.009848
2019-10-07   -0.003872
2019-10-08   -0.016877
dtype: float64

However, multiplying the matrix of linear returns of the assets X by the portfolio w implicitly assumes that we are rebalancing at every period (and also ignoring the transaction costs). To perform more realistic backtests, we now define a rolling-window backtest function that rebalances the portfolio every certain number of periods using a specified lookback window of the data (as well as transaction costs).

def EWP(X):
    N = X.shape[1]
    return np.repeat(1/N, N)

def backtest_single_portfolio(portfolio_func, portfolio_name, prices, lookback, rebalance_every=1, cost_bps=0):
    N = prices.shape[1]

    # Calculate returns
    X = prices.pct_change().dropna()

    # Initialize variables
    current_w = np.repeat(0, N)
    w = pd.DataFrame(index=X.index, columns=X.columns)
    portf_ret = pd.Series(index=X.index)
    portf_ret.name = portfolio_name

    for t in range(lookback, len(X)):
        # Rebalance portfolio if necessary
        if (t - lookback) % rebalance_every == 0:
            new_w = portfolio_func(X.iloc[t-lookback:t])  # Note that the row at time t is not included
            turnover = np.abs(new_w - current_w).sum()
            transaction_cost = turnover * cost_bps / 1e4
            current_w = new_w

        # Store weights
        w.iloc[t] = current_w

        # Calculate portfolio return for this period
        period_return = (X.iloc[t] * current_w).sum()
        portf_ret.iloc[t] = period_return - transaction_cost

        # Update normalized weights based on asset performance
        current_w = current_w * (1 + X.iloc[t])
        current_w = current_w / current_w.sum()

    return portf_ret, w

As a sanity check, let’s start by reproducing the previous naive backtest assuming daily rebalancing (note that we choose a lookback of 0 since the $1/N$ portfolio does not really need any data):

bt_ret, bt_w = backtest_single_portfolio(EWP, "1/N", stock_prices, lookback=0, rebalance_every=1)
bt_ret.head()
Date
2019-10-02   -0.013131
2019-10-03    0.012374
2019-10-04    0.009848
2019-10-07   -0.003872
2019-10-08   -0.016877
Name: 1/N, dtype: float64

Rebalancing in backtesting

Now we can perform a more realistic backtest rebalancing, say, every week (i.e., 5 days), including transaction costs:

bt_ret, bt_w = backtest_single_portfolio(EWP, "1/N", stock_prices, lookback=0, rebalance_every=5, cost_bps=30e-4)
bt_ret.head()
Date
2019-10-02   -0.013132
2019-10-03    0.012464
2019-10-04    0.009735
2019-10-07   -0.003923
2019-10-08   -0.016739
Name: 1/N, dtype: float64
def plot_cum_return(returns):
    cumulative_returns = (1 + returns).cumprod() - 1
    fig, ax = plt.subplots(figsize=(12, 6))
    cumulative_returns.plot(ax=ax)
    ax.set_title('Cumulative Return')
    ax.set_xlabel(None)
    ax.legend(title="portfolio")
    plt.show()

plot_cum_return(bt_ret)
No description has been provided for this image
def plot_drawdown(returns):
    cumulative_returns = (1 + returns).cumprod()
    running_max = cumulative_returns.cummax()
    drawdowns = (cumulative_returns - running_max) / running_max

    fig, ax = plt.subplots(figsize=(12, 6))
    drawdowns.plot(ax=ax)
    ax.set_title('Drawdown')
    ax.set_xlabel(None)
    ax.legend(title="portfolio")
    plt.show()

plot_drawdown(bt_ret)
No description has been provided for this image

Let's observe the evolution of the $1/N$ portfolio over time for a universe of 5 stocks, showing the effect of rebalancing (indicated with black vertical lines):

# Run backtest
bt_ret, bt_w = backtest_single_portfolio(EWP, "1/N", stock_prices, lookback=0, rebalance_every=90)

# Filter the weights DataFrame for the desired date range
filtered_w = bt_w.loc["2020-01":"2020-08"]

# Create the plot
fig, ax = plt.subplots(figsize=(12, 6))

# Calculate bar width (in days)
bar_width = (filtered_w.index[-1] - filtered_w.index[0]).days / len(filtered_w)

# Plot stacked bars
bottom = np.zeros(len(filtered_w))
for column in filtered_w.columns:
    ax.bar(filtered_w.index, filtered_w[column], bottom=bottom, width=3*bar_width, label=column, edgecolor='none')
    bottom += filtered_w[column]

# Customize the x-axis
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
plt.xticks(rotation=45, ha='right')


# Customize other aspects of the plot
plt.title("Weight Allocation Over Time for Portfolio 1/N")
plt.xlabel(None)
plt.ylabel("Weight")
plt.legend(title="Stocks", bbox_to_anchor=(1.05, 1), loc='upper left')

# Add vertical lines for specific dates
plt.axvline(pd.to_datetime("2020-02-11"), color="black", linestyle="-", linewidth=3)
plt.axvline(pd.to_datetime("2020-06-19"), color="black", linestyle="-", linewidth=3)
plt.axhline(0.0, color="black", linestyle="--")
plt.axhline(0.2, color="black", linestyle="--")
plt.axhline(0.4, color="black", linestyle="--")
plt.axhline(0.6, color="black", linestyle="--")
plt.axhline(0.8, color="black", linestyle="--")
plt.axhline(1.0, color="black", linestyle="--")

# Adjust layout and display
plt.tight_layout()
plt.show()
No description has been provided for this image

Backtest function

Let's now rewrite the backtest function to accept multiple portfolios:

def backtest(portfolio_funcs, prices, lookback, rebalance_every=1, cost_bps=0):
    N = prices.shape[1]
    X = prices.pct_change().dropna()

    portf_rets = {}
    ws = {}

    for portfolio_name, portfolio_func in portfolio_funcs.items():
        # Initialize variables
        current_w = np.repeat(0, N)
        w = pd.DataFrame(index=X.index, columns=X.columns)
        portf_ret = pd.Series(index=X.index)
        portf_ret.name = portfolio_name

        for t in range(lookback, len(X)):
            # Rebalance portfolio if necessary
            if (t - lookback) % rebalance_every == 0:
                new_w = portfolio_func(X.iloc[t-lookback:t])  # Note that the row at time t is not included
                turnover = np.abs(new_w - current_w).sum()
                transaction_cost = turnover * cost_bps / 1e4
                current_w = new_w

            # Store weights
            w.iloc[t] = current_w

            # Calculate portfolio return for this period
            period_return = (X.iloc[t] * current_w).sum()
            portf_ret.iloc[t] = period_return - transaction_cost

            # Update normalized weights based on asset performance
            current_w = current_w * (1 + X.iloc[t])
            current_w = current_w / current_w.sum()

        # Keep a record (remove inital NaNs)
        portf_rets[portfolio_name] = portf_ret[lookback:]
        ws[portfolio_name] = w[lookback:]

    # Combine all portfolio returns into a single DataFrame
    portf_rets_df = pd.DataFrame(portf_rets)

    return portf_rets_df, ws

Heuristic portfolios

We now compare the following heuristic portfolios:

  • $1/N$ portfolio:
$$ \w = \frac{1}{N}\bm{1}; $$
  • GMRP:
$$ \begin{array}{ll} \underset{\w}{\textm{maximize}} & \w^\T\bmu\\ \textm{subject to} & \bm{1}^\T\w=1, \quad \w\ge\bm{0}; \end{array} $$
  • Quintile portfolio (sorting stocks by $\bm{\mu}$):
$$ \w = \frac{1}{N/5}\left[\begin{array}{c} \begin{array}{c} 1\\ \vdots\\ 1 \end{array}\\ \begin{array}{c} 0\\ \vdots\\ 0 \end{array} \end{array}\right]\begin{array}{c} \left.\begin{array}{c} \\ \\ \\ \end{array}\right\} 20\%\\ \left.\begin{array}{c} \\ \\ \\ \end{array}\right\} 80\% \end{array} $$
  • Quintile portfolio (sorting stocks by $\bm{\mu}/\bm{\sigma}$); and
  • Quintile portfolio (sorting stocks by $\bm{\mu}/\bm{\sigma}^2$).
# Get data
stock_prices = SP500_stocks_2015to2020[
                   ["AAPL", "AMZN", "AMD", "GM", "GOOGL", "MGM", "MSFT", "QCOM", "TSCO", "UPS"]
               ].loc["2019":]
T = stock_prices.shape[0]
T_trn = round(0.70*T)
# Define heuristic portfolios
def QuintP_mu(X_lin):
    X_log = np.log(1 + X_lin)
    mu = X_log.mean()
    idx = mu.argsort()[::-1]
    w = np.zeros(len(mu))
    w[idx[:len(mu)//5]] = 1 / (len(mu)//5)
    return w

def QuintP_mu_over_sigma(X_lin):
    X_log = np.log(1 + X_lin)
    mu = X_log.mean()
    sigma = X_log.std()
    idx = (mu / sigma).argsort()[::-1]
    w = np.zeros(len(mu))
    w[idx[:len(mu)//5]] = 1 / (len(mu)//5)
    return w

def QuintP_mu_over_sigma2(X_lin):
    X_log = np.log(1 + X_lin)
    mu = X_log.mean()
    sigma = X_log.std()
    idx = (mu / sigma**2).argsort()[::-1]
    w = np.zeros(len(mu))
    w[idx[:len(mu)//5]] = 1 / (len(mu)//5)
    return w

def GMRP(X_lin):
    X_log = np.log(1 + X_lin)
    mu = X_log.mean()
    w = np.zeros(len(mu))
    w[mu.argmax()] = 1
    return w
# Run backtest
bt_ret, bt_w = backtest(
    {
        "1/N": EWP,
        "GMRP": GMRP,
        "QuintP (sorted by mu)": QuintP_mu,
        "QuintP (sorted by mu/sigma)": QuintP_mu_over_sigma,
        "QuintP (sorted by mu/sigma2)": QuintP_mu_over_sigma2,
    },
    stock_prices, lookback=T_trn, rebalance_every=30)
bt_w["1/N"].head()
AAPL AMZN AMD GM GOOGL MGM MSFT QCOM TSCO UPS
Date
2020-03-19 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1
2020-03-20 0.096425 0.099873 0.098908 0.102432 0.098993 0.10479 0.098768 0.103219 0.102368 0.094222
2020-03-23 0.09069 0.098444 0.098808 0.105369 0.09553 0.124511 0.095466 0.097139 0.102161 0.091883
2020-03-24 0.088295 0.100934 0.103324 0.101693 0.093774 0.124397 0.094015 0.099196 0.1045 0.089872
2020-03-25 0.087564 0.092752 0.103366 0.109933 0.090601 0.149244 0.092436 0.093593 0.096549 0.083963
# Extract the first row of each portfolio's weights and prepare for plotting
first_rows = {
    portfolio_name: weights.iloc[0]  # Get the first row of each DataFrame
    for portfolio_name, weights in bt_w.items()
}

# Convert the dictionary of first rows into a DataFrame
first_rows_df = pd.DataFrame(first_rows).T.reset_index()  # Transpose and reset index
first_rows_df.columns = ['portfolio'] + list(first_rows_df.columns[1:])

first_rows_df
portfolio AAPL AMZN AMD GM GOOGL MGM MSFT QCOM TSCO UPS
0 1/N 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1
1 GMRP 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2 QuintP (sorted by mu) 0.5 0.0 0.5 0.0 0.0 0.0 0.0 0.0 0.0 0.0
3 QuintP (sorted by mu/sigma) 0.5 0.0 0.5 0.0 0.0 0.0 0.0 0.0 0.0 0.0
4 QuintP (sorted by mu/sigma2) 0.5 0.0 0.0 0.0 0.0 0.0 0.5 0.0 0.0 0.0
# Barplot of weight allocation
melted_df = first_rows_df.melt(id_vars='portfolio', var_name='stocks', value_name='weights')

fig, ax = plt.subplots(figsize=(12, 4))
sns.barplot(x='stocks', y='weights', hue='portfolio', ax=ax, data=melted_df)
ax.set_title("Weight Allocation for Heuristic Portfolios")
plt.xticks(rotation=45)
plt.show()
No description has been provided for this image
# Plot of cumulative return
plot_cum_return(bt_ret)
No description has been provided for this image
# Plot of drawdown
plot_drawdown(bt_ret)
No description has been provided for this image
#
# Table of performance metrics
#
def print_table_performance_metrics(bt_ret):
    risk_free_rate = 0.0  # Risk-free rate (e.g., 0 for simplicity)
    trading_days = 252    # Number of trading days in a year

    # Calculate Annualized Return
    annualized_return = (1 + bt_ret.mean())**trading_days - 1

    # Calculate Annualized Volatility
    annualized_volatility = bt_ret.std() * np.sqrt(trading_days)

    # Calculate Sharpe Ratio
    sharpe_ratio = (bt_ret.mean() - risk_free_rate) / bt_ret.std()
    sharpe_ratio *= np.sqrt(trading_days)  # Annualize Sharpe ratio

    # Calculate Maximum Drawdown
    def calculate_max_drawdown(returns):
        cumulative_returns = (1 + returns).cumprod()
        running_max = cumulative_returns.cummax()
        drawdown = (cumulative_returns - running_max) / running_max
        max_drawdown = drawdown.min()
        return max_drawdown

    max_drawdown = bt_ret.apply(calculate_max_drawdown)

    # Combine all metrics into a single DataFrame
    performance_table = pd.DataFrame({
        'Annualized Return': annualized_return,
        'Annualized Volatility': annualized_volatility,
        'Sharpe Ratio': sharpe_ratio,
        'Maximum Drawdown': max_drawdown
    })

    # Display the performance table
    performance_table_styled = performance_table.style.format("{:.2f}")
    display(performance_table_styled)

print_table_performance_metrics(bt_ret)
  Annualized Return Annualized Volatility Sharpe Ratio Maximum Drawdown
1/N 2.74 0.36 3.66 -0.11
GMRP 3.56 0.63 2.43 -0.19
QuintP (sorted by mu) 2.92 0.47 2.91 -0.20
QuintP (sorted by mu/sigma) 2.92 0.46 3.00 -0.20
QuintP (sorted by mu/sigma2) 1.82 0.40 2.61 -0.18

Risk-based portfolios

We now compare the following risk-based portfolios:

  • Global minimum variance portfolio (GMVP):
$$ \begin{array}{ll} \underset{\w}{\textm{minimize}} & \w^\T\bSigma\w\\ \textm{subject to} & \bm{1}^\T\w=1, \quad \w\ge\bm{0}; \end{array} $$
  • Inverse volatility portfolio (IVP):
$$ \w = \frac{\bm{\sigma}^{-1}}{\bm{1}^\T\bm{\sigma}^{-1}}; $$
  • Risk parity portfolio (RPP);

  • Most diversified portfolio (MDP):

$$ \begin{array}{ll} \underset{\w}{\textm{minimize}} & \dfrac{\w^\T\bm{\sigma}}{\sqrt{\w^\T\bmu\w}}\\ \textm{subject to} & \bm{1}^\T\w=1, \quad \w\ge\bm{0}; \end{array} $$
  • Maximum decorrelation portfolio (MDCP):
$$ \begin{array}{ll} \underset{\w}{\textm{minimize}} & \w^\T\bm{C}\w\\ \textm{subject to} & \bm{1}^\T\w=1, \quad \w\ge\bm{0}, \end{array} $$

where $\bm{C} = \textm{Diag}\left(\bSigma\right)^{-1/2} \bSigma \textm{Diag}\left(\bSigma\right)^{-1/2}.$

# Define risk-based portfolios
def GMVP(X_lin):
    # Estimate parameters
    X_log = np.log(1 + X_lin)
    Sigma = X_log.cov().values
    # Optimize portfolio
    N = len(Sigma)
    w = cp.Variable(N)
    prob = cp.Problem(cp.Minimize(cp.quad_form(w, Sigma)),
                      [cp.sum(w) == 1, w >= 0])
    prob.solve()
    return w.value

def IVP(X_lin):
    # Estimate parameters
    X_log = np.log(1 + X_lin)
    sigma = X_log.std()
    # Design portfolio
    w = 1 / sigma
    return w / w.sum()

def RPP(X_lin):
    # Estimate parameters
    X_log = np.log(1 + X_lin)
    Sigma = X_log.cov().values
    # Optimize portfolio
    N = len(Sigma)
    my_portfolio = rpp.RiskParityPortfolio(covariance=Sigma, weights=np.ones(N) / N, budget=np.ones(N) / N)
    my_portfolio.design()
    return my_portfolio.weights

def MDP(X_lin):
    # Estimate parameters
    X_log = np.log(1 + X_lin)
    Sigma = X_log.cov().values
    mu = np.sqrt(np.diag(Sigma))
    # Optimize portfolio
    N = len(Sigma)
    w = cp.Variable(N)
    prob = cp.Problem(cp.Minimize(cp.quad_form(w, Sigma)),
                      [mu @ w == 1, w >= 0])
    prob.solve()
    return w.value / w.value.sum()

def MDCP(X_lin):
    # Estimate parameters
    X_log = np.log(1 + X_lin)
    Sigma = X_log.cov().values
    D = np.diag(1 / np.sqrt(np.diag(Sigma)))
    C = D @ Sigma @ D
    # Optimize portfolio
    N = len(C)
    w = cp.Variable(N)
    prob = cp.Problem(cp.Minimize(cp.quad_form(w, C)),
                      [cp.sum(w) == 1, w >= 0])
    prob.solve()
    return w.value
# Run backtest
bt_ret, bt_w = backtest(
    {
        "GMVP": GMVP,
        "IVP": IVP,
        "RPP": RPP,
        "MDP": MDP,
        "MDCP": MDCP,
    },
    stock_prices, lookback=T_trn, rebalance_every=30)
#
# Barplot of weight allocation
#
first_rows = {
    portfolio_name: weights.iloc[0]  # Get the first row of each DataFrame
    for portfolio_name, weights in bt_w.items()
}
first_rows_df = pd.DataFrame(first_rows).T.reset_index()  # Transpose and reset index
first_rows_df.columns = ['portfolio'] + list(first_rows_df.columns[1:])
melted_df = first_rows_df.melt(id_vars='portfolio', var_name='stocks', value_name='weights')

fig, ax = plt.subplots(figsize=(12, 4))
sns.barplot(x='stocks', y='weights', hue='portfolio', ax=ax, data=melted_df)
ax.set_title("Weight Allocation for Heuristic Portfolios")
plt.xticks(rotation=45)
plt.show()
No description has been provided for this image
# Plot of cumulative return
plot_cum_return(bt_ret)
No description has been provided for this image
# Plot of drawdown
plot_drawdown(bt_ret)
No description has been provided for this image
# Table of performance metrics
print_table_performance_metrics(bt_ret)
  Annualized Return Annualized Volatility Sharpe Ratio Maximum Drawdown
GMVP 2.17 0.29 3.95 -0.11
IVP 2.42 0.33 3.69 -0.11
RPP 2.45 0.33 3.74 -0.11
MDP 2.88 0.33 4.11 -0.09
MDCP 3.39 0.38 3.89 -0.10