Python Code for Portfolio Optimization Chapter 6 – Portfolio Basics

Daniel P. Palomar (2025). Portfolio Optimization: Theory and Application. Cambridge University Press.

Last update: March 3, 2025

Contributors:

Packages¶

The following packages are used in the examples:

# Core numerical computing
import numpy as np
import pandas as pd

# For financial data
import yfinance as yf       # Lodaing financial data
import empyrical as emp     # Performance metrics

# Book data (pip install "git+https://github.com/dppalomar/pob.git#subdirectory=python")
from pob_python import SP500_stocks_2015to2020, SP500_index_2015to2020

# Plotting
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
sns.set_theme(style="darkgrid")

# Optimization
import cvxpy as cp                  # interface for convex optimization solvers
import riskparityportfolio as rpp
from scipy.optimize import minimize, LinearConstraint   # for optimization

Some math definitions for the equations:

$\def\bm#1{\boldsymbol{#1}}$ $\def\textm#1{\textsf{#1}}$ $\def\T{{\mkern-2mu\raise-1mu\mathsf{T}}}}$ $\def\R{\mathbb{R}}$ $\def\E{\rm I\kern-.2em E}$ $\def\w{\bm{w}}$ $\def\bmu{\bm{\mu}}}$ $\def\bSigma{\bm{\Sigma}}}$

Preliminaries¶

We start with basic aspects such as data loading and plotting.

Loading market data¶

Loading market data could be conveniently done with the package yfinance:

stocks = yf.download(["AAPL", "AMD", "ADI", "A", "MOH", "CVS", "APD", "AA", "CF"], start='2021-10-01', end='2021-12-31')

stocks[['Adj Close']].head()

Price	Adj Close
Ticker	A	AA	AAPL	ADI	AMD	APD	CF	CVS	MOH
Date

However, for convenience we will use stock data and cryptocurrency data from the official portfolio optimization book package pob_python:

from pob_python import SP500_stocks_2015to2020, cryptos_2017to2021_hourly

# stock S&P500 market data
SP500_stocks_2015to2020.iloc[:, :5].head()

	A	AAL	AAP	AAPL	ABBV
Date
2015-01-05	37.7523	51.0467	154.217	24.239	50.8722
2015-01-06	37.1642	50.2556	154.108	24.241	50.6204
2015-01-07	37.6574	50.2272	157.420	24.581	52.6663
2015-01-08	38.7862	50.8430	158.800	25.526	53.2171
2015-01-09	38.5016	49.2891	157.991	25.553	51.7614

SP500_stocks_2015to2020[["AMD", "MGM", "AAPL"]].head()

	AMD	MGM	AAPL
Date
2015-01-05	2.66	19.3206	24.239
2015-01-06	2.63	18.5833	24.241
2015-01-07	2.58	19.3112	24.581
2015-01-08	2.61	19.5853	25.526
2015-01-09	2.63	19.3868	25.553

# crypto data
cryptos_2017to2021_hourly.iloc[:, :5].head()

	BTC	ETH	ADA	DOT	XRP
Date
2021-01-07 09:00:00	37485.61	1204.525106	0.330998	10.005659	0.305133
2021-01-07 10:00:00	37040.38	1183.403101	0.308546	9.677910	0.323733
2021-01-07 11:00:00	37806.57	1201.001309	0.311904	9.823281	0.357990
2021-01-07 12:00:00	37936.25	1227.161815	0.317906	9.966991	0.336457
2021-01-07 13:00:00	38154.69	1218.126633	0.318592	9.882065	0.328512

Plotting data¶

# Plot stock prices
stock_prices = SP500_stocks_2015to2020[["AMD", "MGM", "AAPL"]].loc["2019-10":]

fig, ax = plt.subplots(figsize=(12, 6))
stock_prices.plot(ax=ax)
ax.set_title('Prices of stocks')
ax.set_xlabel(None)
ax.legend(title="Stocks")
plt.show()

No description has been provided for this image

stock_returns = stock_prices.pct_change()

# Plot returns
fig, ax = plt.subplots(figsize=(12, 6))
stock_returns.plot(ax=ax)
ax.set_title('Returns of stocks')
ax.set_xlabel(None)
ax.legend(title="Stocks")
plt.show()

# Compute drawdowns
cumulative_returns = (1 + stock_returns).cumprod()
running_max = cumulative_returns.cummax()
drawdowns = (cumulative_returns - running_max) / running_max

# Plot drawdowns
fig, ax = plt.subplots(figsize=(12, 6))
drawdowns.plot(ax=ax)
ax.set_title('Drawdown of stocks')
ax.set_xlabel(None)
ax.legend(title="Stocks")
plt.show()

Backtesting¶

We now consider how to perform backtests and explore rebalancing aspects.

Naive backtesting¶

For illustrative purposes we now backtest the $1/N$ portfolio:

stock_prices = SP500_stocks_2015to2020[["AMD", "MGM", "AAPL", "AMZN", "TSCO"]].loc["2019-10":]
T, N = stock_prices.shape

# linear returns
X = stock_prices / stock_prices.shift(1) - 1
X = X.dropna()

# portfolio
w = np.repeat(1/N, N)

# naive backtest (assuming daily rebalancing and no transaction cost)
portf_ret = X @ w
portf_ret.head()

Date
2019-10-02   -0.013131
2019-10-03    0.012374
2019-10-04    0.009848
2019-10-07   -0.003872
2019-10-08   -0.016877
dtype: float64

However, multiplying the matrix of linear returns of the assets X by the portfolio w implicitly assumes that we are rebalancing at every period (and also ignoring the transaction costs). To perform more realistic backtests, we now define a rolling-window backtest function that rebalances the portfolio every certain number of periods using a specified lookback window of the data (as well as transaction costs).

def EWP(X):
    N = X.shape[1]
    return np.repeat(1/N, N)

def backtest_single_portfolio(portfolio_func, portfolio_name, prices, lookback, rebalance_every=1, cost_bps=0):
    N = prices.shape[1]

    # Calculate returns
    X = prices.pct_change().dropna()

    # Initialize variables
    current_w = np.repeat(0, N)
    w = pd.DataFrame(index=X.index, columns=X.columns)
    portf_ret = pd.Series(index=X.index)
    portf_ret.name = portfolio_name

    for t in range(lookback, len(X)):
        # Rebalance portfolio if necessary
        if (t - lookback) % rebalance_every == 0:
            new_w = portfolio_func(X.iloc[t-lookback:t])  # Note that the row at time t is not included
            turnover = np.abs(new_w - current_w).sum()
            transaction_cost = turnover * cost_bps / 1e4
            current_w = new_w

        # Store weights
        w.iloc[t] = current_w

        # Calculate portfolio return for this period
        period_return = (X.iloc[t] * current_w).sum()
        portf_ret.iloc[t] = period_return - transaction_cost

        # Update normalized weights based on asset performance
        current_w = current_w * (1 + X.iloc[t])
        current_w = current_w / current_w.sum()

    return portf_ret, w

As a sanity check, let’s start by reproducing the previous naive backtest assuming daily rebalancing (note that we choose a lookback of 0 since the $1/N$ portfolio does not really need any data):

bt_ret, bt_w = backtest_single_portfolio(EWP, "1/N", stock_prices, lookback=0, rebalance_every=1)
bt_ret.head()

Date
2019-10-02   -0.013131
2019-10-03    0.012374
2019-10-04    0.009848
2019-10-07   -0.003872
2019-10-08   -0.016877
Name: 1/N, dtype: float64

Rebalancing in backtesting¶

Now we can perform a more realistic backtest rebalancing, say, every week (i.e., 5 days), including transaction costs:

bt_ret, bt_w = backtest_single_portfolio(EWP, "1/N", stock_prices, lookback=0, rebalance_every=5, cost_bps=30e-4)
bt_ret.head()

Date
2019-10-02   -0.013132
2019-10-03    0.012464
2019-10-04    0.009735
2019-10-07   -0.003923
2019-10-08   -0.016739
Name: 1/N, dtype: float64

def plot_cum_return(returns):
    cumulative_returns = (1 + returns).cumprod() - 1
    fig, ax = plt.subplots(figsize=(12, 6))
    cumulative_returns.plot(ax=ax)
    ax.set_title('Cumulative Return')
    ax.set_xlabel(None)
    ax.legend(title="portfolio")
    plt.show()

plot_cum_return(bt_ret)

def plot_drawdown(returns):
    cumulative_returns = (1 + returns).cumprod()
    running_max = cumulative_returns.cummax()
    drawdowns = (cumulative_returns - running_max) / running_max

    fig, ax = plt.subplots(figsize=(12, 6))
    drawdowns.plot(ax=ax)
    ax.set_title('Drawdown')
    ax.set_xlabel(None)
    ax.legend(title="portfolio")
    plt.show()

plot_drawdown(bt_ret)

Let's observe the evolution of the $1/N$ portfolio over time for a universe of 5 stocks, showing the effect of rebalancing (indicated with black vertical lines):

# Run backtest
bt_ret, bt_w = backtest_single_portfolio(EWP, "1/N", stock_prices, lookback=0, rebalance_every=90)

# Filter the weights DataFrame for the desired date range
filtered_w = bt_w.loc["2020-01":"2020-08"]

# Create the plot
fig, ax = plt.subplots(figsize=(12, 6))

# Calculate bar width (in days)
bar_width = (filtered_w.index[-1] - filtered_w.index[0]).days / len(filtered_w)

# Plot stacked bars
bottom = np.zeros(len(filtered_w))
for column in filtered_w.columns:
    ax.bar(filtered_w.index, filtered_w[column], bottom=bottom, width=3*bar_width, label=column, edgecolor='none')
    bottom += filtered_w[column]

# Customize the x-axis
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
plt.xticks(rotation=45, ha='right')


# Customize other aspects of the plot
plt.title("Weight Allocation Over Time for Portfolio 1/N")
plt.xlabel(None)
plt.ylabel("Weight")
plt.legend(title="Stocks", bbox_to_anchor=(1.05, 1), loc='upper left')

# Add vertical lines for specific dates
plt.axvline(pd.to_datetime("2020-02-11"), color="black", linestyle="-", linewidth=3)
plt.axvline(pd.to_datetime("2020-06-19"), color="black", linestyle="-", linewidth=3)
plt.axhline(0.0, color="black", linestyle="--")
plt.axhline(0.2, color="black", linestyle="--")
plt.axhline(0.4, color="black", linestyle="--")
plt.axhline(0.6, color="black", linestyle="--")
plt.axhline(0.8, color="black", linestyle="--")
plt.axhline(1.0, color="black", linestyle="--")

# Adjust layout and display
plt.tight_layout()
plt.show()

Backtest function¶

Let's now rewrite the backtest function to accept multiple portfolios:

def backtest(portfolio_funcs, prices, lookback, rebalance_every=1, cost_bps=0):
    N = prices.shape[1]
    X = prices.pct_change().dropna()

    portf_rets = {}
    ws = {}

    for portfolio_name, portfolio_func in portfolio_funcs.items():
        # Initialize variables
        current_w = np.repeat(0, N)
        w = pd.DataFrame(index=X.index, columns=X.columns)
        portf_ret = pd.Series(index=X.index)
        portf_ret.name = portfolio_name

        for t in range(lookback, len(X)):
            # Rebalance portfolio if necessary
            if (t - lookback) % rebalance_every == 0:
                new_w = portfolio_func(X.iloc[t-lookback:t])  # Note that the row at time t is not included
                turnover = np.abs(new_w - current_w).sum()
                transaction_cost = turnover * cost_bps / 1e4
                current_w = new_w

            # Store weights
            w.iloc[t] = current_w

            # Calculate portfolio return for this period
            period_return = (X.iloc[t] * current_w).sum()
            portf_ret.iloc[t] = period_return - transaction_cost

            # Update normalized weights based on asset performance
            current_w = current_w * (1 + X.iloc[t])
            current_w = current_w / current_w.sum()

        # Keep a record (remove inital NaNs)
        portf_rets[portfolio_name] = portf_ret[lookback:]
        ws[portfolio_name] = w[lookback:]

    # Combine all portfolio returns into a single DataFrame
    portf_rets_df = pd.DataFrame(portf_rets)

    return portf_rets_df, ws

Heuristic portfolios¶

We now compare the following heuristic portfolios:

$1/N$ portfolio:

$$ \w = \frac{1}{N}\bm{1}; $$

GMRP:

$$ \begin{array}{ll} \underset{\w}{\textm{maximize}} & \w^\T\bmu\\ \textm{subject to} & \bm{1}^\T\w=1, \quad \w\ge\bm{0}; \end{array} $$

Quintile portfolio (sorting stocks by $\bm{\mu}$):

$$ \w = \frac{1}{N/5}\left[\begin{array}{c} \begin{array}{c} 1\\ \vdots\\ 1 \end{array}\\ \begin{array}{c} 0\\ \vdots\\ 0 \end{array} \end{array}\right]\begin{array}{c} \left.\begin{array}{c} \\ \\ \\ \end{array}\right\} 20\%\\ \left.\begin{array}{c} \\ \\ \\ \end{array}\right\} 80\% \end{array} $$

Quintile portfolio (sorting stocks by $\bm{\mu}/\bm{\sigma}$); and
Quintile portfolio (sorting stocks by $\bm{\mu}/\bm{\sigma}^2$).

# Get data
stock_prices = SP500_stocks_2015to2020[
                   ["AAPL", "AMZN", "AMD", "GM", "GOOGL", "MGM", "MSFT", "QCOM", "TSCO", "UPS"]
               ].loc["2019":]
T = stock_prices.shape[0]
T_trn = round(0.70*T)

# Define heuristic portfolios
def QuintP_mu(X_lin):
    X_log = np.log(1 + X_lin)
    mu = X_log.mean()
    idx = mu.argsort()[::-1]
    w = np.zeros(len(mu))
    w[idx[:len(mu)//5]] = 1 / (len(mu)//5)
    return w

def QuintP_mu_over_sigma(X_lin):
    X_log = np.log(1 + X_lin)
    mu = X_log.mean()
    sigma = X_log.std()
    idx = (mu / sigma).argsort()[::-1]
    w = np.zeros(len(mu))
    w[idx[:len(mu)//5]] = 1 / (len(mu)//5)
    return w

def QuintP_mu_over_sigma2(X_lin):
    X_log = np.log(1 + X_lin)
    mu = X_log.mean()
    sigma = X_log.std()
    idx = (mu / sigma**2).argsort()[::-1]
    w = np.zeros(len(mu))
    w[idx[:len(mu)//5]] = 1 / (len(mu)//5)
    return w

def GMRP(X_lin):
    X_log = np.log(1 + X_lin)
    mu = X_log.mean()
    w = np.zeros(len(mu))
    w[mu.argmax()] = 1
    return w

# Run backtest
bt_ret, bt_w = backtest(
    {
        "1/N": EWP,
        "GMRP": GMRP,
        "QuintP (sorted by mu)": QuintP_mu,
        "QuintP (sorted by mu/sigma)": QuintP_mu_over_sigma,
        "QuintP (sorted by mu/sigma2)": QuintP_mu_over_sigma2,
    },
    stock_prices, lookback=T_trn, rebalance_every=30)

bt_w["1/N"].head()

	AAPL	AMZN	AMD	GM	GOOGL	MGM	MSFT	QCOM	TSCO	UPS
Date
2020-03-19	0.1	0.1	0.1	0.1	0.1	0.1	0.1	0.1	0.1	0.1
2020-03-20	0.096425	0.099873	0.098908	0.102432	0.098993	0.10479	0.098768	0.103219	0.102368	0.094222
2020-03-23	0.09069	0.098444	0.098808	0.105369	0.09553	0.124511	0.095466	0.097139	0.102161	0.091883
2020-03-24	0.088295	0.100934	0.103324	0.101693	0.093774	0.124397	0.094015	0.099196	0.1045	0.089872
2020-03-25	0.087564	0.092752	0.103366	0.109933	0.090601	0.149244	0.092436	0.093593	0.096549	0.083963

# Extract the first row of each portfolio's weights and prepare for plotting
first_rows = {
    portfolio_name: weights.iloc[0]  # Get the first row of each DataFrame
    for portfolio_name, weights in bt_w.items()
}

# Convert the dictionary of first rows into a DataFrame
first_rows_df = pd.DataFrame(first_rows).T.reset_index()  # Transpose and reset index
first_rows_df.columns = ['portfolio'] + list(first_rows_df.columns[1:])

first_rows_df

	portfolio	AAPL	AMZN	AMD	GM	GOOGL	MGM	MSFT	QCOM	TSCO	UPS
0	1/N	0.1	0.1	0.1	0.1	0.1	0.1	0.1	0.1	0.1	0.1
1	GMRP	0.0	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2	QuintP (sorted by mu)	0.5	0.0	0.5	0.0	0.0	0.0	0.0	0.0	0.0	0.0
3	QuintP (sorted by mu/sigma)	0.5	0.0	0.5	0.0	0.0	0.0	0.0	0.0	0.0	0.0
4	QuintP (sorted by mu/sigma2)	0.5	0.0	0.0	0.0	0.0	0.0	0.5	0.0	0.0	0.0

# Barplot of weight allocation
melted_df = first_rows_df.melt(id_vars='portfolio', var_name='stocks', value_name='weights')

fig, ax = plt.subplots(figsize=(12, 4))
sns.barplot(x='stocks', y='weights', hue='portfolio', ax=ax, data=melted_df)
ax.set_title("Weight Allocation for Heuristic Portfolios")
plt.xticks(rotation=45)
plt.show()

# Plot of cumulative return
plot_cum_return(bt_ret)

# Plot of drawdown
plot_drawdown(bt_ret)

#
# Table of performance metrics
#
def print_table_performance_metrics(bt_ret):
    risk_free_rate = 0.0  # Risk-free rate (e.g., 0 for simplicity)
    trading_days = 252    # Number of trading days in a year

    # Calculate Annualized Return
    annualized_return = (1 + bt_ret.mean())**trading_days - 1

    # Calculate Annualized Volatility
    annualized_volatility = bt_ret.std() * np.sqrt(trading_days)

    # Calculate Sharpe Ratio
    sharpe_ratio = (bt_ret.mean() - risk_free_rate) / bt_ret.std()
    sharpe_ratio *= np.sqrt(trading_days)  # Annualize Sharpe ratio

    # Calculate Maximum Drawdown
    def calculate_max_drawdown(returns):
        cumulative_returns = (1 + returns).cumprod()
        running_max = cumulative_returns.cummax()
        drawdown = (cumulative_returns - running_max) / running_max
        max_drawdown = drawdown.min()
        return max_drawdown

    max_drawdown = bt_ret.apply(calculate_max_drawdown)

    # Combine all metrics into a single DataFrame
    performance_table = pd.DataFrame({
        'Annualized Return': annualized_return,
        'Annualized Volatility': annualized_volatility,
        'Sharpe Ratio': sharpe_ratio,
        'Maximum Drawdown': max_drawdown
    })

    # Display the performance table
    performance_table_styled = performance_table.style.format("{:.2f}")
    display(performance_table_styled)

print_table_performance_metrics(bt_ret)

	Annualized Return	Annualized Volatility	Sharpe Ratio	Maximum Drawdown
1/N	2.74	0.36	3.66	-0.11
GMRP	3.56	0.63	2.43	-0.19
QuintP (sorted by mu)	2.92	0.47	2.91	-0.20
QuintP (sorted by mu/sigma)	2.92	0.46	3.00	-0.20
QuintP (sorted by mu/sigma2)	1.82	0.40	2.61	-0.18

Risk-based portfolios¶

We now compare the following risk-based portfolios:

Global minimum variance portfolio (GMVP):

$$ \begin{array}{ll} \underset{\w}{\textm{minimize}} & \w^\T\bSigma\w\\ \textm{subject to} & \bm{1}^\T\w=1, \quad \w\ge\bm{0}; \end{array} $$

Inverse volatility portfolio (IVP):

$$ \w = \frac{\bm{\sigma}^{-1}}{\bm{1}^\T\bm{\sigma}^{-1}}; $$

Risk parity portfolio (RPP);
Most diversified portfolio (MDP):

$$ \begin{array}{ll} \underset{\w}{\textm{minimize}} & \dfrac{\w^\T\bm{\sigma}}{\sqrt{\w^\T\bmu\w}}\\ \textm{subject to} & \bm{1}^\T\w=1, \quad \w\ge\bm{0}; \end{array} $$

Maximum decorrelation portfolio (MDCP):

$$ \begin{array}{ll} \underset{\w}{\textm{minimize}} & \w^\T\bm{C}\w\\ \textm{subject to} & \bm{1}^\T\w=1, \quad \w\ge\bm{0}, \end{array} $$

where $\bm{C} = \textm{Diag}\left(\bSigma\right)^{-1/2} \bSigma \textm{Diag}\left(\bSigma\right)^{-1/2}.$

# Define risk-based portfolios
def GMVP(X_lin):
    # Estimate parameters
    X_log = np.log(1 + X_lin)
    Sigma = X_log.cov().values
    # Optimize portfolio
    N = len(Sigma)
    w = cp.Variable(N)
    prob = cp.Problem(cp.Minimize(cp.quad_form(w, Sigma)),
                      [cp.sum(w) == 1, w >= 0])
    prob.solve()
    return w.value

def IVP(X_lin):
    # Estimate parameters
    X_log = np.log(1 + X_lin)
    sigma = X_log.std()
    # Design portfolio
    w = 1 / sigma
    return w / w.sum()

def RPP(X_lin):
    # Estimate parameters
    X_log = np.log(1 + X_lin)
    Sigma = X_log.cov().values
    # Optimize portfolio
    N = len(Sigma)
    my_portfolio = rpp.RiskParityPortfolio(covariance=Sigma, weights=np.ones(N) / N, budget=np.ones(N) / N)
    my_portfolio.design()
    return my_portfolio.weights

def MDP(X_lin):
    # Estimate parameters
    X_log = np.log(1 + X_lin)
    Sigma = X_log.cov().values
    mu = np.sqrt(np.diag(Sigma))
    # Optimize portfolio
    N = len(Sigma)
    w = cp.Variable(N)
    prob = cp.Problem(cp.Minimize(cp.quad_form(w, Sigma)),
                      [mu @ w == 1, w >= 0])
    prob.solve()
    return w.value / w.value.sum()

def MDCP(X_lin):
    # Estimate parameters
    X_log = np.log(1 + X_lin)
    Sigma = X_log.cov().values
    D = np.diag(1 / np.sqrt(np.diag(Sigma)))
    C = D @ Sigma @ D
    # Optimize portfolio
    N = len(C)
    w = cp.Variable(N)
    prob = cp.Problem(cp.Minimize(cp.quad_form(w, C)),
                      [cp.sum(w) == 1, w >= 0])
    prob.solve()
    return w.value

# Run backtest
bt_ret, bt_w = backtest(
    {
        "GMVP": GMVP,
        "IVP": IVP,
        "RPP": RPP,
        "MDP": MDP,
        "MDCP": MDCP,
    },
    stock_prices, lookback=T_trn, rebalance_every=30)

#
# Barplot of weight allocation
#
first_rows = {
    portfolio_name: weights.iloc[0]  # Get the first row of each DataFrame
    for portfolio_name, weights in bt_w.items()
}
first_rows_df = pd.DataFrame(first_rows).T.reset_index()  # Transpose and reset index
first_rows_df.columns = ['portfolio'] + list(first_rows_df.columns[1:])
melted_df = first_rows_df.melt(id_vars='portfolio', var_name='stocks', value_name='weights')

fig, ax = plt.subplots(figsize=(12, 4))
sns.barplot(x='stocks', y='weights', hue='portfolio', ax=ax, data=melted_df)
ax.set_title("Weight Allocation for Heuristic Portfolios")
plt.xticks(rotation=45)
plt.show()

# Plot of cumulative return
plot_cum_return(bt_ret)

# Plot of drawdown
plot_drawdown(bt_ret)

# Table of performance metrics
print_table_performance_metrics(bt_ret)

	Annualized Return	Annualized Volatility	Sharpe Ratio	Maximum Drawdown
GMVP	2.17	0.29	3.95	-0.11
IVP	2.42	0.33	3.69	-0.11
RPP	2.45	0.33	3.74	-0.11
MDP	2.88	0.33	4.11	-0.09
MDCP	3.39	0.38	3.89	-0.10