Python Code for Portfolio Optimization Chapter 6 – Portfolio Basics
Daniel P. Palomar (2025). Portfolio Optimization: Theory and Application. Cambridge University Press.
Last update: March 3, 2025
Contributors:
Packages¶
The following packages are used in the examples:
# Core numerical computing
import numpy as np
import pandas as pd
# For financial data
import yfinance as yf # Lodaing financial data
import empyrical as emp # Performance metrics
# Book data (pip install "git+https://github.com/dppalomar/pob.git#subdirectory=python")
from pob_python import SP500_stocks_2015to2020, SP500_index_2015to2020
# Plotting
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
sns.set_theme(style="darkgrid")
# Optimization
import cvxpy as cp # interface for convex optimization solvers
import riskparityportfolio as rpp
from scipy.optimize import minimize, LinearConstraint # for optimization
Some math definitions for the equations:
$\def\bm#1{\boldsymbol{#1}}$ $\def\textm#1{\textsf{#1}}$ $\def\T{{\mkern-2mu\raise-1mu\mathsf{T}}}}$ $\def\R{\mathbb{R}}$ $\def\E{\rm I\kern-.2em E}$ $\def\w{\bm{w}}$ $\def\bmu{\bm{\mu}}}$ $\def\bSigma{\bm{\Sigma}}}$
Preliminaries¶
We start with basic aspects such as data loading and plotting.
Loading market data¶
Loading market data could be conveniently done with the package yfinance
:
stocks = yf.download(["AAPL", "AMD", "ADI", "A", "MOH", "CVS", "APD", "AA", "CF"], start='2021-10-01', end='2021-12-31')
stocks[['Adj Close']].head()
Price | Adj Close | ||||||||
---|---|---|---|---|---|---|---|---|---|
Ticker | A | AA | AAPL | ADI | AMD | APD | CF | CVS | MOH |
Date |
However, for convenience we will use stock data and cryptocurrency data from the official portfolio optimization book package pob_python
:
from pob_python import SP500_stocks_2015to2020, cryptos_2017to2021_hourly
# stock S&P500 market data
SP500_stocks_2015to2020.iloc[:, :5].head()
A | AAL | AAP | AAPL | ABBV | |
---|---|---|---|---|---|
Date | |||||
2015-01-05 | 37.7523 | 51.0467 | 154.217 | 24.239 | 50.8722 |
2015-01-06 | 37.1642 | 50.2556 | 154.108 | 24.241 | 50.6204 |
2015-01-07 | 37.6574 | 50.2272 | 157.420 | 24.581 | 52.6663 |
2015-01-08 | 38.7862 | 50.8430 | 158.800 | 25.526 | 53.2171 |
2015-01-09 | 38.5016 | 49.2891 | 157.991 | 25.553 | 51.7614 |
SP500_stocks_2015to2020[["AMD", "MGM", "AAPL"]].head()
AMD | MGM | AAPL | |
---|---|---|---|
Date | |||
2015-01-05 | 2.66 | 19.3206 | 24.239 |
2015-01-06 | 2.63 | 18.5833 | 24.241 |
2015-01-07 | 2.58 | 19.3112 | 24.581 |
2015-01-08 | 2.61 | 19.5853 | 25.526 |
2015-01-09 | 2.63 | 19.3868 | 25.553 |
# crypto data
cryptos_2017to2021_hourly.iloc[:, :5].head()
BTC | ETH | ADA | DOT | XRP | |
---|---|---|---|---|---|
Date | |||||
2021-01-07 09:00:00 | 37485.61 | 1204.525106 | 0.330998 | 10.005659 | 0.305133 |
2021-01-07 10:00:00 | 37040.38 | 1183.403101 | 0.308546 | 9.677910 | 0.323733 |
2021-01-07 11:00:00 | 37806.57 | 1201.001309 | 0.311904 | 9.823281 | 0.357990 |
2021-01-07 12:00:00 | 37936.25 | 1227.161815 | 0.317906 | 9.966991 | 0.336457 |
2021-01-07 13:00:00 | 38154.69 | 1218.126633 | 0.318592 | 9.882065 | 0.328512 |
Plotting data¶
# Plot stock prices
stock_prices = SP500_stocks_2015to2020[["AMD", "MGM", "AAPL"]].loc["2019-10":]
fig, ax = plt.subplots(figsize=(12, 6))
stock_prices.plot(ax=ax)
ax.set_title('Prices of stocks')
ax.set_xlabel(None)
ax.legend(title="Stocks")
plt.show()
stock_returns = stock_prices.pct_change()
# Plot returns
fig, ax = plt.subplots(figsize=(12, 6))
stock_returns.plot(ax=ax)
ax.set_title('Returns of stocks')
ax.set_xlabel(None)
ax.legend(title="Stocks")
plt.show()
# Compute drawdowns
cumulative_returns = (1 + stock_returns).cumprod()
running_max = cumulative_returns.cummax()
drawdowns = (cumulative_returns - running_max) / running_max
# Plot drawdowns
fig, ax = plt.subplots(figsize=(12, 6))
drawdowns.plot(ax=ax)
ax.set_title('Drawdown of stocks')
ax.set_xlabel(None)
ax.legend(title="Stocks")
plt.show()
Backtesting¶
We now consider how to perform backtests and explore rebalancing aspects.
Naive backtesting¶
For illustrative purposes we now backtest the $1/N$ portfolio:
stock_prices = SP500_stocks_2015to2020[["AMD", "MGM", "AAPL", "AMZN", "TSCO"]].loc["2019-10":]
T, N = stock_prices.shape
# linear returns
X = stock_prices / stock_prices.shift(1) - 1
X = X.dropna()
# portfolio
w = np.repeat(1/N, N)
# naive backtest (assuming daily rebalancing and no transaction cost)
portf_ret = X @ w
portf_ret.head()
Date 2019-10-02 -0.013131 2019-10-03 0.012374 2019-10-04 0.009848 2019-10-07 -0.003872 2019-10-08 -0.016877 dtype: float64
However, multiplying the matrix of linear returns of the assets X
by the portfolio w
implicitly assumes that we are rebalancing at every period (and also ignoring the transaction costs). To perform more realistic backtests, we now define a rolling-window backtest function that rebalances the portfolio every certain number of periods using a specified lookback window of the data (as well as transaction costs).
def EWP(X):
N = X.shape[1]
return np.repeat(1/N, N)
def backtest_single_portfolio(portfolio_func, portfolio_name, prices, lookback, rebalance_every=1, cost_bps=0):
N = prices.shape[1]
# Calculate returns
X = prices.pct_change().dropna()
# Initialize variables
current_w = np.repeat(0, N)
w = pd.DataFrame(index=X.index, columns=X.columns)
portf_ret = pd.Series(index=X.index)
portf_ret.name = portfolio_name
for t in range(lookback, len(X)):
# Rebalance portfolio if necessary
if (t - lookback) % rebalance_every == 0:
new_w = portfolio_func(X.iloc[t-lookback:t]) # Note that the row at time t is not included
turnover = np.abs(new_w - current_w).sum()
transaction_cost = turnover * cost_bps / 1e4
current_w = new_w
# Store weights
w.iloc[t] = current_w
# Calculate portfolio return for this period
period_return = (X.iloc[t] * current_w).sum()
portf_ret.iloc[t] = period_return - transaction_cost
# Update normalized weights based on asset performance
current_w = current_w * (1 + X.iloc[t])
current_w = current_w / current_w.sum()
return portf_ret, w
As a sanity check, let’s start by reproducing the previous naive backtest assuming daily rebalancing (note that we choose a lookback of 0 since the $1/N$ portfolio does not really need any data):
bt_ret, bt_w = backtest_single_portfolio(EWP, "1/N", stock_prices, lookback=0, rebalance_every=1)
bt_ret.head()
Date 2019-10-02 -0.013131 2019-10-03 0.012374 2019-10-04 0.009848 2019-10-07 -0.003872 2019-10-08 -0.016877 Name: 1/N, dtype: float64
Rebalancing in backtesting¶
Now we can perform a more realistic backtest rebalancing, say, every week (i.e., 5 days), including transaction costs:
bt_ret, bt_w = backtest_single_portfolio(EWP, "1/N", stock_prices, lookback=0, rebalance_every=5, cost_bps=30e-4)
bt_ret.head()
Date 2019-10-02 -0.013132 2019-10-03 0.012464 2019-10-04 0.009735 2019-10-07 -0.003923 2019-10-08 -0.016739 Name: 1/N, dtype: float64
def plot_cum_return(returns):
cumulative_returns = (1 + returns).cumprod() - 1
fig, ax = plt.subplots(figsize=(12, 6))
cumulative_returns.plot(ax=ax)
ax.set_title('Cumulative Return')
ax.set_xlabel(None)
ax.legend(title="portfolio")
plt.show()
plot_cum_return(bt_ret)
def plot_drawdown(returns):
cumulative_returns = (1 + returns).cumprod()
running_max = cumulative_returns.cummax()
drawdowns = (cumulative_returns - running_max) / running_max
fig, ax = plt.subplots(figsize=(12, 6))
drawdowns.plot(ax=ax)
ax.set_title('Drawdown')
ax.set_xlabel(None)
ax.legend(title="portfolio")
plt.show()
plot_drawdown(bt_ret)
Let's observe the evolution of the $1/N$ portfolio over time for a universe of 5 stocks, showing the effect of rebalancing (indicated with black vertical lines):
# Run backtest
bt_ret, bt_w = backtest_single_portfolio(EWP, "1/N", stock_prices, lookback=0, rebalance_every=90)
# Filter the weights DataFrame for the desired date range
filtered_w = bt_w.loc["2020-01":"2020-08"]
# Create the plot
fig, ax = plt.subplots(figsize=(12, 6))
# Calculate bar width (in days)
bar_width = (filtered_w.index[-1] - filtered_w.index[0]).days / len(filtered_w)
# Plot stacked bars
bottom = np.zeros(len(filtered_w))
for column in filtered_w.columns:
ax.bar(filtered_w.index, filtered_w[column], bottom=bottom, width=3*bar_width, label=column, edgecolor='none')
bottom += filtered_w[column]
# Customize the x-axis
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
plt.xticks(rotation=45, ha='right')
# Customize other aspects of the plot
plt.title("Weight Allocation Over Time for Portfolio 1/N")
plt.xlabel(None)
plt.ylabel("Weight")
plt.legend(title="Stocks", bbox_to_anchor=(1.05, 1), loc='upper left')
# Add vertical lines for specific dates
plt.axvline(pd.to_datetime("2020-02-11"), color="black", linestyle="-", linewidth=3)
plt.axvline(pd.to_datetime("2020-06-19"), color="black", linestyle="-", linewidth=3)
plt.axhline(0.0, color="black", linestyle="--")
plt.axhline(0.2, color="black", linestyle="--")
plt.axhline(0.4, color="black", linestyle="--")
plt.axhline(0.6, color="black", linestyle="--")
plt.axhline(0.8, color="black", linestyle="--")
plt.axhline(1.0, color="black", linestyle="--")
# Adjust layout and display
plt.tight_layout()
plt.show()
Backtest function¶
Let's now rewrite the backtest function to accept multiple portfolios:
def backtest(portfolio_funcs, prices, lookback, rebalance_every=1, cost_bps=0):
N = prices.shape[1]
X = prices.pct_change().dropna()
portf_rets = {}
ws = {}
for portfolio_name, portfolio_func in portfolio_funcs.items():
# Initialize variables
current_w = np.repeat(0, N)
w = pd.DataFrame(index=X.index, columns=X.columns)
portf_ret = pd.Series(index=X.index)
portf_ret.name = portfolio_name
for t in range(lookback, len(X)):
# Rebalance portfolio if necessary
if (t - lookback) % rebalance_every == 0:
new_w = portfolio_func(X.iloc[t-lookback:t]) # Note that the row at time t is not included
turnover = np.abs(new_w - current_w).sum()
transaction_cost = turnover * cost_bps / 1e4
current_w = new_w
# Store weights
w.iloc[t] = current_w
# Calculate portfolio return for this period
period_return = (X.iloc[t] * current_w).sum()
portf_ret.iloc[t] = period_return - transaction_cost
# Update normalized weights based on asset performance
current_w = current_w * (1 + X.iloc[t])
current_w = current_w / current_w.sum()
# Keep a record (remove inital NaNs)
portf_rets[portfolio_name] = portf_ret[lookback:]
ws[portfolio_name] = w[lookback:]
# Combine all portfolio returns into a single DataFrame
portf_rets_df = pd.DataFrame(portf_rets)
return portf_rets_df, ws
Heuristic portfolios¶
We now compare the following heuristic portfolios:
- $1/N$ portfolio:
- GMRP:
- Quintile portfolio (sorting stocks by $\bm{\mu}$):
- Quintile portfolio (sorting stocks by $\bm{\mu}/\bm{\sigma}$); and
- Quintile portfolio (sorting stocks by $\bm{\mu}/\bm{\sigma}^2$).
# Get data
stock_prices = SP500_stocks_2015to2020[
["AAPL", "AMZN", "AMD", "GM", "GOOGL", "MGM", "MSFT", "QCOM", "TSCO", "UPS"]
].loc["2019":]
T = stock_prices.shape[0]
T_trn = round(0.70*T)
# Define heuristic portfolios
def QuintP_mu(X_lin):
X_log = np.log(1 + X_lin)
mu = X_log.mean()
idx = mu.argsort()[::-1]
w = np.zeros(len(mu))
w[idx[:len(mu)//5]] = 1 / (len(mu)//5)
return w
def QuintP_mu_over_sigma(X_lin):
X_log = np.log(1 + X_lin)
mu = X_log.mean()
sigma = X_log.std()
idx = (mu / sigma).argsort()[::-1]
w = np.zeros(len(mu))
w[idx[:len(mu)//5]] = 1 / (len(mu)//5)
return w
def QuintP_mu_over_sigma2(X_lin):
X_log = np.log(1 + X_lin)
mu = X_log.mean()
sigma = X_log.std()
idx = (mu / sigma**2).argsort()[::-1]
w = np.zeros(len(mu))
w[idx[:len(mu)//5]] = 1 / (len(mu)//5)
return w
def GMRP(X_lin):
X_log = np.log(1 + X_lin)
mu = X_log.mean()
w = np.zeros(len(mu))
w[mu.argmax()] = 1
return w
# Run backtest
bt_ret, bt_w = backtest(
{
"1/N": EWP,
"GMRP": GMRP,
"QuintP (sorted by mu)": QuintP_mu,
"QuintP (sorted by mu/sigma)": QuintP_mu_over_sigma,
"QuintP (sorted by mu/sigma2)": QuintP_mu_over_sigma2,
},
stock_prices, lookback=T_trn, rebalance_every=30)
bt_w["1/N"].head()
AAPL | AMZN | AMD | GM | GOOGL | MGM | MSFT | QCOM | TSCO | UPS | |
---|---|---|---|---|---|---|---|---|---|---|
Date | ||||||||||
2020-03-19 | 0.1 | 0.1 | 0.1 | 0.1 | 0.1 | 0.1 | 0.1 | 0.1 | 0.1 | 0.1 |
2020-03-20 | 0.096425 | 0.099873 | 0.098908 | 0.102432 | 0.098993 | 0.10479 | 0.098768 | 0.103219 | 0.102368 | 0.094222 |
2020-03-23 | 0.09069 | 0.098444 | 0.098808 | 0.105369 | 0.09553 | 0.124511 | 0.095466 | 0.097139 | 0.102161 | 0.091883 |
2020-03-24 | 0.088295 | 0.100934 | 0.103324 | 0.101693 | 0.093774 | 0.124397 | 0.094015 | 0.099196 | 0.1045 | 0.089872 |
2020-03-25 | 0.087564 | 0.092752 | 0.103366 | 0.109933 | 0.090601 | 0.149244 | 0.092436 | 0.093593 | 0.096549 | 0.083963 |
# Extract the first row of each portfolio's weights and prepare for plotting
first_rows = {
portfolio_name: weights.iloc[0] # Get the first row of each DataFrame
for portfolio_name, weights in bt_w.items()
}
# Convert the dictionary of first rows into a DataFrame
first_rows_df = pd.DataFrame(first_rows).T.reset_index() # Transpose and reset index
first_rows_df.columns = ['portfolio'] + list(first_rows_df.columns[1:])
first_rows_df
portfolio | AAPL | AMZN | AMD | GM | GOOGL | MGM | MSFT | QCOM | TSCO | UPS | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1/N | 0.1 | 0.1 | 0.1 | 0.1 | 0.1 | 0.1 | 0.1 | 0.1 | 0.1 | 0.1 |
1 | GMRP | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2 | QuintP (sorted by mu) | 0.5 | 0.0 | 0.5 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
3 | QuintP (sorted by mu/sigma) | 0.5 | 0.0 | 0.5 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
4 | QuintP (sorted by mu/sigma2) | 0.5 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.5 | 0.0 | 0.0 | 0.0 |
# Barplot of weight allocation
melted_df = first_rows_df.melt(id_vars='portfolio', var_name='stocks', value_name='weights')
fig, ax = plt.subplots(figsize=(12, 4))
sns.barplot(x='stocks', y='weights', hue='portfolio', ax=ax, data=melted_df)
ax.set_title("Weight Allocation for Heuristic Portfolios")
plt.xticks(rotation=45)
plt.show()
# Plot of cumulative return
plot_cum_return(bt_ret)
# Plot of drawdown
plot_drawdown(bt_ret)
#
# Table of performance metrics
#
def print_table_performance_metrics(bt_ret):
risk_free_rate = 0.0 # Risk-free rate (e.g., 0 for simplicity)
trading_days = 252 # Number of trading days in a year
# Calculate Annualized Return
annualized_return = (1 + bt_ret.mean())**trading_days - 1
# Calculate Annualized Volatility
annualized_volatility = bt_ret.std() * np.sqrt(trading_days)
# Calculate Sharpe Ratio
sharpe_ratio = (bt_ret.mean() - risk_free_rate) / bt_ret.std()
sharpe_ratio *= np.sqrt(trading_days) # Annualize Sharpe ratio
# Calculate Maximum Drawdown
def calculate_max_drawdown(returns):
cumulative_returns = (1 + returns).cumprod()
running_max = cumulative_returns.cummax()
drawdown = (cumulative_returns - running_max) / running_max
max_drawdown = drawdown.min()
return max_drawdown
max_drawdown = bt_ret.apply(calculate_max_drawdown)
# Combine all metrics into a single DataFrame
performance_table = pd.DataFrame({
'Annualized Return': annualized_return,
'Annualized Volatility': annualized_volatility,
'Sharpe Ratio': sharpe_ratio,
'Maximum Drawdown': max_drawdown
})
# Display the performance table
performance_table_styled = performance_table.style.format("{:.2f}")
display(performance_table_styled)
print_table_performance_metrics(bt_ret)
Annualized Return | Annualized Volatility | Sharpe Ratio | Maximum Drawdown | |
---|---|---|---|---|
1/N | 2.74 | 0.36 | 3.66 | -0.11 |
GMRP | 3.56 | 0.63 | 2.43 | -0.19 |
QuintP (sorted by mu) | 2.92 | 0.47 | 2.91 | -0.20 |
QuintP (sorted by mu/sigma) | 2.92 | 0.46 | 3.00 | -0.20 |
QuintP (sorted by mu/sigma2) | 1.82 | 0.40 | 2.61 | -0.18 |
Risk-based portfolios¶
We now compare the following risk-based portfolios:
- Global minimum variance portfolio (GMVP):
- Inverse volatility portfolio (IVP):
Risk parity portfolio (RPP);
Most diversified portfolio (MDP):
- Maximum decorrelation portfolio (MDCP):
where $\bm{C} = \textm{Diag}\left(\bSigma\right)^{-1/2} \bSigma \textm{Diag}\left(\bSigma\right)^{-1/2}.$
# Define risk-based portfolios
def GMVP(X_lin):
# Estimate parameters
X_log = np.log(1 + X_lin)
Sigma = X_log.cov().values
# Optimize portfolio
N = len(Sigma)
w = cp.Variable(N)
prob = cp.Problem(cp.Minimize(cp.quad_form(w, Sigma)),
[cp.sum(w) == 1, w >= 0])
prob.solve()
return w.value
def IVP(X_lin):
# Estimate parameters
X_log = np.log(1 + X_lin)
sigma = X_log.std()
# Design portfolio
w = 1 / sigma
return w / w.sum()
def RPP(X_lin):
# Estimate parameters
X_log = np.log(1 + X_lin)
Sigma = X_log.cov().values
# Optimize portfolio
N = len(Sigma)
my_portfolio = rpp.RiskParityPortfolio(covariance=Sigma, weights=np.ones(N) / N, budget=np.ones(N) / N)
my_portfolio.design()
return my_portfolio.weights
def MDP(X_lin):
# Estimate parameters
X_log = np.log(1 + X_lin)
Sigma = X_log.cov().values
mu = np.sqrt(np.diag(Sigma))
# Optimize portfolio
N = len(Sigma)
w = cp.Variable(N)
prob = cp.Problem(cp.Minimize(cp.quad_form(w, Sigma)),
[mu @ w == 1, w >= 0])
prob.solve()
return w.value / w.value.sum()
def MDCP(X_lin):
# Estimate parameters
X_log = np.log(1 + X_lin)
Sigma = X_log.cov().values
D = np.diag(1 / np.sqrt(np.diag(Sigma)))
C = D @ Sigma @ D
# Optimize portfolio
N = len(C)
w = cp.Variable(N)
prob = cp.Problem(cp.Minimize(cp.quad_form(w, C)),
[cp.sum(w) == 1, w >= 0])
prob.solve()
return w.value
# Run backtest
bt_ret, bt_w = backtest(
{
"GMVP": GMVP,
"IVP": IVP,
"RPP": RPP,
"MDP": MDP,
"MDCP": MDCP,
},
stock_prices, lookback=T_trn, rebalance_every=30)
#
# Barplot of weight allocation
#
first_rows = {
portfolio_name: weights.iloc[0] # Get the first row of each DataFrame
for portfolio_name, weights in bt_w.items()
}
first_rows_df = pd.DataFrame(first_rows).T.reset_index() # Transpose and reset index
first_rows_df.columns = ['portfolio'] + list(first_rows_df.columns[1:])
melted_df = first_rows_df.melt(id_vars='portfolio', var_name='stocks', value_name='weights')
fig, ax = plt.subplots(figsize=(12, 4))
sns.barplot(x='stocks', y='weights', hue='portfolio', ax=ax, data=melted_df)
ax.set_title("Weight Allocation for Heuristic Portfolios")
plt.xticks(rotation=45)
plt.show()
# Plot of cumulative return
plot_cum_return(bt_ret)
# Plot of drawdown
plot_drawdown(bt_ret)
# Table of performance metrics
print_table_performance_metrics(bt_ret)
Annualized Return | Annualized Volatility | Sharpe Ratio | Maximum Drawdown | |
---|---|---|---|---|
GMVP | 2.17 | 0.29 | 3.95 | -0.11 |
IVP | 2.42 | 0.33 | 3.69 | -0.11 |
RPP | 2.45 | 0.33 | 3.74 | -0.11 |
MDP | 2.88 | 0.33 | 4.11 | -0.09 |
MDCP | 3.39 | 0.38 | 3.89 | -0.10 |