Python Code for Portfolio Optimization Chapter 2 – Financial Data: Stylized Facts
Daniel P. Palomar (2025). Portfolio Optimization: Theory and Application. Cambridge University Press.
Last update: February 17, 2025
Packages¶
The following packages are used in the examples:
# Core data handling
import yfinance as yf
import pandas as pd
import numpy as np
# Book data (pip install "git+https://github.com/dppalomar/pob.git#subdirectory=python")
from pob_python import SP500_stocks_2015to2020, cryptos_2017to2021_daily
# Statistical analysis
from scipy.stats import norm, skew, kurtosis
import arch
# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme(style="darkgrid")
import statsmodels.api as sm
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
Prices and returns¶
First download data for S&P 500 and Bitcoin:
# Fetch the raw Yahoo Finance tables: S&P 500 index (^GSPC) and Bitcoin (BTC-USD)
sp500 = yf.download('^GSPC', start='2007-01-01', end='2022-11-04')
btc = yf.download('BTC-USD', start='2017-01-01', end='2022-11-04')
# Keep only the closing prices of each asset for the analysis below
sp500_prices, btc_prices = sp500['Close'], btc['Close']
- S&P 500 log-price over time:
# Plot the natural log of the S&P 500 closing price, so the y-axis shows
# log-price rather than the raw index level.
fig, ax = plt.subplots(figsize=(12, 6))
np.log(sp500_prices).plot(ax=ax)
<Axes: xlabel='Date'>
- Bitcoin log-price over time:
# Plot the natural log of the Bitcoin closing price, so the y-axis shows
# log-price rather than the raw USD price.
fig, ax = plt.subplots(figsize=(12, 6))
np.log(btc_prices).plot(ax=ax)
<Axes: xlabel='Date'>
Now, plot the daily log-returns (linear returns can be plotted in the same way; at a daily frequency the two are almost identical):
# Daily log-returns: first difference of the log-prices. The first row has no
# predecessor, so the NaN it produces is dropped.
sp500_returns, btc_returns = (
    np.log(prices).diff().dropna() for prices in (sp500_prices, btc_prices)
)
def plot_returns(returns, title):
    """Show a thin line chart of a return series.

    Args:
        returns: Return series to draw; passed straight to ``Axes.plot``.
        title: Text used as the chart title.
    """
    _, axis = plt.subplots(figsize=(12, 6))
    # A thin line keeps dense daily data readable
    axis.plot(returns, linewidth=0.5)
    axis.set(title=title, xlabel='Date', ylabel='Log Return')
    plt.show()
- S&P 500 returns:
# Draw the S&P 500 daily log-return series
plot_returns(sp500_returns, 'S&P 500 Daily Log Returns')
We can observe a high-volatility period during the global financial crisis in 2008, as well as a sharp peak in volatility in early 2020 due to the COVID-19 pandemic.
- Bitcoin returns:
# Draw the Bitcoin daily log-return series
plot_returns(btc_returns, 'Bitcoin Daily Log Returns')
We can observe the Bitcoin flash crash on March 12, 2020, with a drop close to 50% in a single day.
Non-Gaussianity¶
Histogram (with Gaussian fit) and Q-Q plots for S&P 500 and Bitcoin:
def analyze_distribution(returns, asset_name):
    """Print skewness and excess kurtosis, then plot a histogram and a Q-Q plot.

    The left panel shows the empirical density of ``returns`` with the Gaussian
    density fitted by the sample mean and sample standard deviation overlaid.
    The right panel is a normal Q-Q plot of the standardized data.

    Args:
        returns: Return data holding a single series (a pandas Series or a
            one-column DataFrame is assumed, since the code calls ``.item()``
            on the statistics and ``.squeeze()`` before the Q-Q plot).
        asset_name: Label printed in the header line of the summary.
    """
    print(f"{asset_name} Distribution Properties:")
    print(f"Skewness: {skew(returns).item():.4f}")
    # fisher=True subtracts 3, so a Gaussian scores 0: this is the *excess*
    # kurtosis that the label promises (fisher=False would be Pearson kurtosis).
    print(f"Excess Kurtosis: {kurtosis(returns, fisher=True).item():.4f}")

    fig, ax = plt.subplots(1, 2, figsize=(15, 5))

    # Histogram with normal fit
    sns.histplot(returns, kde=False, ax=ax[0], stat='density')
    x = np.linspace(returns.min(), returns.max(), 100)
    # The Gaussian fit uses the unscaled sample standard deviation; shrinking it
    # would draw a density that is not the fitted normal.
    ax[0].plot(x, norm.pdf(x, returns.mean(), returns.std()))
    ax[0].set_title('Return Distribution')

    # Q-Q plot: fit=True standardizes the data so the 45-degree line is the
    # reference for a perfect Gaussian.
    sm.graphics.qqplot(returns.squeeze(), line='45', fit=True, ax=ax[1])
    ax[1].set_xlim(-4, 4)
    ax[1].set_title('Q-Q Plot')

    plt.tight_layout()
    plt.show()
- Histogram and Q-Q plot for S&P 500 daily log-returns:
# Summary statistics, histogram and Q-Q plot for the S&P 500 log-returns
analyze_distribution(sp500_returns, 'S&P 500')
S&P 500 Distribution Properties: Skewness: -0.5376 Excess Kurtosis: 14.9277
- Histogram and Q-Q plot for Bitcoin daily log-returns:
# Summary statistics, histogram and Q-Q plot for the Bitcoin log-returns
analyze_distribution(btc_returns, 'Bitcoin')
Bitcoin Distribution Properties: Skewness: -0.7073 Excess Kurtosis: 13.3596
Temporal structure¶
Linear structure (autocorrelation)¶
def plot_autocorrelation(returns, title):
    """Show the ACF (top) and PACF (bottom) of a series up to lag 40.

    Args:
        returns: Series whose autocorrelation structure is plotted.
        title: Text used as the overall figure title.
    """
    figure, panels = plt.subplots(nrows=2, ncols=1, figsize=(12, 8))
    # Same series and lag range for both diagnostics: ACF first, then PACF
    for draw, panel in zip((plot_acf, plot_pacf), panels):
        draw(returns, ax=panel, lags=40)
    figure.suptitle(title)
    plt.tight_layout()
    plt.show()
- Autocorrelation analysis of S&P 500 daily log-returns:
# ACF/PACF of the raw S&P 500 log-returns
plot_autocorrelation(sp500_returns, 'S&P 500 ACF/PACF')
- Autocorrelation analysis of Bitcoin daily log-returns:
# ACF/PACF of the raw Bitcoin log-returns
plot_autocorrelation(btc_returns, 'Bitcoin ACF/PACF')
Nonlinear structure¶
Volatility clustering¶
def estimate_volatility(returns):
    """Fit a GARCH(1,1) model and return its conditional volatility.

    Args:
        returns: Return series handed to ``arch.arch_model``.

    Returns:
        The ``conditional_volatility`` attribute of the fitted result.
    """
    # rescale=False disables arch's automatic rescaling of the data, so the
    # fitted volatility stays in the units of the input returns.
    garch_spec = arch.arch_model(returns, vol='Garch', p=1, q=1, rescale=False)
    # disp='off' silences the optimizer's progress output
    fitted = garch_spec.fit(disp='off')
    return fitted.conditional_volatility
# Fit the GARCH(1,1) model separately to each return series and keep the
# resulting conditional-volatility series for the plots below.
sp500_vol = estimate_volatility(sp500_returns)
btc_vol = estimate_volatility(btc_returns)
def plot_volatility(returns, vol, title):
    """Overlay a volatility estimate on the return series it was fitted to.

    Args:
        returns: Return series, drawn semi-transparent in the background.
        vol: Volatility series, drawn in red on the same axes.
        title: Text used as the chart title.
    """
    _, axis = plt.subplots(figsize=(12, 6))
    # Returns are faded so the volatility envelope stands out
    axis.plot(returns, label='Returns', alpha=0.5)
    axis.plot(vol, label='Conditional Volatility', color='red')
    axis.set_title(title)
    axis.legend()
    plt.show()
- Volatility clustering in S&P 500:
# S&P 500 returns with the fitted conditional volatility overlaid
plot_volatility(sp500_returns, sp500_vol, 'S&P 500 Volatility Clustering')
- Volatility clustering in Bitcoin:
# Bitcoin returns with the fitted conditional volatility overlaid
plot_volatility(btc_returns, btc_vol, 'Bitcoin Volatility Clustering')
Autocorrelation of absolute value of log-returns:¶
- S&P 500:
# Same ACF/PACF diagnostic, applied to the absolute values of the S&P 500 returns
plot_autocorrelation(abs(sp500_returns), 'S&P 500 ACF/PACF of absolute value of returns')
- Bitcoin:
# Same ACF/PACF diagnostic, applied to the absolute values of the Bitcoin returns
plot_autocorrelation(abs(btc_returns), 'Bitcoin ACF/PACF of absolute value of returns')
Asset structure¶
Correlation matrices of returns for stocks and cryptocurrencies:
from pob_python import SP500_stocks_2015to2020, cryptos_2017to2021_hourly
def plot_correlation_matrix(prices, title):
    """Show a heatmap of the correlation matrix of the assets' log-returns.

    Args:
        prices: Price table with one column per asset; log-returns are
            computed column-wise and rows containing NaN are dropped.
        title: Text used as the chart title.
    """
    log_returns = np.log(prices).diff().dropna()
    correlations = log_returns.corr()

    plt.figure(figsize=(12, 10))
    sns.heatmap(
        correlations,
        annot=False,
        cmap='viridis',
        # Force a tick label for every asset instead of letting seaborn thin them
        xticklabels=True,
        yticklabels=True,
    )
    plt.title(title)
    plt.show()
- Stocks correlation matrix:
# Pick 40 stock columns at random, then keep only the last 200 rows (dates)
sampled_columns = SP500_stocks_2015to2020.sample(n=40, axis='columns')
data = sampled_columns.iloc[-200:]
data.head()
WMT | LYV | J | CDNS | AIV | SJM | AEE | MMC | DGX | TWTR | ... | FTI | AWK | PEG | LH | SYY | SWK | EL | FDX | FCX | MRK | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | |||||||||||||||||||||
2019-12-06 | 118.2439 | 70.48 | 84.4506 | 67.18 | 51.0220 | 104.818 | 73.3020 | 106.8198 | 105.0778 | 30.19 | ... | 18.6079 | 121.1977 | 56.5589 | 171.68 | 80.7791 | 156.573 | 196.9802 | 154.6401 | 11.5970 | 86.2047 |
2019-12-09 | 117.8293 | 69.05 | 85.2652 | 66.03 | 51.0608 | 104.066 | 73.0680 | 106.9086 | 103.4169 | 30.21 | ... | 18.4610 | 121.0100 | 56.3157 | 169.08 | 81.1305 | 156.297 | 197.7565 | 154.5413 | 12.0553 | 86.0786 |
2019-12-10 | 117.6121 | 69.60 | 86.5465 | 65.85 | 50.5468 | 103.607 | 73.2005 | 107.2638 | 103.1319 | 29.84 | ... | 18.7059 | 120.8815 | 56.1211 | 167.54 | 80.5449 | 154.986 | 196.9802 | 155.0252 | 12.2346 | 86.4472 |
2019-12-11 | 117.4739 | 69.44 | 87.5001 | 65.89 | 49.9746 | 102.014 | 73.5538 | 107.5303 | 102.9256 | 30.55 | ... | 18.9409 | 119.4489 | 56.6854 | 167.42 | 80.8865 | 157.282 | 197.6172 | 157.0790 | 12.7925 | 86.3308 |
2019-12-12 | 118.2242 | 69.43 | 89.4172 | 66.88 | 49.4606 | 100.100 | 73.3183 | 108.8230 | 103.5054 | 30.30 | ... | 19.4894 | 117.0086 | 56.9091 | 169.16 | 81.2671 | 165.334 | 199.6674 | 162.8159 | 13.0117 | 86.9033 |
5 rows × 40 columns
# Correlation heatmap of the log-returns of the sampled stocks
plot_correlation_matrix(data, 'Stock Correlation Matrix')
- Crypto correlation matrix:
# Pick 40 crypto columns at random, then keep only the last 200 rows (hourly samples)
sampled_columns = cryptos_2017to2021_hourly.sample(n=40, axis='columns')
data = sampled_columns.iloc[-200:]
data.head()
ONT | AVAX | SAND | RSR | CHZ | COMP | DCR | DGB | AR | GLM | ... | SOL | SNX | ADA | BTC | ETH | AAVE | STORJ | VET | WAVES | XEM | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Date | |||||||||||||||||||||
2021-06-08 23:00:00 | 0.963655 | 14.817984 | 0.281052 | 0.032231 | 0.233320 | 349.545935 | 129.477529 | 0.060947 | 17.069404 | 0.280718 | ... | 41.124720 | 10.440982 | 1.582169 | 33379.10 | 2507.170959 | 329.885645 | 1.003042 | 0.112821 | 14.421106 | 0.164559 |
2021-06-09 00:00:00 | 0.940558 | 14.312923 | 0.268684 | 0.031246 | 0.225931 | 341.889509 | 127.633050 | 0.058900 | 16.022042 | 0.294993 | ... | 40.686364 | 10.102118 | 1.528900 | 32886.64 | 2452.882931 | 321.664226 | 0.983311 | 0.108809 | 13.744971 | 0.161145 |
2021-06-09 01:00:00 | 0.924639 | 14.104410 | 0.265391 | 0.030621 | 0.219208 | 335.738749 | 126.320769 | 0.058197 | 15.589102 | 0.293361 | ... | 40.246044 | 9.992155 | 1.496075 | 32523.37 | 2430.634057 | 314.728651 | 0.959114 | 0.106257 | 13.524193 | 0.159365 |
2021-06-09 02:00:00 | 0.927200 | 14.036190 | 0.265900 | 0.030781 | 0.220543 | 337.617939 | 128.118645 | 0.056569 | 16.006285 | 0.291866 | ... | 40.692296 | 10.053600 | 1.511590 | 32867.79 | 2450.819629 | 315.202106 | 0.955795 | 0.108102 | 13.645392 | 0.160395 |
2021-06-09 03:00:00 | 0.929342 | 13.900369 | 0.267827 | 0.030940 | 0.222477 | 334.898374 | 127.702393 | 0.056753 | 16.073216 | 0.292802 | ... | 40.509526 | 10.068640 | 1.508045 | 32862.17 | 2455.527067 | 312.716410 | 0.960561 | 0.108192 | 13.743288 | 0.159710 |
5 rows × 40 columns
# Correlation heatmap of the log-returns of the sampled cryptocurrencies
plot_correlation_matrix(data, 'Crypto Correlation Matrix')