Python Code for Portfolio Optimization Chapter 2 – Financial Data: Stylized Facts
Daniel P. Palomar (2025). Portfolio Optimization: Theory and Application. Cambridge University Press.
Last update: February 8, 2026
Contributors:
Libraries¶
The following libraries are used in the examples:
# Core data handling
import yfinance as yf
import pandas as pd
import numpy as np
# Book data (pip install "git+https://github.com/dppalomar/pob.git#subdirectory=python")
from pob_python import SP500_stocks_2015to2020, cryptos_2017to2021_daily
# Statistical analysis
from scipy.stats import norm, skew, kurtosis
import arch
# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme(style="darkgrid")
import statsmodels.api as sm
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
Prices and Returns¶
First download data for S&P 500 and Bitcoin:
# S&P 500 data
sp500 = yf.download('^GSPC', start='2007-01-01', end='2022-11-04')
sp500_prices = sp500['Close']
# Bitcoin data
btc = yf.download('BTC-USD', start='2017-01-01', end='2022-11-04')
btc_prices = btc['Close']
Prices¶
S&P 500 price over time:
# Plot S&P 500 price
fig, ax = plt.subplots(figsize=(12, 6))
np.log(sp500_prices).plot(ax=ax)
<Axes: xlabel='Date'>
Bitcoin price over time:
# Plot Bitcoin price
fig, ax = plt.subplots(figsize=(12, 6))
np.log(btc_prices).plot(ax=ax)
<Axes: xlabel='Date'>
Returns¶
Now, plot daily log-returns (recall that linear returns can be similarly plotted, but they are almost identical):
# S&P 500 returns
sp500_returns = np.log(sp500_prices).diff().dropna()
# Bitcoin returns
btc_returns = np.log(btc_prices).diff().dropna()
def plot_returns(returns, title):
fig, ax = plt.subplots(figsize=(12,6))
ax.plot(returns, linewidth=0.5)
ax.set_title(title)
ax.set_xlabel('Date')
ax.set_ylabel('Log Return')
plt.show()
S&P 500 returns:
plot_returns(sp500_returns, 'S&P 500 Daily Log Returns')
We can observe: High-volatility period during global financial crisis in 2008, as well as the high peak in volatility in early 2020 due to the COVID-19 pandemic.
Bitcoin returns:
plot_returns(btc_returns, 'Bitcoin Daily Log Returns')
We can observe: Bitcoin flash crash on March 12, 2020, with a drop close to 50% in a single day.
Non-Gaussianity¶
Histogram (with Gaussian fit) and Q-Q plots for S&P 500 and Bitcoin:
def analyze_distribution(returns, asset_name):
print(f"{asset_name} Distribution Properties:")
print(f"Skewness: {skew(returns).item():.4f}") # Fixed line
print(f"Excess Kurtosis: {kurtosis(returns, fisher=False).item():.4f}")
fig, ax = plt.subplots(1,2, figsize=(15,5))
# Histogram with normal fit
sns.histplot(returns, kde=False, ax=ax[0], stat='density')
x = np.linspace(returns.min(), returns.max(), 100)
ax[0].plot(x, norm.pdf(x, returns.mean(), 0.6*returns.std()))
ax[0].set_title('Return Distribution')
# Q-Q Plot
sm.graphics.qqplot(returns.squeeze(), line='45', fit=True, ax=ax[1])
ax[1].set_xlim(-4, 4)
ax[1].set_title('Q-Q Plot')
plt.tight_layout()
plt.show()
Histogram and Q-Q plot for S&P 500 daily log-returns:
analyze_distribution(sp500_returns, 'S&P 500')
S&P 500 Distribution Properties: Skewness: -0.5376 Excess Kurtosis: 14.9277
Histogram and Q-Q plot for Bitcoin daily log-returns:
analyze_distribution(btc_returns, 'Bitcoin')
Bitcoin Distribution Properties: Skewness: -0.7073 Excess Kurtosis: 13.3596
Temporal Structure¶
Linear Structure¶
Autocorrelation of Returns¶
def plot_autocorrelation(returns, title):
fig, (ax1, ax2) = plt.subplots(2,1, figsize=(12,8))
plot_acf(returns, ax=ax1, lags=40)
plot_pacf(returns, ax=ax2, lags=40)
fig.suptitle(title)
plt.tight_layout()
plt.show()
Autocorrelation analysis of S&P 500 daily log-returns:
plot_autocorrelation(sp500_returns, 'S&P 500 ACF/PACF')
Autocorrelation analysis of Bitcoin daily log-returns:
plot_autocorrelation(btc_returns, 'Bitcoin ACF/PACF')
Nonlinear Structure¶
Volatility Clustering¶
def estimate_volatility(returns):
model = arch.arch_model(returns, vol='Garch', p=1, q=1, rescale=False)
result = model.fit(disp='off')
return result.conditional_volatility
sp500_vol = estimate_volatility(sp500_returns)
btc_vol = estimate_volatility(btc_returns)
def plot_volatility(returns, vol, title):
fig, ax = plt.subplots(figsize=(12,6))
ax.plot(returns, alpha=0.5, label='Returns')
ax.plot(vol, color='red', label='Conditional Volatility')
ax.set_title(title)
ax.legend()
plt.show()
Volatility clustering in S&P 500:
plot_volatility(sp500_returns, sp500_vol, 'S&P 500 Volatility Clustering')
Volatility clustering in Bitcoin:
plot_volatility(btc_returns, btc_vol, 'Bitcoin Volatility Clustering')
Autocorrelation of Magnitude of Returns¶
S&P 500:
plot_autocorrelation(abs(sp500_returns), 'S&P 500 ACF/PACF of absolute value of returns')
Bitcoin:
plot_autocorrelation(abs(btc_returns), 'Bitcoin ACF/PACF of absolute value of returns')
Asset structure¶
Correlation matrix of returns for stocks:
from pob_python import SP500_stocks_2015to2020, cryptos_2017to2021_hourly
def plot_correlation_matrix(prices, title):
returns = np.log(prices).diff().dropna()
corr_matrix = returns.corr()
plt.figure(figsize=(12,10))
sns.heatmap(corr_matrix, annot=False, cmap='viridis',
xticklabels=True, yticklabels=True)
plt.title(title)
plt.show()
Stocks correlation matrix:
# Example 40 random stocks
data = SP500_stocks_2015to2020.sample(n=40, axis='columns')[-200:]
data.head()
| UHS | ARE | RF | TRV | MET | FAST | VRSK | ABMD | NRG | HSIC | ... | RSG | HD | NSC | PPL | HPQ | ULTA | UPS | TXN | HII | CME | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Date | |||||||||||||||||||||
| 2019-12-06 | 144.4843 | 158.6986 | 16.4274 | 132.129 | 47.7894 | 35.2484 | 145.8491 | 189.22 | 37.4937 | 69.42 | ... | 87.4646 | 210.482 | 188.1078 | 32.5706 | 19.7567 | 262.20 | 114.440 | 119.7261 | 249.281 | 199.973 |
| 2019-12-09 | 143.4959 | 159.1494 | 16.4563 | 132.668 | 47.8570 | 35.4151 | 146.4948 | 181.69 | 37.5814 | 68.68 | ... | 87.3759 | 212.693 | 186.1669 | 32.3332 | 19.7278 | 252.64 | 115.034 | 119.3641 | 249.104 | 199.758 |
| 2019-12-10 | 144.3545 | 158.7966 | 16.3888 | 132.756 | 47.5867 | 35.2484 | 146.3061 | 179.75 | 37.4060 | 68.33 | ... | 87.2380 | 212.074 | 185.8911 | 32.4195 | 19.6366 | 250.37 | 115.423 | 118.7477 | 247.678 | 199.865 |
| 2019-12-11 | 144.4743 | 155.8959 | 16.2345 | 131.865 | 47.3936 | 35.5377 | 145.8392 | 178.87 | 37.9323 | 69.10 | ... | 87.5237 | 208.243 | 188.1866 | 32.3811 | 19.5102 | 251.64 | 113.719 | 121.0470 | 249.606 | 199.485 |
| 2019-12-12 | 146.0419 | 153.7105 | 16.7937 | 132.825 | 48.7935 | 36.9255 | 146.6850 | 179.15 | 38.3319 | 68.82 | ... | 87.2774 | 208.282 | 187.6940 | 32.2277 | 19.8602 | 258.87 | 113.875 | 123.3462 | 250.402 | 197.955 |
5 rows × 40 columns
plot_correlation_matrix(data, 'Stock Correlation Matrix')
Crypto correlation matrix:
# Example 40 random cryptos
data = cryptos_2017to2021_hourly.sample(n=40, axis='columns')[-200:]
data.head()
| EOS | CHZ | COMP | BTC | CRV | AVAX | ONT | ATOM | CRO | YFI | ... | FIL | DGB | FTT | NEO | SUSHI | LINK | ETH | BNT | OMG | MANA | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Date | |||||||||||||||||||||
| 2021-06-08 23:00:00 | 5.048589 | 0.233320 | 349.545935 | 33379.10 | 2.316510 | 14.817984 | 0.963655 | 13.569939 | 0.113823 | 39714.453180 | ... | 75.970832 | 0.060947 | 30.951438 | 48.933761 | 10.081156 | 24.106052 | 2507.170959 | 4.106631 | 5.374035 | 0.711976 |
| 2021-06-09 00:00:00 | 4.925761 | 0.225931 | 341.889509 | 32886.64 | 2.252735 | 14.312923 | 0.940558 | 13.177019 | 0.112143 | 38819.389856 | ... | 72.219061 | 0.058900 | 30.368181 | 47.521195 | 9.848562 | 23.421207 | 2452.882931 | 4.017761 | 5.196089 | 0.682727 |
| 2021-06-09 01:00:00 | 4.845657 | 0.219208 | 335.738749 | 32523.37 | 2.188823 | 14.104410 | 0.924639 | 12.854537 | 0.110579 | 38101.127955 | ... | 72.266928 | 0.058197 | 30.024925 | 46.573466 | 9.688061 | 22.952393 | 2430.634057 | 3.965900 | 5.138692 | 0.670307 |
| 2021-06-09 02:00:00 | 4.849642 | 0.220543 | 337.617939 | 32867.79 | 2.221863 | 14.036190 | 0.927200 | 12.874642 | 0.112079 | 38182.511643 | ... | 71.816121 | 0.056569 | 30.251514 | 46.639394 | 9.667074 | 22.983460 | 2450.819629 | 3.988506 | 5.127375 | 0.675104 |
| 2021-06-09 03:00:00 | 4.850785 | 0.222477 | 334.898374 | 32862.17 | 2.237914 | 13.900369 | 0.929342 | 12.883942 | 0.111731 | 38350.152390 | ... | 72.559671 | 0.056753 | 30.344271 | 46.861454 | 9.687768 | 22.941409 | 2455.527067 | 3.996697 | 5.126499 | 0.681233 |
5 rows × 40 columns
plot_correlation_matrix(data, 'Crypto Correlation Matrix')