基于python实现配对交易

配对交易原理

金融时间序列大多是非平稳性序列,通过单位根或者ADF检验可以检验出时间序列的平稳性,通过差分可以让非平稳序列变平稳。

如果两个不平稳时间序列的线性组合为平稳时间序列,那么我们称这两个事件序列是“协整”关系。此外,还有一点需要注意的是不平稳时间序列需要同阶单整,即它们差分平稳的阶数需要一致。时间序列不平稳和同阶单整是满足协整关系的最基本条件,只有满足了这两个条件后才能进行下面的协整检验。协整检验的目的在于确定两个不平稳时间序列的线性关系是否是长期稳定的。协整检验的方法一般有EG两步法和JJ检验。

代码实现

相关包用法

常用金融数据获取方法

###方法一
from pandas_datareader import data as pdr
import fix_yahoo_finance as yf
yf.pdr_override() # <== that's all it takes :-)
# download dataframe
data = pdr.get_data_yahoo("SPY", start="2017-01-01", end="2017-04-30")
# download Panel
data = pdr.get_data_yahoo(["SPY", "IWM"], start="2017-01-01", end="2017-04-30")
###方法二
quandl.get("WIKI/GOOGL", start_date="2017-7-10", end_date="2018-7-10")

实操

import numpy as np
import pandas as pd
import statsmodels.api as sm
import seaborn as sns
import quandl
import matplotlib.pyplot as plt
def find_cointegrated_pairs(dataframe):
    # 得到DataFrame长度
    n = dataframe.shape[1]
    # 初始化p值矩阵
    pvalue_matrix = np.ones((n, n))
    # 抽取列的名称
    keys = dataframe.keys()
    # 初始化强协整组
    pairs = []
    # 对于每一个i
    for i in range(n):
        # 对于大于i的j
        for j in range(i+1, n):
            # 获取相应的两只股票的价格Series
            stock1 = dataframe[keys[i]]
            stock2 = dataframe[keys[j]]
            # 分析它们的协整关系
            result = sm.tsa.stattools.coint(stock1, stock2)
            # 取出并记录p值
            pvalue = result[1]
            pvalue_matrix[i, j] = pvalue
            # 如果p值小于0.05
            if pvalue < 0.05:
                # 记录股票对和相应的p值
                pairs.append((keys[i], keys[j], pvalue))
    # 返回结果
    return pvalue_matrix, pairs

stock_list = ['united','america','facebook']
start_date='01-07-2015'
end_date='01-07-2017'
united=quandl.get('WIKI/UAL',start_date=start_date,end_date=end_date)['Adj. Close']
america=quandl.get('WIKI/AAL',start_date=start_date,end_date=end_date)['Adj. Close']
facebook=quandl.get('WIKI/FB',start_date=start_date,end_date=end_date)['Adj. Close']
prices_df=pd.concat([united,america,facebook],axis=1)
prices_df.columns= ['united','america','facebook']
pvalues, pairs = find_cointegrated_pairs(prices_df)
sns.heatmap(1-pvalues, xticklabels=stock_list, yticklabels=stock_list, cmap='RdYlGn_r', mask = (pvalues == 1))  #cmap从数字到色彩空间的映射,mask控制某个矩阵块是否显示出来
stock_df1 = prices_df['united']
stock_df2 = prices_df['america']
plt.plot(stock_df1); plt.plot(stock_df2)
plt.xlabel("Time"); plt.ylabel("Price")
plt.legend(["united", "america"],loc='best')

x = stock_df1
y = stock_df2
X = sm.add_constant(x) 
result = (sm.OLS(y,X)).fit()
print(result.summary())

fig, ax = plt.subplots(figsize=(8,6))
ax.plot(x, y, 'o', label="data")
ax.plot(x, result.fittedvalues, 'r', label="OLS")
ax.legend(loc='best')

plt.plot(0.4326*stock_df1-stock_df2);
plt.axhline((0.4326*stock_df1-stock_df2).mean(), color="red", linestyle="--")
plt.xlabel("Time"); plt.ylabel("Stationary Series")
plt.legend(["Stationary Series", "Mean"])

def zscore(series):
    return (series - series.mean()) / np.std(series)

plt.plot(zscore(0.4326*stock_df1-stock_df2))
plt.axhline(zscore(0.4326*stock_df1-stock_df2).mean(), color="black")
plt.axhline(1.0, color="red", linestyle="--")
plt.axhline(-1.0, color="green", linestyle="--")
plt.legend(["z-score", "mean", "+1", "-1"])

参考资料

[01]. 【量化课堂】基于协整的搬砖策略