xiaoyr100 / Python-loop-error

Always be cool
0 stars 0 forks source link

Calendar Spread Arbitrage Strategy #1

Open xiaoyr100 opened 4 years ago

xiaoyr100 commented 4 years ago

Sample code.pdf This file is part of a calendar spread arbitrage strategy based on Chinese Treasury futures, written in Python. For confidentiality reasons, this part is only about 20% of the whole project; its job is to extract data from Wind (a Chinese financial database). Using a margin of 1.1 million yuan, I obtained a 9.7% annual yield on data from May 2015 to July 2019.

xiaoyr100 commented 4 years ago

# -*- coding: utf-8 -*-
"""
Created on Wed Jul 24 14:29:06 2019

@author: Kris
"""
import os

# Relative Excel paths used further down are resolved against this folder.
os.chdir('C:/Users/18036/Desktop/当季+远季')
print('路径修改为:' + os.getcwd())

import pandas as pd
import numpy as np
import re
import functools
import itertools
from datetime import datetime
from dateutil.relativedelta import relativedelta
from WindPy import w

# Start the WindPy API before any w.wsd / w.wss call is made.
w.start()

# 1. Compute moving averages

def MA1(df, col_name, x_day, start_date, end_date):
    """Return the rolling mean of ``df[col_name]`` restricted to a date range.

    The rolling mean is computed over the whole column first, so rows that
    precede ``start_date`` still feed the first windows of the returned slice.

    Args:
        df: DataFrame whose index values can be compared with
            ``datetime.date`` objects.
        col_name: Name of the column to average.
        x_day: Rolling window size, in rows.
        start_date: First date to keep, formatted as ``'%Y-%m-%d'``.
        end_date: Last date to keep (inclusive), formatted as ``'%Y-%m-%d'``.

    Returns:
        A Series named ``str(x_day) + 'MA'`` holding the rolling means whose
        index lies in ``[start_date, end_date]``.

    Raises:
        ValueError: If either date string does not match ``'%Y-%m-%d'``.
    """
    raw_ma = df[col_name].rolling(window=x_day).mean()

    # Turn the input strings into date objects (time part dropped) so they
    # can be compared with the index when slicing.
    start_date_obj = datetime.strptime(start_date, '%Y-%m-%d').date()
    end_date_obj = datetime.strptime(end_date, '%Y-%m-%d').date()

    in_range = (raw_ma.index >= start_date_obj) & (raw_ma.index <= end_date_obj)
    temp = raw_ma[in_range]
    temp.name = str(x_day) + 'MA'
    return temp

# 2. Rewrite a single hiscode (contract code)

def change_code(sub_func, jiansuo_str):
    """Replace every run of digits in ``jiansuo_str`` using ``sub_func``.

    Args:
        sub_func: Callable that receives an ``re`` match object for each run
            of digits and returns the replacement text.
        jiansuo_str: String to rewrite.

    Returns:
        The rewritten string.
    """
    digit_run = re.compile(r'\d+')
    return digit_run.sub(sub_func, jiansuo_str)

def sub_yuanji(matched):
    """Map a current-season ``YYMM`` code to the far-season code (six months on).

    Intended as the replacement callback for ``re.sub``.

    Args:
        matched: Match object whose group 0 is the numeric ``YYMM`` part of a
            contract code; characters 2-3 are read as the month.

    Returns:
        The far-season code as a string, zero-padded to the width of the
        input so that codes such as ``'0906'`` keep their leading zero.
    """
    dangji_code = matched.group(0)
    # September and December roll into the next year: +94 turns YY09 into
    # (YY+1)03 and YY12 into (YY+1)06. Other months simply move on by six.
    if dangji_code[2:4] in ('09', '12'):
        step = 94
    else:
        step = 6
    return str(int(dangji_code) + step).zfill(len(dangji_code))

def sub_ciji(matched):
    """Map a current-season ``YYMM`` code to the next-season code (three months on).

    Intended as the replacement callback for ``re.sub``.

    Args:
        matched: Match object whose group 0 is the numeric ``YYMM`` part of a
            contract code; characters 2-3 are read as the month.

    Returns:
        The next-season code as a string, zero-padded to the width of the
        input so that codes such as ``'0909'`` keep their leading zero.
    """
    dangji_code = matched.group(0)
    # December rolls into March of the next year: YY12 + 91 == (YY+1)03.
    step = 91 if dangji_code[2:4] == '12' else 3
    return str(int(dangji_code) + step).zfill(len(dangji_code))

# Add the next-season and far-season hiscode columns

def hiscode_add_season(df, col_name, sub_func):
    """Add a column of contract codes derived from ``df['TRADE_HISCODE']``.

    Each code is passed through ``change_code`` with ``sub_func`` as the
    digit-rewriting callback. The column is assigned in one step, so it is
    created as a string column rather than a float-NaN column that is later
    overwritten row by row.

    Args:
        df: DataFrame containing a ``'TRADE_HISCODE'`` column; modified in place.
        col_name: Name of the column to create (or overwrite).
        sub_func: Callback handed to ``change_code``, e.g. ``sub_ciji`` or
            ``sub_yuanji``.

    Returns:
        The same ``df`` object with the new column added.
    """
    df[col_name] = [change_code(sub_func, item) for item in df['TRADE_HISCODE']]
    return df

# 3. Extract the CTD

# Replace the date

def get_next_trade_date(matched, df):
    """Return the trading date that follows the matched ``YYYYMMDD`` date.

    How it works: look up the position of the matched date in ``df.index``,
    add one, and format the date found at that position.

    Args:
        matched: Match object with a named group ``"number"`` holding an
            eight-digit ``%Y%m%d`` date.
        df: DataFrame whose index lists the trading dates as
            ``datetime.date`` values.

    Returns:
        The next trading date formatted as ``%Y%m%d``. When the matched date
        is the last row of ``df`` the input date string is returned unchanged.

    Raises:
        ValueError: If the matched date is not present in ``df.index``.
    """
    date_str = matched.group("number")
    datetime_date = datetime.strptime(date_str, "%Y%m%d").date()
    next_index = df.index.tolist().index(datetime_date) + 1
    # Bound by the real number of rows instead of a hard-coded 1062 so the
    # function works for any date range.
    if next_index < len(df.index):
        date_str = df.index[next_index].strftime("%Y%m%d")
    return date_str

def get_CTD(df, col_name, options):
    """Add a ``col_name + 'CTD'`` column by querying Wind once per row.

    For every contract code in ``df[col_name]`` the Wind field ``tbf_CTD`` is
    requested with the current ``options`` string and the first value of the
    response is stored. After each request the eight-digit date embedded in
    ``options`` is advanced with ``get_next_trade_date2``.

    Args:
        df: DataFrame holding the contract codes; modified in place.
        col_name: Column with the contract codes to query.
        options: Wind options string containing an eight-digit date, e.g.
            ``"tradeDate=20150320;exchangeType=NIB"``.

    Returns:
        The same ``df`` object with the new column added.

    Note:
        ``get_next_trade_date2`` must exist at module level as a one-argument
        callable, e.g.
        ``functools.partial(get_next_trade_date, df=...)``, because ``re.sub``
        passes only the match object to its callback.
    """
    ctd_codes = []
    for item in df[col_name]:
        ctd_codes.append(w.wss(item, "tbf_CTD", options).Data[0][0])
        # The group must be named "number": get_next_trade_date reads it
        # through matched.group("number").
        options = re.sub(r"(?P<number>\d{8})", get_next_trade_date2, options)
    df[col_name + 'CTD'] = ctd_codes
    return df

# 4. The date index is awkward to work with, so it is replaced by a numeric index and the dates are kept in a newly added date column

def reset_date_index(df):
    """Return a copy of ``df`` with its index moved into a ``'日期'`` column.

    The result has a fresh 0..n-1 integer index; ``df`` itself is left
    untouched. The column created from the index is renamed to ``'日期'``
    whether or not the index had a name (``reset_index`` calls it ``'index'``
    only when the index is unnamed).

    Args:
        df: DataFrame with a single-level index.

    Returns:
        A new DataFrame with the former index stored in column ``'日期'``.
    """
    index_col = 'index' if df.index.name is None else df.index.name
    df1 = df.reset_index()  # returns a new frame and keeps the old index as a column
    return df1.rename(columns={index_col: '日期'})

# Returns a new DataFrame with a numeric index

# 5. Get the positional index of a DataFrame column name

def get_col_index(df, col_name):
    """Return the zero-based position of ``col_name`` among ``df``'s columns.

    Args:
        df: DataFrame to inspect.
        col_name: Column label to look up.

    Returns:
        Position of the first column equal to ``col_name``.

    Raises:
        ValueError: If no column has that label.
    """
    return list(df.columns).index(col_name)

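# 4. Extract CTD codes by interval

'''
Idea: define a checker that tests whether the current-season contract in the
current row equals the one in the next row (the next-row index must not exceed 1062).
Keep a start-row index.
If the two differ, record the start row's contract, the date at the start-row
index and the date of the current row; the output should be
[contract, start date, current-row date].
Reset the start row to the next-row index.
Use the code and the start/end dates to pull the corresponding indicator.
'''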

def count_groups(df, col_name):
    """Count the distinct non-missing values in ``df[col_name]``.

    The result is used as the number of iterations of the outer grouping loop.

    Args:
        df: DataFrame to inspect.
        col_name: Column whose distinct values are counted.

    Returns:
        Number of distinct values, missing values excluded.
    """
    return df[col_name].nunique()

def comp_all_len(list_temp):
    """Sum the element at position 3 of every tuple in ``list_temp``.

    With the tuples produced by ``internal_loop`` position 3 is the run
    length, so the total is the row offset at which the next run starts.

    Args:
        list_temp: Iterable of sequences with at least four elements.

    Returns:
        The sum of all ``item[3]`` values; ``0`` for an empty input.
    """
    return sum(item[3] for item in list_temp)

def internal_loop(df, col_name, start_index):
    """Describe the run of identical codes that starts at row ``start_index``.

    The date index of ``df`` is first turned into a ``'日期'`` column, the
    frame is cut at ``start_index`` and renumbered from zero, and rows are
    then scanned until the value in ``col_name`` changes or the frame ends.

    Args:
        df: DataFrame indexed by date.
        col_name: Column holding the codes to group.
        start_index: Positional row at which the run begins.

    Returns:
        A tuple ``(code, start_date, end_date, data_len)``: the code of the
        run, the dates of its first and last rows, and the number of rows in
        it. ``data_len`` is what the caller adds to get the next start row.
    """
    frame = reset_date_index(df=df)  # dates become an ordinary column
    frame = frame[start_index:].reset_index(drop=True)  # tail slice, renumbered from 0
    last_row = len(frame) - 1  # the tail gets shorter on every call

    code = frame.loc[0, col_name]
    start_date = frame.loc[0, '日期']

    rows_seen = 0
    for row, current in enumerate(frame[col_name]):
        rows_seen += 1
        if row >= last_row:
            # The final row has no successor to compare with.
            end_date = frame.loc[row, '日期']
        elif current != frame.loc[row + 1, col_name]:
            # The code changes on the next row, so the run ends here.
            end_date = frame.loc[row, '日期']
            break

    return (code, start_date, end_date, rows_seen)

def get_groups(df, col_name):
    """Split ``df[col_name]`` into consecutive runs of identical codes.

    The number of runs is taken from ``count_groups``, which counts distinct
    values.
    NOTE(review): this presumably relies on every code occupying a single
    contiguous run - confirm for the data being processed.

    Args:
        df: DataFrame indexed by date.
        col_name: Column holding the codes to group.

    Returns:
        A list of ``(code, start_date, end_date, data_len)`` tuples as
        produced by ``internal_loop``, in row order.
    """
    remaining = count_groups(df, col_name) - 1
    runs = [internal_loop(df, col_name, 0)]
    for _ in range(remaining):
        # The next run starts right after all rows consumed so far.
        next_start = comp_all_len(runs)
        runs.append(internal_loop(df, col_name, next_start))
    return runs

def get_data_by_interval_settle_price(df, col_name):
    """Add a ``col_name + '交割价'`` column with settle prices pulled from Wind.

    ``df[col_name]`` is split into runs with ``get_groups``; for each run the
    ``settle`` field is requested from Wind over the run's date span, and the
    pieces are concatenated and aligned to ``df`` by index.

    Args:
        df: DataFrame indexed by date; modified in place.
        col_name: Column holding the contract codes, e.g. ``'次季合约'``. It is
            also the prefix of the column that is created.

    Returns:
        The same ``df`` object with the new column added.
    """
    pieces = []
    for group in get_groups(df, col_name):
        code, start_date, end_date = group[0], group[1], group[2]
        # Example call:
        # w.wsd("T1512.CFE", "settle", "2015-03-20", "2015-05-20", usedf=True)[1]
        piece = w.wsd(code, "settle", start_date, end_date, "unit=1", usedf=True)[1]
        pieces.append(piece)

    combination = pd.concat(pieces)  # one frame covering all runs
    # The column is named '交割价' although the values are settle prices.
    df[col_name + '交割价'] = combination.iloc[:, 0]
    return df

# 5. Use the CTD to pull the CTD net price and yield to maturity

def count_groups_CTD(df, col_name):
    """Count the runs of equal adjacent values in ``df[col_name]``.

    Unlike a distinct-value count, a value that reappears after a different
    one starts a new run. The result is used as the loop count by
    ``get_groups_CTD``.

    Args:
        df: DataFrame to inspect.
        col_name: Column whose runs are counted.

    Returns:
        Number of runs; ``0`` for an empty column.
    """
    values = df[col_name].tolist()
    if not values:
        return 0
    # One run for the first value plus one per change between neighbours.
    # This is linear, whereas deleting duplicates from the list in place is
    # quadratic.
    changes = sum(1 for prev, cur in zip(values, values[1:]) if not (cur == prev))
    return 1 + changes

def get_groups_CTD(df, col_name):
    """Split ``df[col_name]`` into consecutive runs of identical CTD codes.

    Works like ``get_groups`` but takes the number of runs from
    ``count_groups_CTD``, which counts adjacent runs and therefore copes with
    a code that reappears later.

    Args:
        df: DataFrame indexed by date.
        col_name: Column holding the CTD codes.

    Returns:
        A list of ``(code, start_date, end_date, data_len)`` tuples as
        produced by ``internal_loop``, in row order.
    """
    remaining = count_groups_CTD(df, col_name) - 1
    runs = [internal_loop(df, col_name, 0)]
    for _ in range(remaining):
        # The next run starts right after all rows consumed so far.
        next_start = comp_all_len(runs)
        runs.append(internal_loop(df, col_name, next_start))
    return runs

def get_data_by_interval_CTD_net_yield(df, col_name):
    """Add net price, dirty price and yield columns for the CTD in ``col_name``.

    ``df[col_name]`` is split into runs with ``get_groups_CTD``; for each run
    the fields ``net_csi``, ``dirty_csi`` and ``ytm_b`` are requested from
    Wind over the run's date span. The pieces are concatenated and written to
    ``df`` by position.

    Args:
        df: DataFrame indexed by date; modified in place.
        col_name: Column holding the CTD codes, e.g. ``'当季合约CTD'``. It is
            also the prefix of the three columns that are created.

    Returns:
        The same ``df`` object with the ``'净值'``, ``'全价'`` and ``'到期收益率'``
        columns added.
    """
    pieces = []
    for group in get_groups_CTD(df, col_name):
        code, start_date, end_date = group[0], group[1], group[2]
        # Example call:
        # w.wsd("140012.IB", "net_csi,ytm_b", "2015-03-20", "2019-07-24", "returnType=1", usedf=True)
        piece = w.wsd(code, "net_csi,dirty_csi,ytm_b", start_date, end_date, "returnType=1", usedf=True)[1]
        pieces.append(piece)

    combination = pd.concat(pieces)  # one frame covering all runs
    # The concatenated index contains the CTD names (reason unknown), but the
    # row order is correct, so the values are assigned positionally.
    for suffix, field in (('净值', 'NET_CSI'), ('全价', 'DIRTY_CSI'), ('到期收益率', 'YTM_B')):
        df[col_name + suffix] = combination[field].values
    return df

# 6. Use the CTD to pull conversion factors; the contract in front changes too, so they have to be fetched one at a time

def change_options_code(contract_code, options_str):
    """Replace the ``T<digits>.CFE`` contract code inside a Wind options string.

    Args:
        contract_code: Contract code to insert, e.g. ``'T1512.CFE'``.
        options_str: Options string such as ``'contractCode=T1509.CFE'``.

    Returns:
        The options string with every ``T<digits>.CFE`` occurrence replaced
        by ``contract_code``.
    """
    # The dot is escaped so that only a literal ".CFE" suffix matches.
    return re.sub(r'T\d+\.CFE', contract_code, options_str)

# Add the conversion-factor column

def get_factor(df, col_name, options):
    """Add a ``col_name + '转换因子'`` column by querying Wind once per row.

    ``col_name`` names a CTD column. The matching contract column is found by
    dropping the last three characters of ``col_name``. For every row the
    contract code of that row is substituted into ``options`` and the Wind
    field ``tbf_cvf`` is requested for the row's CTD code.

    Args:
        df: DataFrame holding both the CTD column and its contract column;
            modified in place.
        col_name: Name of the CTD column, e.g. ``'当季合约CTD'``.
        options: Wind options string containing a ``T<digits>.CFE`` code,
            e.g. ``"contractCode=T1509.CFE"``.

    Returns:
        The same ``df`` object with the new column added.
    """
    factor_col = col_name + '转换因子'
    df[factor_col] = np.nan
    # Strip the trailing 'CTD' to address the contract column itself.
    contract_pos = get_col_index(df, col_name[:-3])

    for row, ctd_code in enumerate(df[col_name]):  # one request per CTD row
        options = change_options_code(df.iloc[row, contract_pos], options)
        factor = w.wss(ctd_code, "tbf_cvf", options).Data[0][0]
        df.loc[df.index[row], factor_col] = factor
    return df

# 7. Fill in the new CTD

def fill_new_CTD(df, col_name, dic):
    """Add a ``col_name + 'CTD'`` column by looking each code up in ``dic``.

    The column is assigned in one step, so it is created as a string column
    rather than a float-NaN column that is later overwritten cell by cell.

    Args:
        df: DataFrame holding the contract codes; modified in place.
        col_name: Column with the contract codes used as lookup keys.
        dic: Mapping from contract code to CTD code.

    Returns:
        The same ``df`` object with the new column added.

    Raises:
        KeyError: If a code in ``df[col_name]`` is missing from ``dic``.
    """
    df[col_name + 'CTD'] = [dic[item] for item in df[col_name]]
    return df

if __name__ == '__main__':

    # Moving averages need a warm-up period, so the download starts earlier
    # than the first date that is kept.
    # Funding rate
    FR007 = w.wsd("FR007.IR", "close", "2015-01-01", "2019-07-24", usedf=True)[1]
    FR007_5MA = MA1(FR007, 'CLOSE', 5, '2015-03-20', '2019-07-24')
    FR007_20MA = MA1(FR007, 'CLOSE', 20, '2015-03-20', '2019-07-24')

    #FR007.to_excel('FR007.xlsx')
    #FR007_5MA.to_excel('FR007_5MA.xlsx')
    #FR007_20MA.to_excel('FR007_20MA.xlsx')
    # 10-year Treasury bond futures

    # trade_hiscode is the contract name
    T_CFE = w.wsd("T.CFE", "settle,trade_hiscode", "2015-03-20", "2019-07-24", usedf=True)[1]
    T_CFE = hiscode_add_season(T_CFE, '次季合约', sub_ciji)
    T_CFE = hiscode_add_season(T_CFE, '远季合约', sub_yuanji)

    T_CFE = T_CFE.rename(columns={'TRADE_HISCODE' : '当季合约'})
    T_CFE = T_CFE.rename(columns={'SETTLE' : '当季合约交割价'})

    # Pull the settle prices of the next-season and far-season contracts.
    # The current-season contract already has them:
    #get_data_by_interval(T_CFE, '当季合约交割价', future_contract_group_dangji)
    T_CFE = get_data_by_interval_settle_price(T_CFE, '次季合约')
    T_CFE = get_data_by_interval_settle_price(T_CFE, '远季合约')
    #T_CFE.to_excel('contracts.xlsx')    # contracts and settle prices

    # The callback given to re.sub cannot take extra arguments, so
    # get_next_trade_date is wrapped into get_next_trade_date2, which needs
    # only the match object.
    #get_next_trade_date2 = functools.partial(get_next_trade_date, df=T_CFE)
    #T_CFE = get_CTD(T_CFE, '当季合约', options="tradeDate=20150320;exchangeType=NIB")
    #T_CFE = get_CTD(T_CFE, '次季合约', options="tradeDate=20150320;exchangeType=NIB")
    # The far-season contract does not need a CTD.

    # Fill in the new CTD, ordered near to far by time
    #T_CFE = fill_new_CTD(T_CFE, '当季合约', new_CTD)
    #T_CFE = fill_new_CTD(T_CFE, '次季合约', new_CTD)

    #from get_CTD3 import 
    # This workbook already contains the hand-corrected CTD columns.
    T_CFE = pd.read_excel('Contracts_CTD_change_by_hand.xlsx', index_col=1)
    # Drop the leftover 'Unnamed: 0' column when present. Only a missing
    # column is tolerated; any other error is allowed to surface.
    T_CFE = T_CFE.drop(columns=['Unnamed: 0'], errors='ignore')

    # Disabled: move the current-season settle price column for readability
    # (all of these should really be called settle prices).
    #T_CFE_temp = T_CFE['当季合约交割价']
    #T_CFE = T_CFE.drop('当季合约交割价',axis=1)
    #T_CFE.insert(5,'当季合约交割价',T_CFE_temp)

    # Use the CTD to pull the CTD net price and yield to maturity (07/30 17:34)
    #w.wsd("150016.IB", "net_csi,ytm_b", "2015-03-20", "returnType=1")
    #w.wss("150016.IB", "net_cnbd","tradeDate=20150320;credibility=1")

    # Pull net price and yield
    T_CFE = get_data_by_interval_CTD_net_yield(T_CFE, '当季合约CTD')
    T_CFE = get_data_by_interval_CTD_net_yield(T_CFE, '次季合约CTD')

    T_CFE1 = T_CFE.copy()  # keep a snapshot

    # Pull conversion factors
    T_CFE1 = get_factor(T_CFE1, '当季合约CTD', "contractCode=T1509.CFE")
    T_CFE1 = get_factor(T_CFE1, '次季合约CTD', "contractCode=T1512.CFE")
    T_CFE['当季合约CTD转换因子'] = T_CFE1['当季合约CTD转换因子']
    T_CFE['次季合约CTD转换因子'] = T_CFE1['次季合约CTD转换因子']

    # T_CFE ended up with NaNs although every row should have data and the
    # dates were already handled - still unexplained.
    T_CFE3 = T_CFE.copy()

    # Conversion factors have no gaps:
    #T_CFE3 = T_CFE3.fillna(method = 'ffill')

    # Save all data
    #T_CFE.to_excel('8.5T_CFE.xlsx', sheet_name='sheet1')  # conversion factors etc. have missing values
    # Originally described as forward-filled; the fillna call above is
    # currently disabled.
    T_CFE3.to_excel('T_CFE_with_ind.xlsx', sheet_name='sheet1')

    # Reference Wind calls:
    # CTD code for a single day
    #w.wss("T1909.CFE", "tbf_CTD","tradeDate=20190725;exchangeType=NIB")
    # CTD code over an interval
    #w.wsd("T1909.CFE", "tbf_CTD", "2015-03-20", "2019-07-24", "exchangeType=NIB")
    # CTD net price and yield to maturity
    #w.wsd("140012.IB", "net_csi,ytm_b", "2015-03-20", "2019-07-24", "returnType=1")
    # Conversion factor; the contract must be specified
    #w.wss("140012.IB", "tbf_cvf","contractCode=T1509.CFE")
    # Dirty price
    #w.wsd("140029.IB", "dirty_csi", "2015-03-20", "2015-04-30", "")

# The built-in next() function

'''
it = iter([1, 2, 3, 4, 5])

# Loop:
while True:
    try:
        # Fetch the next value:
        x = next(it)
        print(x)
    except StopIteration:
        # Leave the loop once StopIteration is raised
        break
'''