HRPその2(使用データ作成)
今回やること
使用データ作成
参考コード
参考コードの対応部分
# Map of company name -> ticker symbol to download.
stocks = {
    "Apple": "AAPL", "Amazon": "AMZN", "Alphabet": "GOOG", "Microsoft": "MSFT",
    "Facebook": "FB", "Alibaba": "BABA", "Berkshire Hathaway": "BRK-A",
    "Tencent": "TCEHY", "JPMorgan": "JPM", "ExxonMobil": "XOM",
    "Johnson & Johnson": "JNJ", "Samsung Electronics": "005930.KS",
    "Bank of America": "BAC",
}
stocks = pd.DataFrame(list(stocks.items()), columns=['name', 'symbol'])

ts = TimeSeries(key=config.key, output_format='pandas')

# Fetch the daily close series for every symbol, then combine them column-wise.
# pd.concat replaces DataFrame.append (removed in pandas 2.0); concatenating on
# axis=1 also makes the old append-rows-then-transpose step unnecessary.
close_series = []
for symbol in stocks.symbol.values:
    data, _ = ts.get_daily(symbol=symbol, outputsize='full')
    close = data['4. close']
    close.index = pd.to_datetime(close.index)
    close_series.append(close)
stocks_close = pd.concat(close_series, axis=1)

stocks_close = stocks_close.sort_index()
stocks_close = stocks_close.ffill()  # fillna(method='ffill') is deprecated
stocks_close.columns = stocks.name.values
stocks_close = stocks_close["2015-01-01":"2018-01-01"]
# to_returns() is monkey-patched onto DataFrame by the ffn package.
returns = stocks_close.to_returns().dropna()
やること
やることの整理
まずは上記の参考コードでやっている処理を整理。やっていることは恐らく、以下の通り。
stock
内の各資産についてalpha_vantage
からstocks
内のプライス情報を取得。取得したプライスの終値(close)を抽出
インデックスをdatetime形式へ変換
stock_close
へ追加
全て結合した
stock_close
についてインデックスでソートしたり
列名つけたり
各時点のプライスからリターンへ変換
やること検証
不安だし、せっかくコードがあるので、実行しながら上記の内容があっているか確認
APIのキーは自身で取得したものを使用した
import scipy.cluster.hierarchy as sch
import numpy as np
import pandas as pd
import datetime as date
import matplotlib.pyplot as plt
import cvxopt as opt
import ffn  # registers DataFrame.to_returns(); this import was missing before
from alpha_vantage.timeseries import TimeSeries

# Map of company name -> ticker symbol to download.
stocks = {
    "Apple": "AAPL", "Amazon": "AMZN", "Alphabet": "GOOG", "Microsoft": "MSFT",
    "Facebook": "FB", "Alibaba": "BABA", "Berkshire Hathaway": "BRK-A",
    "Tencent": "TCEHY", "JPMorgan": "JPM", "ExxonMobil": "XOM",
    "Johnson & Johnson": "JNJ", "Samsung Electronics": "005930.KS",
    "Bank of America": "BAC",
}
stocks = pd.DataFrame(list(stocks.items()), columns=['name', 'symbol'])

ts = TimeSeries(key='自身で取得したやつ', output_format='pandas')

# Fetch the daily close series for every symbol, then combine them column-wise.
# pd.concat replaces DataFrame.append (removed in pandas 2.0); concatenating on
# axis=1 also makes the old append-rows-then-transpose step unnecessary.
close_series = []
for symbol in stocks.symbol.values:
    data, _ = ts.get_daily(symbol=symbol, outputsize='full')
    close = data['4. close']
    close.index = pd.to_datetime(close.index)
    close_series.append(close)
stocks_close = pd.concat(close_series, axis=1)

stocks_close = stocks_close.sort_index()
stocks_close = stocks_close.ffill()  # fillna(method='ffill') is deprecated
stocks_close.columns = stocks.name.values
stocks_close = stocks_close["2015-01-01":"2018-01-01"]
returns = stocks_close.to_returns().dropna()
print(returns.head(10))
結果
ValueError: Thank you for using Alpha Vantage! Our standard API call frequency is 5 calls per minute and 500 calls per day. Please visit https://www.alphavantage.co/premium/ if you would like to target a higher API call frequency.
くそっ、、無料版だとダメか、、、
ということで、資産の数を5つに減らして再チャレンジした結果が以下。
Apple Amazon Alphabet Microsoft Facebook 2015-01-05 -0.028172 -0.020517 -0.020846 -0.009303 -0.016061 2015-01-06 0.000094 -0.022833 -0.023177 -0.014571 -0.013473 2015-01-07 0.014022 0.010600 -0.001713 0.012705 0.000000 2015-01-08 0.038422 0.006836 0.003153 0.029418 0.026592 2015-01-09 0.001072 -0.011749 -0.012951 -0.008405 -0.005564 2015-01-12 -0.024641 -0.018590 -0.007296 -0.012503 -0.013121 2015-01-13 0.008879 0.011427 0.007370 -0.005258 -0.003519 2015-01-14 -0.003811 -0.004987 0.009452 -0.008629 -0.002224 2015-01-15 -0.027140 -0.021550 0.001837 -0.010336 -0.029234 2015-01-16 -0.007770 0.013208 0.012535 0.016711 0.015260
間違ってなかった!
実際に用意したデータ
用いたデータ
以下の6資産。期間は2005/1/4~2020/3/9
USD/JPY
AUD/JPY
NZD/USD
USD/ZAR
Nifty50
N225(日経平均)
コード
前回作成した(してもらった)関数をそのまま使えばおっけー
def read_and_pre_process_asset_df(asset_file_path: str) -> pd.DataFrame:
    """Read one asset CSV and return a (Date, <asset name>) DataFrame.

    The asset name is taken from the file name without its extension.
    The CSV is expected to have Japanese headers '日付け' (date) and
    '終値' (close), with dates formatted like '2020年03月09日'.
    """
    asset_name_ext = os.path.basename(asset_file_path)
    asset_name = os.path.splitext(asset_name_ext)[0]
    asset_df = pd.read_csv(asset_file_path, encoding="utf-8",
                           usecols=['日付け', '終値'])
    asset_df = asset_df.rename(columns={'日付け': 'Date', '終値': asset_name})
    asset_df['Date'] = pd.to_datetime(asset_df['Date'], format='%Y年%m月%d日')
    asset_df['Date'] = asset_df['Date'].dt.date
    return asset_df


def merge_df(df_list: list, on: str = 'Date') -> pd.DataFrame:
    """Inner-join a list of DataFrames on the given key column.

    Only dates present in every input survive, which keeps the asset
    price series aligned.
    """
    total_df = df_list[0]
    for df in df_list[1:]:
        total_df = pd.merge(df, total_df, on=on)
    return total_df


def pre_process_total_df(total_df: pd.DataFrame) -> pd.DataFrame:
    """Sort by date, index by date, and convert '1,234.5'-style strings to float."""
    total_df = total_df.sort_values(['Date'])
    total_df = total_df.set_index('Date')
    # Strip thousands separators in one pass; replace(regex=True) only touches
    # string cells, avoiding the deprecated astype(str).applymap(...) round-trip.
    total_df = total_df.replace(',', '', regex=True).astype(float)
    return total_df


if __name__ == '__main__':
    # Read every CSV directly under data/, join on date, compute daily returns.
    asset_file_paths = glob.glob('data/*.csv')
    asset_df_list = [read_and_pre_process_asset_df(p) for p in asset_file_paths]
    asset_total_df = merge_df(asset_df_list)
    asset_total_df = pre_process_total_df(asset_total_df)
    df_return = asset_total_df.pct_change().dropna()
    print(df_return.head(10))
結果
USDZAR USDJPY NZDUSD Nifty50 N225 AUDJPY Date 2005-01-05 0.017574 -0.004971 -0.005266 -0.034011 -0.006966 -0.007857 2005-01-06 0.032452 0.009415 -0.002003 -0.016657 0.004786 0.004148 2005-01-07 -0.003688 -0.002570 -0.004158 0.008582 -0.005136 -0.006510 2005-01-11 -0.022719 -0.013645 0.004463 -0.031481 0.009337 -0.006804 2005-01-12 -0.001894 -0.009190 0.014046 -0.019697 -0.007504 -0.002537 2005-01-13 0.014610 -0.000098 -0.004523 0.021399 -0.008309 -0.002417 2005-01-14 -0.000331 -0.003711 -0.006815 -0.011998 0.007058 -0.012878 2005-01-17 0.000017 0.000392 -0.004861 0.000932 0.004258 -0.000646 2005-01-18 0.008675 0.002155 -0.002299 0.000595 -0.005558 -0.001422 2005-01-19 -0.005728 0.005181 0.009791 -0.003826 -0.001569 0.010743
終了〜
現在のコード
import scipy.cluster.hierarchy as sch
import numpy as np
import pandas as pd
import datetime as date
import matplotlib.pyplot as plt
import cvxopt as opt
from alpha_vantage.timeseries import TimeSeries
import ffn
import glob
import os  # used by read_and_pre_process_asset_df; this import was missing


def read_and_pre_process_asset_df(asset_file_path: str) -> pd.DataFrame:
    """Read one asset CSV and return a (Date, <asset name>) DataFrame.

    The asset name is taken from the file name without its extension.
    The CSV is expected to have Japanese headers '日付け' (date) and
    '終値' (close), with dates formatted like '2020年03月09日'.
    """
    asset_name_ext = os.path.basename(asset_file_path)
    asset_name = os.path.splitext(asset_name_ext)[0]
    asset_df = pd.read_csv(asset_file_path, encoding="utf-8",
                           usecols=['日付け', '終値'])
    asset_df = asset_df.rename(columns={'日付け': 'Date', '終値': asset_name})
    asset_df['Date'] = pd.to_datetime(asset_df['Date'], format='%Y年%m月%d日')
    asset_df['Date'] = asset_df['Date'].dt.date
    return asset_df


def merge_df(df_list: list, on: str = 'Date') -> pd.DataFrame:
    """Inner-join a list of DataFrames on the given key column.

    Only dates present in every input survive, which keeps the asset
    price series aligned.
    """
    total_df = df_list[0]
    for df in df_list[1:]:
        total_df = pd.merge(df, total_df, on=on)
    return total_df


def pre_process_total_df(total_df: pd.DataFrame) -> pd.DataFrame:
    """Sort by date, index by date, and convert '1,234.5'-style strings to float."""
    total_df = total_df.sort_values(['Date'])
    total_df = total_df.set_index('Date')
    # Strip thousands separators in one pass; replace(regex=True) only touches
    # string cells, avoiding the deprecated astype(str).applymap(...) round-trip.
    total_df = total_df.replace(',', '', regex=True).astype(float)
    return total_df


# Read every CSV directly under data/, join on date, compute daily returns.
asset_file_paths = glob.glob('data/*.csv')  # dataディレクトリ直下のcsvファイル読み込み
asset_df_list = [read_and_pre_process_asset_df(asset_file_path)
                 for asset_file_path in asset_file_paths]
asset_total_df = merge_df(asset_df_list)
asset_total_df = pre_process_total_df(asset_total_df)
# 収益率を取得
df_return = asset_total_df.pct_change().dropna()