関数で綺麗にする
今日やったこと
def read_and_pre_process_asset_df(asset_file_path: str) -> pd.DataFrame:
    """Load the date and closing-price columns of one asset's CSV file.

    Only the '日付け' and '終値' columns are read. They are renamed to
    'Date' and to the file's base name without its extension, so every
    asset contributes a distinctly named price column.

    Args:
        asset_file_path: Path to a UTF-8 encoded CSV file whose '日付け'
            values are formatted like '2020年01月06日'.

    Returns:
        A two-column DataFrame: 'Date' holding ``datetime.date`` objects and
        a price column left exactly as ``pd.read_csv`` parsed it (any
        thousands separators are removed later by ``pre_process_total_df``).
    """
    asset_name_ext = os.path.basename(asset_file_path)
    asset_name = os.path.splitext(asset_name_ext)[0]

    asset_df = pd.read_csv(asset_file_path, encoding='utf-8', usecols=['日付け', '終値'])
    asset_df = asset_df.rename(columns={'日付け': 'Date', '終値': asset_name})

    # Parse the Japanese-style date, then drop the time-of-day component.
    asset_df['Date'] = pd.to_datetime(asset_df['Date'], format='%Y年%m月%d日').dt.date
    return asset_df


def merge_df(df_list: list, on: str = 'Date') -> pd.DataFrame:
    """Merge every DataFrame in ``df_list`` into one frame on a shared column.

    ``pd.merge`` is called with its default join type, so a row survives only
    if its key is present in every frame.

    Args:
        df_list: Non-empty list of DataFrames that all contain column ``on``.
        on: Name of the key column to join on.

    Returns:
        The merged DataFrame.

    Raises:
        IndexError: If ``df_list`` is empty.
    """
    if not df_list:
        # Same exception type the bare ``df_list[0]`` lookup would raise,
        # but with a message that explains what went wrong.
        raise IndexError('merge_df needs at least one DataFrame to merge')

    total_df = df_list[0]
    for df in df_list[1:]:
        # Each new frame goes on the left, so later frames' columns come first.
        total_df = pd.merge(df, total_df, on=on)
    return total_df


def pre_process_total_df(total_df: pd.DataFrame) -> pd.DataFrame:
    """Sort by date, index by date, and convert every value to float.

    Args:
        total_df: DataFrame with a 'Date' column; every other column holds
            numbers or numeric strings that may contain ',' separators
            (e.g. '23,204.86').

    Returns:
        A float DataFrame indexed by 'Date' in ascending order.
    """
    total_df = total_df.sort_values(['Date']).set_index('Date')

    # Strip thousands separators column by column with the vectorised string
    # accessor; DataFrame.applymap is deprecated in recent pandas releases.
    return (
        total_df.astype(str)
        .apply(lambda column: column.str.replace(',', '', regex=False))
        .astype(float)
    )


if __name__ == '__main__':
    # Read every CSV file directly under the data directory. glob returns
    # paths in arbitrary order, so sort them to keep the column order stable.
    asset_file_paths = sorted(glob.glob('data/*.csv'))
    asset_df_list = [read_and_pre_process_asset_df(asset_file_path)
                     for asset_file_path in asset_file_paths]

    asset_total_df = merge_df(asset_df_list)
    asset_total_df = pre_process_total_df(asset_total_df)

    # Compute the period-over-period rate of return; the first row has no
    # previous value and is dropped.
    df_return = asset_total_df.pct_change()
    df_return = df_return.dropna()
ほぼうつしただけ
asset_df_list = [read_and_pre_process_asset_df(asset_file_path) for asset_file_path in asset_file_paths]
ここはリストの各要素がpandasのデータフレームってこと?
type(asset_df_list)
で型を見たらリスト(list)と出てきて、なんか不思議だった。リスト自体の型は list で、type(asset_df_list[0]) のように各要素の型を見ると pandas の DataFrame になっている
ともあれ、複数のCSVを取り込んで一つのデータフレームにまとめる処理はよく行うだろうから、覚えておこう