関数で綺麗にする

今日やったこと

def read_and_pre_process_asset_df(asset_file_path : str) -> pd.DataFrame :
    """Load one asset's price CSV and normalise it for merging.

    The asset name is the file name without its directory and extension.
    Only the '日付け' (date) and '終値' (closing price) columns are read; they
    are renamed to 'Date' and to the asset name respectively.

    Args:
        asset_file_path: Path to a UTF-8 CSV file that contains the '日付け'
            and '終値' columns.

    Returns:
        A DataFrame with a 'Date' column of ``datetime.date`` objects and one
        column named after the asset. The price values are left as
        ``read_csv`` parsed them; no numeric cleaning happens here.
    """
    stem, _extension = os.path.splitext(os.path.basename(asset_file_path))

    # One mapping drives both the column selection and the renaming.
    column_names = {'日付け' : 'Date', '終値' : stem}
    frame = pd.read_csv(
        asset_file_path,
        encoding = "utf-8",
        usecols = list(column_names),
    ).rename(columns = column_names)

    # Dates are written like "2020年01月06日"; parse them, then keep only the
    # calendar date (no time component).
    parsed = pd.to_datetime(frame['Date'], format = '%Y年%m月%d日')
    frame['Date'] = parsed.dt.date

    return frame

def merge_df(df_list : list, on : str = 'Date', how : str = 'inner') -> pd.DataFrame :
    """Merge a list of DataFrames into one by joining them on a shared column.

    The frames are folded in from first to last. Each newly merged frame is
    the left operand, so columns of later frames appear before columns of
    earlier ones in the result.

    Args:
        df_list: Non-empty list of DataFrames that all contain the ``on`` column.
        on: Name of the key column to join on.
        how: Join type forwarded to ``pd.merge``. The default ``'inner'``
            keeps only keys that are present in every frame.

    Returns:
        The merged DataFrame. A single-element list returns that element as is.

    Raises:
        IndexError: If ``df_list`` is empty.
    """
    if not df_list:
        # Same exception type as indexing an empty list, but with a message
        # that points at the real cause (for example, a glob that matched nothing).
        raise IndexError('merge_df requires at least one DataFrame, got an empty list')

    total_df = df_list[0]
    for df in df_list[1:]:
        total_df = pd.merge(df, total_df, on = on, how = how)

    return total_df

def pre_process_total_df(total_df : pd.DataFrame) -> pd.DataFrame :
    """Sort by date, index by date, and convert every value column to float.

    Values are first converted to strings so that thousands separators
    (commas) can be removed, then converted to float.

    Args:
        total_df: DataFrame with a 'Date' column; all other columns must hold
            values that parse as numbers once commas are removed.

    Returns:
        A new DataFrame indexed by 'Date' in ascending order, with every
        column converted to float. The input frame is not modified.
    """
    indexed = total_df.sort_values(['Date']).set_index('Date')

    # Column-wise Series.str.replace replaces the per-cell DataFrame.applymap
    # call, which is deprecated since pandas 2.1. regex=False treats ',' as a
    # literal character.
    without_commas = indexed.astype(str).apply(
        lambda column: column.str.replace(',', '', regex = False)
    )

    return without_commas.astype(float)

# Read every CSV file directly under the data directory. glob returns matches
# in arbitrary order, so the paths are sorted to keep the merged column order
# the same on every run and machine.
asset_file_paths = sorted(glob.glob('data/*.csv'))
asset_df_list = [read_and_pre_process_asset_df(asset_file_path)
                 for asset_file_path in asset_file_paths]
asset_total_df = merge_df(asset_df_list)
asset_total_df = pre_process_total_df(asset_total_df)

# Compute returns as the fractional change from the previous row, then drop
# rows containing NaN (such as the first row, which has no previous value).
df_return = asset_total_df.pct_change().dropna()

ほぼうつしただけ

# Each call to read_and_pre_process_asset_df returns one DataFrame, so this
# comprehension builds a list holding one DataFrame per path in asset_file_paths.
asset_df_list = [read_and_pre_process_asset_df(asset_file_path)
                 for asset_file_path in asset_file_paths]

ここはリストの各要素がpandasのデータフレームってこと?

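type() で型を見たらリスト (list) と出てきたけど、なんか不思議だった。type(asset_df_list) はリスト全体の型なので list になり、type(asset_df_list[0]) のように各要素の型を見ると pandas の DataFrame になる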

ともあれ、複数のCSVを取り込んで、一つのデータフレームにまとめる作業はよく行う作業だろうから覚えておこう