蒲田さん修論
をテンプレートにして作成
[
トップ
] [
新規
|
一覧
|
検索
|
最終更新
|
ヘルプ
]
開始行:
[[技術資料]]
*AIによる数法則発見の時系列データへの拡張と金融データへの...
**もくじ [#q244e13f]
#CONTENTS
**必要なモジュール [#rfca96e2]
|必要なモジュール|バージョン|インストール方法| |必要なモ...
|pandas|1.5.3|pip install pandas==1.5.3| |numpy|1.23.5|p...
|torch|2.7.0+cu118|https://pytorch.org/get-started/locall...
|symbolicregression||https://github.com/facebookresearch/...
|IPython| || |sys|||
|os||| |sys|||
バージョンやインストール方法を記述していないものはおそら...
symbolic regression
**プログラム [#u7c030c5]
**実行手順 [#d5f974f5]
データの収集からはじめる.
データの収集には以下のプログラムを使う.
#ref(data.ipynb,,データ収集のプログラム)
***このプログラムではpythonのモジュールであるyfinanceを使...
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf
#investing.comから取得したデータ
#最新のデータダウンロードにはGoogle accountが必要
日本および米国の10年国債利回りなど一部のデータはyfinance...
https://jp.investing.com/ra...
jp_interest_rate = pd.read_csv("JP_InterestRate2Y.csv", ...
us_interest_rate = pd.read_csv("US_interestRate2Y.csv", ...
jp_10Y = pd.read_csv("JP_10Y_rimawari.csv", index_col=0,...
us_10Y = pd.read_csv("US_10Y_rimawari.csv", index_col=0,...
owarineJP = jp_interest_rate[["終値"]]
owarineUS = us_interest_rate[["終値"]]
aligned_data = owarineJP.join(owarineUS, how='inner',lsu...
aligned_data['金利差_JP-US'] = aligned_data['終値_JP'] -...
start_data = '2010-01-01'
end_data = '2025-10-01'
# S&P500、日経225、USD/JPYのデータを取得するためのコード
#アメリカの株式市場インデックスS&P500を取得
sp500_df = yf.download('^GSPC', start=start_data, end=en...
print("sp500_df columns:", sp500_df.columns)
if sp500_df.empty:
print("Warning: sp500_df is empty!")
sp500 = pd.Series(dtype=float) # 空のSeriesとして初...
else:
try:
# MultiIndexの 'Close' 列を指定
sp500 = sp500_df[('Close', '^GSPC')]
except KeyError:
print("KeyError: ('Close', '^GSPC') column not f...
sp500 = pd.Series(dtype=float) # エラー時は空のSe...
print("sp500 data (Series):")
print(sp500.head())
print(f"Type of sp500: {type(sp500)}")
if isinstance(sp500, pd.Series):
print(f"sp500 is a Series. Length: {len(sp500)}, Emp...
else:
print(f"sp500 is NOT a Series. Value: {sp500}")
#日本の株式市場インデックス日経225を取得
nikkei225_df = yf.download('^N225', start=start_data, en...
print("\nnikkei225_df columns:", nikkei225_df.columns)
if nikkei225_df.empty:
print("Warning: nikkei225_df is empty!")
nikkei225 = pd.Series(dtype=float)
else:
try:
# MultiIndexの 'Close' 列を指定
nikkei225 = nikkei225_df[('Close', '^N225')]
except KeyError:
print("KeyError: ('Close', '^N225') column not f...
nikkei225 = pd.Series(dtype=float)
print("\nnikkei225 data (Series):")
print(nikkei225.head())
print(f"Type of nikkei225: {type(nikkei225)}")
if isinstance(nikkei225, pd.Series):
print(f"nikkei225 is a Series. Length: {len(nikkei22...
else:
print(f"nikkei225 is NOT a Series. Value: {nikkei225...
#USD/JPYの為替レートを取得
usd_jpy_df = yf.download('JPY=X', start=start_data, end=...
print("\nusd_jpy_df columns:", usd_jpy_df.columns)
if usd_jpy_df.empty:
print("Warning: usd_jpy_df is empty!")
usd_jpy = pd.Series(dtype=float)
else:
try:
# MultiIndexの 'Close' 列を指定
usd_jpy = usd_jpy_df[('Close', 'JPY=X')]
except KeyError:
print("KeyError: ('Close', 'JPY=X') column not f...
usd_jpy = pd.Series(dtype=float)
print("\nusd_jpy data (Series):")
print(usd_jpy.head())
print(f"Type of usd_jpy: {type(usd_jpy)}")
if isinstance(usd_jpy, pd.Series):
print(f"usd_jpy is a Series. Length: {len(usd_jpy)},...
else:
print(f"usd_jpy is NOT a Series. Value: {usd_jpy}")
#データフレームに結合
# Seriesであることを確認し、空でないことを確認
is_sp500_valid = isinstance(sp500, pd.Series) and not sp...
is_nikkei225_valid = isinstance(nikkei225, pd.Series) an...
is_usd_jpy_valid = isinstance(usd_jpy, pd.Series) and no...
if is_sp500_valid and is_nikkei225_valid and is_usd_jpy_...
print("\nAll series are valid and non-empty. Attempt...
print(f"Debug - Type of sp500 before DataFrame: {typ...
print(f"Debug - Type of nikkei225 before DataFrame: ...
print(f"Debug - Type of usd_jpy before DataFrame: {t...
try:
combined_data = pd.DataFrame({
'SP500': sp500,
'Nikkei225': nikkei225,
'USD/JPY': usd_jpy,
}).dropna()
print("\nCombined Data Head:")
print(combined_data.head())
print("\nCombined Data Info:")
combined_data.info()
except ValueError as e:
print(f"\nValueError during DataFrame creation: ...
print("Please check the types and contents of sp...
except Exception as e:
print(f"\nAn unexpected error occurred during Da...
else:
print("\nError: One or more data series are not vali...
print(f"SP500 valid: {is_sp500_valid}, Nikkei225 val...
import pandas as pd
import yfinance as yf
# コモディティデータの取得とデータフレームへの格納
start_date_commodity = '2015-01-01'
end_date_commodity = '2025-01-01'
# WTI原油価格と金価格のティッカーシンボル
commodity_tickers = ['CL=F', 'GC=F'] # CL=F はWTI原油先...
# データを一度にダウンロード
raw_commodity_data = yf.download(commodity_tickers, star...
if not raw_commodity_data.empty:
# 'Close' 価格のみを抽出
# yf.download に複数のティッカーを渡すと、列がMultiI...
# raw_commodity_data['Close'] で各ティッカーの終値の...
commodity_prices = raw_commodity_data['Close']
# 列名を分かりやすいものに変更
commodity_prices = commodity_prices.rename(columns={'...
# NaNを含む行を削除 (いずれかのデータが存在しない日を...
commodity_prices = commodity_prices.dropna()
print("\nCommodity Data Head:")
print(commodity_prices.head())
print("\nCommodity Data Info:")
commodity_prices.info()
# 完成したデータフレームを表示
commodity_data = commodity_prices
print("\nFinal Commodity DataFrame:")
print(commodity_data.head())
else:
print("コモディティデータのダウンロードに失敗しました...
commodity_data = pd.DataFrame() # 空のDataFrameを作成
# 変数 commodity_data を次のセルで使えるようにする (Jupy...
commodity_data
owarineJP10Y = jp_10Y[["終値"]]
owarineUS10Y = us_10Y[["終値"]]
aligned_data10Y = owarineJP10Y.join(owarineUS10Y, how='...
aligned_data10Y['金利差_JP-US_10Y'] = aligned_data10Y['...
# 金利差が計算されたデータを確認
print("\nAligned Data with Interest Rate Difference Head...
print(aligned_data10Y.head())
print("\nAligned Data with Interest Rate Difference Info...
InterestRateDifferentials10Y = aligned_data10Y[["終値_JP...
InterestRateDifferentials10Y
vix = pd.read_csv("VIX.csv", index_col=0, parse_dates=Tr...
vix = vix["終値"]
import pandas as pd
# 各データフレームの準備と列名の調整
# VIXデータ (SeriesからDataFrameへ変換し、列名を'VIX'に)
if isinstance(vix, pd.Series):
vix_df = vix.to_frame(name='VIX')
else: # もしvixが既にDataFrameで、適切な列名がついている...
# 必要であればここで列名を確認・変更してください
vix_df = vix
# 2年債金利差の列名を変更
InterestRateDifferentials = InterestRateDifferentials.re...
columns={'金利差_JP-US': '金利差_JP-US_2Y'}
)
# 10年債金利関連の列名を変更
InterestRateDifferentials10Y = InterestRateDifferentials...
columns={
'終値_JP': 'JP_10Y_Yield',
'終値_US': 'US_10Y_Yield',
'金利差_JP-US_10Y': '金利差_JP-US_10Y' # こちら...
}
)
# 結合するデータフレームのリストを作成
# combined_data: 'SP500', 'Nikkei225', 'USD/JPY'
# InterestRateDifferentials: '金利差_JP-US_2Y'
# InterestRateDifferentials10Y: 'JP_10Y_Yield', 'US_10Y_...
# commodity_data: 'WTI_Oil', 'Gold'
# vix_df: 'VIX'
dataframes_to_join = [
combined_data, # combined_data には 'USD/JPY' が含ま...
InterestRateDifferentials,
InterestRateDifferentials10Y,
commodity_data,
vix_df
]
# pd.concat を使用して、共通のインデックス(日付)で内部...
# axis=1 は列方向に結合することを意味します
# join='inner' はすべてのデータフレームに存在するインデ...
for_predict_usdjpy = pd.concat(dataframes_to_join, axis=...
# 'USD/JPY' 列を削除する処理を削除またはコメントアウト
# if 'USD/JPY' in for_predict_usdjpy.columns:
# for_predict_usdjpy = for_predict_usdjpy.drop(colum...
# print("\n'USD/JPY' column has been dropped.")
# else:
# print("\n'USD/JPY' column not found in the DataFra...
# 結果の確認
print("Combined DataFrame for Prediction (for_predict_us...
print(for_predict_usdjpy.head())
print("\nCombined DataFrame for Prediction (for_predict_...
for_predict_usdjpy.info()
print("\nCombined DataFrame for Prediction (for_predict_...
print(for_predict_usdjpy.describe())
# 完成したデータフレームを表示
for_predict_usdjpy
#ref(dataframe.png)
***用いるデータ
|データ項目|時間足|ダウンロード方法| |データ項目|時間足|...
|SP500|日足|yfinanceによって取得.データ収集のプログラム...
|USD/JPY|日足|yfinanceによって取得,データ収集のプログラ...
|symbolicregression||https://github.com/facebookresearch/...
|IPython| || |sys|||
|os||| |sys|||
***分析のプログラム [#pa324f5e]
import torch
import numpy as np
import sympy as sp
import os, sys
import symbolicregression
import requests
import pandas as pd
import pathlib
from IPython.display import display
from sklearn.metrics import mean_squared_error
import pathlib
model_path = "model.pt"
try:
if not os.path.isfile(model_path):
url = "https://dl.fbaipublicfiles.com/symboli...
r = requests.get(url, allow_redirects=True)
open(model_path, 'wb').write(r.content)
_posix_path_backup = None
_posix_path_created = False # Flag to track if Po...
if sys.platform == "win32": # Apply patch only on...
if hasattr(pathlib, 'PosixPath'):
_posix_path_backup = pathlib.PosixPath # ...
pathlib.PosixPath = pathlib.WindowsPath #...
else:
# If pathlib.PosixPath does not exist, cr...
# as the pickled object might refer to 'p...
pathlib.PosixPath = pathlib.WindowsPath
_posix_path_created = True
try: # Inner try for torch.load, which needs the ...
if not torch.cuda.is_available():
model = torch.load(model_path, map_locati...
else:
model = torch.load(model_path, weights_on...
model = model.cuda()
print(model.device)
print("Model successfully loaded!")
finally: # This finally block ensures the patch i...
if sys.platform == "win32":
if _posix_path_backup is not None:
pathlib.PosixPath = _posix_path_backu...
elif _posix_path_created:
delattr(pathlib, 'PosixPath') # Remov...
except Exception as e:
print("ERROR: model not loaded! path was: {}".for...
print(e)
以下の引数(max_input_points,n_trees_to_refine)の数値を変更すると精度が変わります.
色々動かしてみたところこれくらいが計算速度,精度ともに良...
対象によっては色々変えてみることを推奨します.
est = symbolicregression.model.SymbolicTransformerReg...
model=model,
max_input_points=2000,
n_trees_to_refine=5000,
rescale=True
)
data = pd.read_csv("C:/kenkyu/market_data/for_predict...
import pandas as pd
import numpy as np
# データの読み込み
file_path = "C:/kenkyu/market_data/for_predict_usdjpy...
data = pd.read_csv(file_path, encoding="utf-8-sig", i...
print("Data loaded successfully. Shape:", data.shape)
print("Columns:", data.columns)
# 目的変数と説明変数の設定
y_target_name = 'USD/JPY'
if y_target_name in data.columns:
y_series = data[y_target_name]
X_df = data.drop(columns=[y_target_name])
print(f"\nTarget variable '{y_target_name}' found...
print(f"Shape of X_df (features): {X_df.shape}")
print(f"Shape of y_series (target): {y_series.sha...
else:
print(f"Error: Target column '{y_target_name}' no...
print("Please ensure 'USD/JPY' column exists in f...
# エラーが発生した場合、後続の処理が失敗するため...
# この例では、処理を進められるようにダミーデータ...
X_df = data.copy() # 全データをXとする(不適切だ...
y_series = pd.Series(np.random.rand(len(data)), i...
print("Using dummy y_series due to missing target...
# NaN値の確認と処理 (もしあれば)
# X_df と y_series を結合して NaN を一括で処理し、再...
temp_combined_df = X_df.join(y_series, how='inner') #...
if temp_combined_df.isnull().values.any():
print("\nNaN values found. Dropping rows with any...
rows_before_dropna = len(temp_combined_df)
temp_combined_df.dropna(inplace=True)
rows_after_dropna = len(temp_combined_df)
print(f"Dropped {rows_before_dropna - rows_after_...
else:
print("\nNo NaN values found in the combined X an...
# NaN処理後のデータで再度Xとyを定義
if not temp_combined_df.empty and y_target_name in te...
X_df_aligned = temp_combined_df.drop(columns=[y_t...
y_series_aligned = temp_combined_df[y_target_name]
else:
print("Error: Data became empty after NaN handlin...
# Fallback to potentially problematic data to avo...
X_df_aligned = X_df
y_series_aligned = y_series
# NumPy配列に変換
X_ts = X_df_aligned.values
y_ts = y_series_aligned.values
print("\nData prepared for Symbolic Regression:")
print(f"X_ts shape: {X_ts.shape}")
print(f"y_ts shape: {y_ts.shape}")
if X_ts.shape[0] > 0 and y_ts.shape[0] > 0:
print("\nFirst 5 rows of X_df_aligned (features):")
print(X_df_aligned.head())
print("\nFirst 5 values of y_series_aligned (targ...
print(y_series_aligned.head())
# グローバル変数に x, y を設定して、後続のセルで...
x = X_ts
y = y_ts
print("\nGlobal variables 'x' and 'y' have been s...
else:
print("\nError: Not enough data after processing....
# グローバル変数を空の配列などで初期化するか、エ...
x = np.array([])
y = np.array([])
print("Global variables 'x' and 'y' are empty due...
import pandas as pd
import numpy as np
# データの読み込み
file_path = "C:/kenkyu/market_data/for_predict_usdjpy...
data = pd.read_csv(file_path, encoding="utf-8-sig", i...
print("Data loaded successfully. Shape:", data.shape)
print("Columns:", data.columns)
目的変数と説明変数を設定する.
この変数を変えるだけではうまくいかないので時系列ずらしか...
# 目的変数と説明変数の設定
y_target_name = 'USD/JPY'
if y_target_name in data.columns:
y_series_t = data[y_target_name].copy() # y(t) を...
X_df_t_others = data.drop(columns=[y_target_name]...
print(f"\nTarget variable '{y_target_name}' (y(t)...
print(f"Other features X_others(t) found. Shape: ...
else:
print(f"Error: Target column '{y_target_name}' no...
# 適切なエラー処理 (現在のコードと同様)
X_df_t_others = data.copy()
y_series_t = pd.Series(np.random.rand(len(data)),...
print("Using dummy y_series due to missing target...
# --- ラグ特徴量の生成 ---
# 1. 他の説明変数 X_others の t-1 の値を作成
X_df_t_minus_1_others = X_df_t_others.shift(1)
print("\nCreated lagged features for other variables ...
# 2. y の t-1 の値 (USD/JPY_lag1) を作成
y_series_t_minus_1 = y_series_t.shift(1).rename(f"{y_...
print(f"Created lagged target variable ({y_target_nam...
# 3. 全ての特徴量 (X_others(t-1) と y(t-1)) を結合し...
# ここで y_series_t_minus_1 (y(t-1)) が特徴量とし...
X_df_features_t_minus_1 = X_df_t_minus_1_others.join(...
print("Combined all lagged features (X_others(t-1) an...
if f"{y_target_name}_lag1" not in X_df_features_t_min...
print(f"Warning: {y_target_name}_lag1 was not suc...
# 4. y(t) と 全特徴量(t-1) を結合して、共通のインデッ...
# y_series_t は t 時点のまま (これが目的変数)
# X_df_features_t_minus_1 は t-1 時点の特徴量群
# これらを結合し、ラグ生成で発生したNaNを持つ行を除去
combined_df_for_regression = X_df_features_t_minus_1....
print(f"\nShape before dropping NaN from combined (Fe...
combined_df_for_regression.dropna(inplace=True)
print(f"Shape after dropping NaN: {combined_df_for_re...
if not combined_df_for_regression.empty and y_target_...
# X_df_final は y(t) を除いた全ての特徴量 (X_othe...
X_df_final = combined_df_for_regression.drop(colu...
# y_series_final は y(t)
y_series_final = combined_df_for_regression[y_tar...
# NumPy配列に変換
X_ts_lagged_with_y_lag = X_df_final.values
y_ts_aligned = y_series_final.values
print("\nData prepared for Symbolic Regression with l...
print(f"X_ts (features at t-1, including y(t-1)) shap...
print(f"y_ts (target at t) shape: {y_ts_aligned.shape...
print(f"Columns in X_df_final: {X_df_final.columns.to...
# --- 追加のデバッグプリント ---
print(f"Debug: Shape of X_ts_lagged_with_y_lag before...
print(f"Debug: Shape of y_ts_aligned before global as...
if X_ts_lagged_with_y_lag.shape[0] > 0:
print(f"Debug: First 5 rows of X_ts_lagged_with_y...
if y_ts_aligned.shape[0] > 0:
print(f"Debug: First 5 values of y_ts_aligned: \n...
# --- ここまで追加 ---
if X_ts_lagged_with_y_lag.shape[0] > 0 and y_ts_align...
print("\nFirst 5 rows of X_df_final (features...
display(X_df_final.head())
print("\nFirst 5 values of y_series_final (ta...
display(y_series_final.head())
# グローバル変数に x, y を設定して、後続のセ...
x = X_ts_lagged_with_y_lag # シンボリック回帰...
y = y_ts_aligned # シンボリック回帰モデルへ...
print("\nGlobal variables 'x' and 'y' have be...
else:
print("\nError: Not enough data after processing ...
x = np.array([])
y = np.array([])
print("Global variables 'x' and 'y' are empty.")
# 後続のプロットセルで日付インデックスを使うために X_...
if 'X_df_final' in locals() and 'y_series_final' in l...
X_df_aligned = X_df_final.copy() # X_df_final に...
y_series_aligned = y_series_final.copy() # y_seri...
print("\nUpdated X_df_aligned and y_series_aligne...
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_sc...
import matplotlib.pyplot as plt
import sympy as sp # sympyがインポートされていること...
import numpy as np # numpy をインポート
# グローバル変数 x, y が前のセルで X_ts_lagged, y_ts_...
if 'x' not in globals() or 'y' not in globals() or x....
print("Error: Input data x or y is not defined or...
sympy_expr_full = None
sympy_expr_train = None
else:
print(f"Using data for Symbolic Regression: x sha...
# 1. モデルの学習 (全データを使用)
print("\nFitting model on the entire dataset (lag...
est.fit(x, y)
print("Model fitting complete.")
# 2. 数式の取得と表示
replace_ops = {"add": "+", "mul": "*", "sub": "-"...
try:
model_str_full = est.retrieve_tree(with_infos...
for op, replace_op in replace_ops.items():
model_str_full = model_str_full.replace(o...
sympy_expr_full = sp.parse_expr(model_str_full)
print("\nRetrieved symbolic expression (train...
display(sympy_expr_full)
except Exception as e:
print(f"Error retrieving or parsing symbolic ...
sympy_expr_full = None
# 3. 評価指標の計算 (全データ)
if sympy_expr_full:
try:
num_features_full = x.shape[1]
variables_full = [sp.Symbol(f'x_{i}') for...
def safe_inv(x_val):
res = np.divide(1.0, x_val, out=np.fu...
return res
custom_modules = [{'inv': safe_inv}, 'num...
func_full = sp.lambdify(variables_full, s...
input_data_for_func_full = [x[:, i] for i...
y_pred_full = func_full(*input_data_for_f...
if isinstance(y_pred_full, (int, float)):
y_pred_full = np.full_like(y, y_pred_...
y_pred_full = np.asarray(y_pred_full).fla...
finite_mask_full = np.isfinite(y_pred_ful...
y_clean_full = y[finite_mask_full]
y_pred_clean_full = y_pred_full[finite_ma...
if len(y_pred_clean_full) > 0 and len(y_c...
if len(y_pred_clean_full) == len(y_cl...
mse_full = mean_squared_error(y_c...
rmse_full = np.sqrt(mse_full)
r2_full = r2_score(y_clean_full, ...
print(f"\n--- Evaluation on Entir...
print(f"RMSE (finite values only)...
print(f"R-squared (finite values ...
if np.sum(~finite_mask_full) > 0:
print(f"Note: {np.sum(~finite...
else:
print("Warning: Length mismatch a...
else:
print("No finite predictions availabl...
except Exception as e:
print(f"Error during metrics calculation ...
else:
print("Skipping metrics for full data as symb...
# --- 時系列を考慮した訓練/テスト分割 ---
split_ratio = 0.8
split_index = int(len(x) * split_ratio)
x_train_ts, x_test_ts = x[:split_index], x[split_...
y_train_ts, y_test_ts = y[:split_index], y[split_...
if 'X_df_aligned' in globals() and 'y_series_alig...
hasattr(X_df_aligned, 'index') and hasattr(y_seri...
len(X_df_aligned) == len(x) and len(y_series_alig...
train_dates = X_df_aligned.index[:split_index]
test_dates = X_df_aligned.index[split_index:]
if not train_dates.empty and not test_dates.e...
print(f"\nTraining data covers dates from...
print(f"Test data covers dates from {test...
else:
print("\nWarning: Date range for train/te...
train_dates = pd.RangeIndex(start=0, stop...
test_dates = pd.RangeIndex(start=len(x_tr...
else:
print("\nWarning: X_df_aligned or y_series_al...
train_dates = pd.RangeIndex(start=0, stop=len...
test_dates = pd.RangeIndex(start=len(x_train_...
print(f"\nTraining data shape: x_train_ts={x_trai...
print(f"Test data shape: x_test_ts={x_test_ts.sha...
if len(x_train_ts) > 0 and len(x_test_ts) > 0:
print("\nFitting model on training data (x_tr...
est.fit(x_train_ts, y_train_ts)
print("Model fitting on training data complet...
sympy_expr_train = None
func_train = None
try:
model_str_train = est.retrieve_tree(with_...
for op, replace_op in replace_ops.items():
model_str_train = model_str_train.rep...
sympy_expr_train = sp.parse_expr(model_st...
print(f"\nRetrieved symbolic expression (...
display(sympy_expr_train)
# --- 可読性向上処理ここから ---
from sympy import Float
def readable_expr_transform(expr, col_nam...
"""
Sympy数式を直接操作して、定数の丸めと...
"""
# 1. 数式内のすべての浮動小数点数を丸...
try:
replacements = {n: n.round(digits...
rounded_expr = expr.xreplace(repl...
except Exception as e:
print(f"Could not round floats: {...
rounded_expr = expr
# 2. x_i 形式の変数を実際の列名に置換...
try:
# インデックスの大きい順に置換(x...
subs_dict = {
sp.Symbol(f'x_{i}'): sp.Symbo...
for i, name in reversed(list(...
}
final_expr = rounded_expr.subs(su...
except Exception as e:
print(f"Could not substitute vari...
final_expr = rounded_expr
return final_expr
try:
if 'X_df_final' in globals() and symp...
# 変換処理を適用
simplified_expr = readable_expr_t...
print("\n--- 可読性向上後の式(簡...
display(simplified_expr)
print(sp.pretty(simplified_expr))
else:
print("Skipping readability impro...
except Exception as e:
print(f"可読性向上処理でエラー: {e}")
if 'sympy_expr_train' in locals():
print("Original expression:", sym...
# --- 可読性向上処理ここまで ---
num_features_train = x_train_ts.shape[1]
variables_train = [sp.Symbol(f'x_{i}') fo...
custom_modules_train = [{'inv': safe_inv}...
func_train = sp.lambdify(variables_train,...
print("Expression from training data lamb...
except Exception as e:
print(f"Error during expression retrieval...
# エラーが発生した場合、後続の処理が失敗...
func_train = None
sympy_expr_train = None
if func_train:
y_train_pred_ts = None
y_test_pred_ts = None
y_train_ts_clean = y_train_ts
y_train_pred_ts_clean = None
y_test_ts_clean = y_test_ts
y_test_pred_ts_clean = None
try:
if x_train_ts.shape[0] > 0:
input_data_train_func = [x_train_...
y_train_pred_ts = func_train(*inp...
if isinstance(y_train_pred_ts, (i...
y_train_pred_ts = np.full_lik...
y_train_pred_ts = np.asarray(y_tr...
finite_mask_train = np.isfinite(y...
y_train_ts_clean = y_train_ts[fin...
y_train_pred_ts_clean = y_train_p...
if len(y_train_pred_ts_clean) > 0...
mse_train = mean_squared_erro...
rmse_train = np.sqrt(mse_train)
r2_train = r2_score(y_train_t...
print(f"\n--- Evaluation on T...
print(f"RMSE (finite values o...
print(f"R-squared (finite val...
if np.sum(~finite_mask_train)...
print(f"Note: {np.sum(~fi...
else:
print("No finite predictions ...
except Exception as e:
print(f"Error predicting or calculati...
y_train_pred_ts = None
try:
if x_test_ts.shape[0] > 0:
input_data_test_func = [x_test_ts...
y_test_pred_ts = func_train(*inpu...
if isinstance(y_test_pred_ts, (in...
y_test_pred_ts = np.full_like...
y_test_pred_ts = np.asarray(y_tes...
finite_mask_test = np.isfinite(y_...
y_test_ts_clean = y_test_ts[finit...
y_test_pred_ts_clean = y_test_pre...
if len(y_test_pred_ts_clean) > 0 ...
mse_test = mean_squared_error...
rmse_test = np.sqrt(mse_test)
r2_test = r2_score(y_test_ts_...
print(f"\n--- Evaluation on T...
print(f"RMSE (finite values o...
print(f"R-squared (finite val...
if np.sum(~finite_mask_test) ...
print(f"Note: {np.sum(~fi...
else:
print("No finite predictions ...
except Exception as e:
print(f"Error predicting or calculati...
y_test_pred_ts = None
# 7. グラフの描画
plt.figure(figsize=(15, 7))
if len(train_dates) == len(y_train_ts):
plt.plot(train_dates, y_train_ts, lab...
if y_train_pred_ts is not None and len(tr...
plt.plot(train_dates, y_train_pred_ts...
if len(test_dates) == len(y_test_ts):
plt.plot(test_dates, y_test_ts, label...
if y_test_pred_ts is not None and len(tes...
plt.plot(test_dates, y_test_pred_ts, ...
plt.title('USD/JPY Prediction: Actual vs....
plt.xlabel('Date')
plt.ylabel('USD/JPY')
plt.legend()
plt.grid(True)
plt.show()
fig, axes = plt.subplots(1, 2, figsize=(1...
if y_train_pred_ts_clean is not None and ...
axes[0].scatter(y_train_ts_clean, y_t...
min_val_train = min(np.min(y_train_ts...
max_val_train = max(np.max(y_train_ts...
axes[0].plot([min_val_train, max_val_...
else:
axes[0].text(0.5, 0.5, "No valid trai...
axes[0].set_xlabel("Actual Values (Train)")
axes[0].set_ylabel("Predicted Values (Tra...
axes[0].set_title("Training Data: Actual ...
axes[0].legend()
axes[0].grid(True)
if y_test_pred_ts_clean is not None and l...
axes[1].scatter(y_test_ts_clean, y_te...
min_val_test = min(np.min(y_test_ts_c...
max_val_test = max(np.max(y_test_ts_c...
axes[1].plot([min_val_test, max_val_t...
else:
axes[1].text(0.5, 0.5, "No valid test...
axes[1].set_xlabel("Actual Values (Test)")
axes[1].set_ylabel("Predicted Values (Tes...
axes[1].set_title("Test Data: Actual vs. ...
axes[1].legend()
axes[1].grid(True)
plt.tight_layout()
plt.show()
else:
print("\nSkipping prediction and plotting...
else:
print("\nSkipping train/test split evaluation...
if 'sympy_expr_full' not in locals() or sympy_expr_fu...
print("\nSymbolic regression (full data) could no...
if 'sympy_expr_train' not in locals() or sympy_expr_t...
print("\nSymbolic regression (train data) could n...
#ref(kekka.png)
分析には以下のフォルダの中にあるExample.ipynbを実行すれば...
終了行:
[[技術資料]]
*AIによる数法則発見の時系列データへの拡張と金融データへの...
**もくじ [#q244e13f]
#CONTENTS
**必要なモジュール [#rfca96e2]
|必要なモジュール|バージョン|インストール方法| |必要なモ...
|pandas|1.5.3|pip install pandas==1.5.3| |numpy|1.23.5|p...
|torch|2.7.0+cu118|https://pytorch.org/get-started/locall...
|symbolicregression||https://github.com/facebookresearch/...
|IPython| || |sys|||
|os||| |sys|||
バージョンやインストール方法を記述していないものはおそら...
symbolic regression
**プログラム [#u7c030c5]
**実行手順 [#d5f974f5]
データの収集からはじめる.
データの収集には以下のプログラムを使う.
#ref(data.ipynb,,データ収集のプログラム)
***このプログラムではpythonのモジュールであるyfinanceを使...
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf
#investing.comから取得したデータ
#最新のデータダウンロードにはGoogle accountが必要
日本および米国の10年国債利回りなど一部のデータはyfinance...
https://jp.investing.com/ra...
jp_interest_rate = pd.read_csv("JP_InterestRate2Y.csv", ...
us_interest_rate = pd.read_csv("US_interestRate2Y.csv", ...
jp_10Y = pd.read_csv("JP_10Y_rimawari.csv", index_col=0,...
us_10Y = pd.read_csv("US_10Y_rimawari.csv", index_col=0,...
owarineJP = jp_interest_rate[["終値"]]
owarineUS = us_interest_rate[["終値"]]
aligned_data = owarineJP.join(owarineUS, how='inner',lsu...
aligned_data['金利差_JP-US'] = aligned_data['終値_JP'] -...
start_data = '2010-01-01'
end_data = '2025-10-01'
# S&P500、日経225、USD/JPYのデータを取得するためのコード
#アメリカの株式市場インデックスS&P500を取得
sp500_df = yf.download('^GSPC', start=start_data, end=en...
print("sp500_df columns:", sp500_df.columns)
if sp500_df.empty:
print("Warning: sp500_df is empty!")
sp500 = pd.Series(dtype=float) # 空のSeriesとして初...
else:
try:
# MultiIndexの 'Close' 列を指定
sp500 = sp500_df[('Close', '^GSPC')]
except KeyError:
print("KeyError: ('Close', '^GSPC') column not f...
sp500 = pd.Series(dtype=float) # エラー時は空のSe...
print("sp500 data (Series):")
print(sp500.head())
print(f"Type of sp500: {type(sp500)}")
if isinstance(sp500, pd.Series):
print(f"sp500 is a Series. Length: {len(sp500)}, Emp...
else:
print(f"sp500 is NOT a Series. Value: {sp500}")
#日本の株式市場インデックス日経225を取得
nikkei225_df = yf.download('^N225', start=start_data, en...
print("\nnikkei225_df columns:", nikkei225_df.columns)
if nikkei225_df.empty:
print("Warning: nikkei225_df is empty!")
nikkei225 = pd.Series(dtype=float)
else:
try:
# MultiIndexの 'Close' 列を指定
nikkei225 = nikkei225_df[('Close', '^N225')]
except KeyError:
print("KeyError: ('Close', '^N225') column not f...
nikkei225 = pd.Series(dtype=float)
print("\nnikkei225 data (Series):")
print(nikkei225.head())
print(f"Type of nikkei225: {type(nikkei225)}")
if isinstance(nikkei225, pd.Series):
print(f"nikkei225 is a Series. Length: {len(nikkei22...
else:
print(f"nikkei225 is NOT a Series. Value: {nikkei225...
#USD/JPYの為替レートを取得
usd_jpy_df = yf.download('JPY=X', start=start_data, end=...
print("\nusd_jpy_df columns:", usd_jpy_df.columns)
if usd_jpy_df.empty:
print("Warning: usd_jpy_df is empty!")
usd_jpy = pd.Series(dtype=float)
else:
try:
# MultiIndexの 'Close' 列を指定
usd_jpy = usd_jpy_df[('Close', 'JPY=X')]
except KeyError:
print("KeyError: ('Close', 'JPY=X') column not f...
usd_jpy = pd.Series(dtype=float)
print("\nusd_jpy data (Series):")
print(usd_jpy.head())
print(f"Type of usd_jpy: {type(usd_jpy)}")
if isinstance(usd_jpy, pd.Series):
print(f"usd_jpy is a Series. Length: {len(usd_jpy)},...
else:
print(f"usd_jpy is NOT a Series. Value: {usd_jpy}")
#データフレームに結合
# Seriesであることを確認し、空でないことを確認
is_sp500_valid = isinstance(sp500, pd.Series) and not sp...
is_nikkei225_valid = isinstance(nikkei225, pd.Series) an...
is_usd_jpy_valid = isinstance(usd_jpy, pd.Series) and no...
if is_sp500_valid and is_nikkei225_valid and is_usd_jpy_...
print("\nAll series are valid and non-empty. Attempt...
print(f"Debug - Type of sp500 before DataFrame: {typ...
print(f"Debug - Type of nikkei225 before DataFrame: ...
print(f"Debug - Type of usd_jpy before DataFrame: {t...
try:
combined_data = pd.DataFrame({
'SP500': sp500,
'Nikkei225': nikkei225,
'USD/JPY': usd_jpy,
}).dropna()
print("\nCombined Data Head:")
print(combined_data.head())
print("\nCombined Data Info:")
combined_data.info()
except ValueError as e:
print(f"\nValueError during DataFrame creation: ...
print("Please check the types and contents of sp...
except Exception as e:
print(f"\nAn unexpected error occurred during Da...
else:
print("\nError: One or more data series are not vali...
print(f"SP500 valid: {is_sp500_valid}, Nikkei225 val...
import pandas as pd
import yfinance as yf
# コモディティデータの取得とデータフレームへの格納
start_date_commodity = '2015-01-01'
end_date_commodity = '2025-01-01'
# WTI原油価格と金価格のティッカーシンボル
commodity_tickers = ['CL=F', 'GC=F'] # CL=F はWTI原油先...
# データを一度にダウンロード
raw_commodity_data = yf.download(commodity_tickers, star...
if not raw_commodity_data.empty:
# 'Close' 価格のみを抽出
# yf.download に複数のティッカーを渡すと、列がMultiI...
# raw_commodity_data['Close'] で各ティッカーの終値の...
commodity_prices = raw_commodity_data['Close']
# 列名を分かりやすいものに変更
commodity_prices = commodity_prices.rename(columns={'...
# NaNを含む行を削除 (いずれかのデータが存在しない日を...
commodity_prices = commodity_prices.dropna()
print("\nCommodity Data Head:")
print(commodity_prices.head())
print("\nCommodity Data Info:")
commodity_prices.info()
# 完成したデータフレームを表示
commodity_data = commodity_prices
print("\nFinal Commodity DataFrame:")
print(commodity_data.head())
else:
print("コモディティデータのダウンロードに失敗しました...
commodity_data = pd.DataFrame() # 空のDataFrameを作成
# 変数 commodity_data を次のセルで使えるようにする (Jupy...
commodity_data
owarineJP10Y = jp_10Y[["終値"]]
owarineUS10Y = us_10Y[["終値"]]
aligned_data10Y = owarineJP10Y.join(owarineUS10Y, how='...
aligned_data10Y['金利差_JP-US_10Y'] = aligned_data10Y['...
# 金利差が計算されたデータを確認
print("\nAligned Data with Interest Rate Difference Head...
print(aligned_data10Y.head())
print("\nAligned Data with Interest Rate Difference Info...
InterestRateDifferentials10Y = aligned_data10Y[["終値_JP...
InterestRateDifferentials10Y
vix = pd.read_csv("VIX.csv", index_col=0, parse_dates=Tr...
vix = vix["終値"]
import pandas as pd
# 各データフレームの準備と列名の調整
# VIXデータ (SeriesからDataFrameへ変換し、列名を'VIX'に)
if isinstance(vix, pd.Series):
vix_df = vix.to_frame(name='VIX')
else: # もしvixが既にDataFrameで、適切な列名がついている...
# 必要であればここで列名を確認・変更してください
vix_df = vix
# 2年債金利差の列名を変更
InterestRateDifferentials = InterestRateDifferentials.re...
columns={'金利差_JP-US': '金利差_JP-US_2Y'}
)
# 10年債金利関連の列名を変更
InterestRateDifferentials10Y = InterestRateDifferentials...
columns={
'終値_JP': 'JP_10Y_Yield',
'終値_US': 'US_10Y_Yield',
'金利差_JP-US_10Y': '金利差_JP-US_10Y' # こちら...
}
)
# 結合するデータフレームのリストを作成
# combined_data: 'SP500', 'Nikkei225', 'USD/JPY'
# InterestRateDifferentials: '金利差_JP-US_2Y'
# InterestRateDifferentials10Y: 'JP_10Y_Yield', 'US_10Y_...
# commodity_data: 'WTI_Oil', 'Gold'
# vix_df: 'VIX'
dataframes_to_join = [
combined_data, # combined_data には 'USD/JPY' が含ま...
InterestRateDifferentials,
InterestRateDifferentials10Y,
commodity_data,
vix_df
]
# pd.concat を使用して、共通のインデックス(日付)で内部...
# axis=1 は列方向に結合することを意味します
# join='inner' はすべてのデータフレームに存在するインデ...
for_predict_usdjpy = pd.concat(dataframes_to_join, axis=...
# 'USD/JPY' 列を削除する処理を削除またはコメントアウト
# if 'USD/JPY' in for_predict_usdjpy.columns:
# for_predict_usdjpy = for_predict_usdjpy.drop(colum...
# print("\n'USD/JPY' column has been dropped.")
# else:
# print("\n'USD/JPY' column not found in the DataFra...
# 結果の確認
print("Combined DataFrame for Prediction (for_predict_us...
print(for_predict_usdjpy.head())
print("\nCombined DataFrame for Prediction (for_predict_...
for_predict_usdjpy.info()
print("\nCombined DataFrame for Prediction (for_predict_...
print(for_predict_usdjpy.describe())
# 完成したデータフレームを表示
for_predict_usdjpy
#ref(dataframe.png)
***用いるデータ
|データ項目|時間足|ダウンロード方法| |データ項目|時間足|...
|SP500|日足|yfinanceによって取得.データ収集のプログラム...
|USD/JPY|日足|yfinanceによって取得,データ収集のプログラ...
|symbolicregression||https://github.com/facebookresearch/...
|IPython| || |sys|||
|os||| |sys|||
***分析のプログラム [#pa324f5e]
import torch
import numpy as np
import sympy as sp
import os, sys
import symbolicregression
import requests
import pandas as pd
import pathlib
from IPython.display import display
from sklearn.metrics import mean_squared_error
import pathlib
# Load the pretrained model from model.pt, downloading the file first when it
# is not already on disk. Any failure is caught and printed instead of raised.
model_path = "model.pt"
try:
if not os.path.isfile(model_path):
url = "https://dl.fbaipublicfiles.com/symboli...
r = requests.get(url, allow_redirects=True)
# NOTE(review): the HTTP status is not checked and the file handle is not
# closed explicitly; consider r.raise_for_status() and `with open(...)`.
open(model_path, 'wb').write(r.content)
# Bookkeeping so the pathlib patch below can be undone in the finally block.
_posix_path_backup = None
_posix_path_created = False # Flag to track if Po...
# On Windows, pathlib.PosixPath is temporarily pointed at pathlib.WindowsPath
# for the duration of torch.load.
if sys.platform == "win32": # Apply patch only on...
if hasattr(pathlib, 'PosixPath'):
_posix_path_backup = pathlib.PosixPath # ...
pathlib.PosixPath = pathlib.WindowsPath #...
else:
# If pathlib.PosixPath does not exist, cr...
# as the pickled object might refer to 'p...
pathlib.PosixPath = pathlib.WindowsPath
_posix_path_created = True
# NOTE(review): torch.load unpickles the downloaded file; only load
# checkpoints obtained from a trusted source.
try: # Inner try for torch.load, which needs the ...
# Machines without CUDA take the first torch.load call; CUDA machines take
# the second one and then move the model with .cuda().
if not torch.cuda.is_available():
model = torch.load(model_path, map_locati...
else:
model = torch.load(model_path, weights_on...
model = model.cuda()
print(model.device)
print("Model successfully loaded!")
finally: # This finally block ensures the patch i...
# Undo the patch: restore the saved PosixPath, or delete the attribute if
# it was created here.
if sys.platform == "win32":
if _posix_path_backup is not None:
pathlib.PosixPath = _posix_path_backu...
elif _posix_path_created:
delattr(pathlib, 'PosixPath') # Remov...
except Exception as e:
# Best-effort handling: report the failure and continue; `model` may be
# undefined afterwards.
print("ERROR: model not loaded! path was: {}".for...
print(e)
以下の max_input_points と n_trees_to_refine の値を変更すると,精度と計算時間が変わります.
色々動かしてみたところこれくらいが計算速度,精度ともに良...
対象によっては色々変えてみることを推奨します.
# Wrap the loaded model in the estimator object that est.fit(...) is called on
# further down the page.
# NOTE(review): the surrounding notes say these numbers affect accuracy and
# run time; the exact meaning of each keyword is not shown in this listing —
# confirm against the symbolicregression documentation.
est = symbolicregression.model.SymbolicTransformerReg...
model=model,
max_input_points=2000,
n_trees_to_refine=5000,
rescale=True
)
# Read the prepared CSV (the path and remaining arguments are cut off here).
data = pd.read_csv("C:/kenkyu/market_data/for_predict...
import pandas as pd
import numpy as np
# Data-preparation cell without lag features: load the CSV, split it into the
# 'USD/JPY' target and the remaining feature columns, drop rows containing
# NaN, and publish NumPy arrays as the globals x and y.
# Load the data.
file_path = "C:/kenkyu/market_data/for_predict_usdjpy...
data = pd.read_csv(file_path, encoding="utf-8-sig", i...
print("Data loaded successfully. Shape:", data.shape)
print("Columns:", data.columns)
# Choose the target variable and the explanatory variables.
y_target_name = 'USD/JPY'
if y_target_name in data.columns:
y_series = data[y_target_name]
X_df = data.drop(columns=[y_target_name])
print(f"\nTarget variable '{y_target_name}' found...
print(f"Shape of X_df (features): {X_df.shape}")
print(f"Shape of y_series (target): {y_series.sha...
else:
print(f"Error: Target column '{y_target_name}' no...
print("Please ensure 'USD/JPY' column exists in f...
# The later steps would fail after this error, so this example substitutes
# dummy data to let processing continue.
# NOTE(review): the dummy target is random, so any fit produced on this path
# is meaningless; raising an error may be preferable.
X_df = data.copy() # use every column as X (acknowledged as inappropriate)
y_series = pd.Series(np.random.rand(len(data)), i...
print("Using dummy y_series due to missing target...
# Check for NaN values and handle them, if any.
# X_df and y_series are joined so NaN rows can be dropped in one pass, then
# split again below.
temp_combined_df = X_df.join(y_series, how='inner') #...
if temp_combined_df.isnull().values.any():
print("\nNaN values found. Dropping rows with any...
rows_before_dropna = len(temp_combined_df)
temp_combined_df.dropna(inplace=True)
rows_after_dropna = len(temp_combined_df)
print(f"Dropped {rows_before_dropna - rows_after_...
else:
print("\nNo NaN values found in the combined X an...
# Re-derive X and y from the NaN-free frame.
if not temp_combined_df.empty and y_target_name in te...
X_df_aligned = temp_combined_df.drop(columns=[y_t...
y_series_aligned = temp_combined_df[y_target_name]
else:
print("Error: Data became empty after NaN handlin...
# Fallback to potentially problematic data to avo...
X_df_aligned = X_df
y_series_aligned = y_series
# Convert to NumPy arrays.
X_ts = X_df_aligned.values
y_ts = y_series_aligned.values
print("\nData prepared for Symbolic Regression:")
print(f"X_ts shape: {X_ts.shape}")
print(f"y_ts shape: {y_ts.shape}")
if X_ts.shape[0] > 0 and y_ts.shape[0] > 0:
print("\nFirst 5 rows of X_df_aligned (features):")
print(X_df_aligned.head())
print("\nFirst 5 values of y_series_aligned (targ...
print(y_series_aligned.head())
# Publish x and y as globals so that later cells can use them.
x = X_ts
y = y_ts
print("\nGlobal variables 'x' and 'y' have been s...
else:
print("\nError: Not enough data after processing....
# Initialise the globals with empty arrays so that they still exist.
x = np.array([])
y = np.array([])
print("Global variables 'x' and 'y' are empty due...
import pandas as pd
import numpy as np
# Load the data again for the lag-feature cell (same file_path / read_csv /
# print sequence as the previous cell).
file_path = "C:/kenkyu/market_data/for_predict_usdjpy...
data = pd.read_csv(file_path, encoding="utf-8-sig", i...
print("Data loaded successfully. Shape:", data.shape)
print("Columns:", data.columns)
目的変数と説明変数を設定する.
この変数を変えるだけではうまくいかないので時系列ずらしか...
# Lag-feature cell: build features from the values at t-1 (all other columns
# plus the lagged target) to predict 'USD/JPY' at t, drop the NaN rows created
# by shifting, and publish NumPy arrays as the globals x and y.
# Choose the target variable and the explanatory variables.
y_target_name = 'USD/JPY'
if y_target_name in data.columns:
y_series_t = data[y_target_name].copy() # y(t), the target series
X_df_t_others = data.drop(columns=[y_target_name]...
print(f"\nTarget variable '{y_target_name}' (y(t)...
print(f"Other features X_others(t) found. Shape: ...
else:
print(f"Error: Target column '{y_target_name}' no...
# Error handling (same approach as the non-lagged cell).
# NOTE(review): the dummy target is random, so any fit produced on this path
# is meaningless.
X_df_t_others = data.copy()
y_series_t = pd.Series(np.random.rand(len(data)),...
print("Using dummy y_series due to missing target...
# --- Lag feature generation ---
# 1. Create the t-1 values of the other explanatory variables X_others.
X_df_t_minus_1_others = X_df_t_others.shift(1)
print("\nCreated lagged features for other variables ...
# 2. Create the t-1 value of y (USD/JPY_lag1).
y_series_t_minus_1 = y_series_t.shift(1).rename(f"{y_...
print(f"Created lagged target variable ({y_target_nam...
# 3. Join all features (X_others(t-1) and y(t-1)).
# This is where y_series_t_minus_1 (y(t-1)) is added as a feature.
X_df_features_t_minus_1 = X_df_t_minus_1_others.join(...
print("Combined all lagged features (X_others(t-1) an...
if f"{y_target_name}_lag1" not in X_df_features_t_min...
print(f"Warning: {y_target_name}_lag1 was not suc...
# 4. Join y(t) with all features at t-1 on their common index.
# y_series_t stays at time t (this is the target variable).
# X_df_features_t_minus_1 is the set of features at time t-1.
# After joining, rows holding NaN produced by the lag shift are removed.
combined_df_for_regression = X_df_features_t_minus_1....
print(f"\nShape before dropping NaN from combined (Fe...
combined_df_for_regression.dropna(inplace=True)
print(f"Shape after dropping NaN: {combined_df_for_re...
if not combined_df_for_regression.empty and y_target_...
# X_df_final holds every feature except y(t).
X_df_final = combined_df_for_regression.drop(colu...
# y_series_final is y(t).
y_series_final = combined_df_for_regression[y_tar...
# Convert to NumPy arrays.
X_ts_lagged_with_y_lag = X_df_final.values
y_ts_aligned = y_series_final.values
print("\nData prepared for Symbolic Regression with l...
print(f"X_ts (features at t-1, including y(t-1)) shap...
print(f"y_ts (target at t) shape: {y_ts_aligned.shape...
print(f"Columns in X_df_final: {X_df_final.columns.to...
# --- Additional debug prints ---
print(f"Debug: Shape of X_ts_lagged_with_y_lag before...
print(f"Debug: Shape of y_ts_aligned before global as...
if X_ts_lagged_with_y_lag.shape[0] > 0:
print(f"Debug: First 5 rows of X_ts_lagged_with_y...
if y_ts_aligned.shape[0] > 0:
print(f"Debug: First 5 values of y_ts_aligned: \n...
# --- End of additional debug prints ---
if X_ts_lagged_with_y_lag.shape[0] > 0 and y_ts_align...
print("\nFirst 5 rows of X_df_final (features...
display(X_df_final.head())
print("\nFirst 5 values of y_series_final (ta...
display(y_series_final.head())
# Publish x and y as globals so that later cells can use them.
x = X_ts_lagged_with_y_lag # features for the symbolic-regression model
y = y_ts_aligned # target for the symbolic-regression model
print("\nGlobal variables 'x' and 'y' have be...
else:
print("\nError: Not enough data after processing ...
x = np.array([])
y = np.array([])
print("Global variables 'x' and 'y' are empty.")
# Refresh X_df_aligned / y_series_aligned so that the later plotting cell can
# use their (date) index.
if 'X_df_final' in locals() and 'y_series_final' in l...
X_df_aligned = X_df_final.copy() # copy of X_df_final
y_series_aligned = y_series_final.copy() # copy of y_series_final
print("\nUpdated X_df_aligned and y_series_aligne...
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_sc...
import matplotlib.pyplot as plt
import sympy as sp # sympyがインポートされていること...
import numpy as np # numpy をインポート
# Fit-and-evaluate cell, part 1: fit the estimator on all of x / y, turn the
# retrieved tree into a SymPy expression, and report RMSE / R-squared on the
# full dataset.
# The globals x and y are expected to have been set by the previous cell.
if 'x' not in globals() or 'y' not in globals() or x....
print("Error: Input data x or y is not defined or...
sympy_expr_full = None
sympy_expr_train = None
else:
print(f"Using data for Symbolic Regression: x sha...
# 1. Train the model (using all of the data).
print("\nFitting model on the entire dataset (lag...
est.fit(x, y)
print("Model fitting complete.")
# 2. Retrieve and display the expression.
# Operator names in the retrieved string are replaced with infix symbols
# before the string is handed to sp.parse_expr.
replace_ops = {"add": "+", "mul": "*", "sub": "-"...
try:
model_str_full = est.retrieve_tree(with_infos...
for op, replace_op in replace_ops.items():
model_str_full = model_str_full.replace(o...
sympy_expr_full = sp.parse_expr(model_str_full)
print("\nRetrieved symbolic expression (train...
display(sympy_expr_full)
except Exception as e:
print(f"Error retrieving or parsing symbolic ...
sympy_expr_full = None
# 3. Compute the evaluation metrics (all data).
# NOTE(review): this is a truthiness test, not an `is not None` test; a SymPy
# zero expression would presumably also be skipped here — confirm intent.
if sympy_expr_full:
try:
num_features_full = x.shape[1]
variables_full = [sp.Symbol(f'x_{i}') for...
# Reciprocal helper built on np.divide with an out= buffer (remaining
# arguments are cut off in this listing); registered under the name 'inv'.
# NOTE(review): safe_inv is defined only when this branch runs, yet the
# training-set step further down refers to it as well.
def safe_inv(x_val):
res = np.divide(1.0, x_val, out=np.fu...
return res
custom_modules = [{'inv': safe_inv}, 'num...
func_full = sp.lambdify(variables_full, s...
# One positional argument per feature column of x.
input_data_for_func_full = [x[:, i] for i...
y_pred_full = func_full(*input_data_for_f...
# A scalar result is expanded with np.full_like before flattening.
if isinstance(y_pred_full, (int, float)):
y_pred_full = np.full_like(y, y_pred_...
y_pred_full = np.asarray(y_pred_full).fla...
# Metrics are computed on entries selected by the np.isfinite mask.
finite_mask_full = np.isfinite(y_pred_ful...
y_clean_full = y[finite_mask_full]
y_pred_clean_full = y_pred_full[finite_ma...
if len(y_pred_clean_full) > 0 and len(y_c...
if len(y_pred_clean_full) == len(y_cl...
mse_full = mean_squared_error(y_c...
rmse_full = np.sqrt(mse_full)
r2_full = r2_score(y_clean_full, ...
print(f"\n--- Evaluation on Entir...
print(f"RMSE (finite values only)...
print(f"R-squared (finite values ...
if np.sum(~finite_mask_full) > 0:
print(f"Note: {np.sum(~finite...
else:
print("Warning: Length mismatch a...
else:
print("No finite predictions availabl...
except Exception as e:
print(f"Error during metrics calculation ...
else:
print("Skipping metrics for full data as symb...
# --- Train/test split that respects time order ---
# The first 80% of the rows become the training set; x and y are sliced at the
# same split_index, without shuffling.
split_ratio = 0.8
split_index = int(len(x) * split_ratio)
x_train_ts, x_test_ts = x[:split_index], x[split_...
y_train_ts, y_test_ts = y[:split_index], y[split_...
# Take the plotting dates from X_df_aligned.index when that frame exists and
# its length matches x; otherwise fall back to pd.RangeIndex positions.
if 'X_df_aligned' in globals() and 'y_series_alig...
hasattr(X_df_aligned, 'index') and hasattr(y_seri...
len(X_df_aligned) == len(x) and len(y_series_alig...
train_dates = X_df_aligned.index[:split_index]
test_dates = X_df_aligned.index[split_index:]
if not train_dates.empty and not test_dates.e...
print(f"\nTraining data covers dates from...
print(f"Test data covers dates from {test...
else:
print("\nWarning: Date range for train/te...
train_dates = pd.RangeIndex(start=0, stop...
test_dates = pd.RangeIndex(start=len(x_tr...
else:
print("\nWarning: X_df_aligned or y_series_al...
train_dates = pd.RangeIndex(start=0, stop=len...
test_dates = pd.RangeIndex(start=len(x_train_...
print(f"\nTraining data shape: x_train_ts={x_trai...
print(f"Test data shape: x_test_ts={x_test_ts.sha...
# Refit the estimator on the training split only, parse the retrieved tree
# into a SymPy expression, show a more readable version of it, and build the
# callable func_train used for the predictions that follow.
if len(x_train_ts) > 0 and len(x_test_ts) > 0:
print("\nFitting model on training data (x_tr...
est.fit(x_train_ts, y_train_ts)
print("Model fitting on training data complet...
sympy_expr_train = None
func_train = None
try:
model_str_train = est.retrieve_tree(with_...
for op, replace_op in replace_ops.items():
model_str_train = model_str_train.rep...
sympy_expr_train = sp.parse_expr(model_st...
print(f"\nRetrieved symbolic expression (...
display(sympy_expr_train)
# --- Readability improvement: start ---
from sympy import Float
def readable_expr_transform(expr, col_nam...
"""
Work directly on a SymPy expression: round its floating-point
constants and replace the x_i symbols with the actual column names.
Each step falls back to its input expression if it raises.
"""
# 1. Round every floating-point number in the expression.
try:
replacements = {n: n.round(digits...
rounded_expr = expr.xreplace(repl...
except Exception as e:
print(f"Could not round floats: {...
rounded_expr = expr
# 2. Replace variables of the form x_i with the actual column names.
try:
# Substitute from the highest index down (the original note's rationale
# is cut off in this listing).
subs_dict = {
sp.Symbol(f'x_{i}'): sp.Symbo...
for i, name in reversed(list(...
}
final_expr = rounded_expr.subs(su...
except Exception as e:
print(f"Could not substitute vari...
final_expr = rounded_expr
return final_expr
try:
if 'X_df_final' in globals() and symp...
# Apply the transformation.
simplified_expr = readable_expr_t...
print("\n--- 可読性向上後の式(簡...
display(simplified_expr)
print(sp.pretty(simplified_expr))
else:
print("Skipping readability impro...
except Exception as e:
print(f"可読性向上処理でエラー: {e}")
if 'sympy_expr_train' in locals():
print("Original expression:", sym...
# --- Readability improvement: end ---
num_features_train = x_train_ts.shape[1]
variables_train = [sp.Symbol(f'x_{i}') fo...
# NOTE(review): safe_inv is defined inside the full-data metrics step; if that
# step did not run, this line presumably raises NameError, which the except
# below would catch — confirm.
custom_modules_train = [{'inv': safe_inv}...
func_train = sp.lambdify(variables_train,...
print("Expression from training data lamb...
except Exception as e:
print(f"Error during expression retrieval...
# After an error the later steps would fail, so both results are reset to
# None and the prediction/plot section is skipped.
func_train = None
sympy_expr_train = None
# Evaluate func_train on the training and test splits. Each split has its own
# try block, so a failure in one is printed and does not stop the other.
if func_train:
# Defaults used by the plotting code when a prediction step fails or is
# skipped.
y_train_pred_ts = None
y_test_pred_ts = None
y_train_ts_clean = y_train_ts
y_train_pred_ts_clean = None
y_test_ts_clean = y_test_ts
y_test_pred_ts_clean = None
# Training split: predict, mask with np.isfinite, then report RMSE / R-squared.
try:
if x_train_ts.shape[0] > 0:
input_data_train_func = [x_train_...
y_train_pred_ts = func_train(*inp...
if isinstance(y_train_pred_ts, (i...
y_train_pred_ts = np.full_lik...
y_train_pred_ts = np.asarray(y_tr...
finite_mask_train = np.isfinite(y...
y_train_ts_clean = y_train_ts[fin...
y_train_pred_ts_clean = y_train_p...
if len(y_train_pred_ts_clean) > 0...
mse_train = mean_squared_erro...
rmse_train = np.sqrt(mse_train)
r2_train = r2_score(y_train_t...
print(f"\n--- Evaluation on T...
print(f"RMSE (finite values o...
print(f"R-squared (finite val...
if np.sum(~finite_mask_train)...
print(f"Note: {np.sum(~fi...
else:
print("No finite predictions ...
except Exception as e:
print(f"Error predicting or calculati...
y_train_pred_ts = None
# Test split: same procedure, still using the expression fitted on the
# training data (func_train).
try:
if x_test_ts.shape[0] > 0:
input_data_test_func = [x_test_ts...
y_test_pred_ts = func_train(*inpu...
if isinstance(y_test_pred_ts, (in...
y_test_pred_ts = np.full_like...
y_test_pred_ts = np.asarray(y_tes...
finite_mask_test = np.isfinite(y_...
y_test_ts_clean = y_test_ts[finit...
y_test_pred_ts_clean = y_test_pre...
if len(y_test_pred_ts_clean) > 0 ...
mse_test = mean_squared_error...
rmse_test = np.sqrt(mse_test)
r2_test = r2_score(y_test_ts_...
print(f"\n--- Evaluation on T...
print(f"RMSE (finite values o...
print(f"R-squared (finite val...
if np.sum(~finite_mask_test) ...
print(f"Note: {np.sum(~fi...
else:
print("No finite predictions ...
except Exception as e:
print(f"Error predicting or calculati...
y_test_pred_ts = None
# 7. Draw the charts.
# First figure: series plotted against train_dates / test_dates. Each line is
# drawn only when its length check passes.
plt.figure(figsize=(15, 7))
if len(train_dates) == len(y_train_ts):
plt.plot(train_dates, y_train_ts, lab...
if y_train_pred_ts is not None and len(tr...
plt.plot(train_dates, y_train_pred_ts...
if len(test_dates) == len(y_test_ts):
plt.plot(test_dates, y_test_ts, label...
if y_test_pred_ts is not None and len(tes...
plt.plot(test_dates, y_test_pred_ts, ...
plt.title('USD/JPY Prediction: Actual vs....
plt.xlabel('Date')
plt.ylabel('USD/JPY')
plt.legend()
plt.grid(True)
plt.show()
# Second figure: two side-by-side scatter plots of actual vs. predicted values
# (training on the left, test on the right).
fig, axes = plt.subplots(1, 2, figsize=(1...
if y_train_pred_ts_clean is not None and ...
axes[0].scatter(y_train_ts_clean, y_t...
# Line spanning the min/max of the plotted values (presumably the y = x
# reference line — the arguments are cut off in this listing).
min_val_train = min(np.min(y_train_ts...
max_val_train = max(np.max(y_train_ts...
axes[0].plot([min_val_train, max_val_...
else:
axes[0].text(0.5, 0.5, "No valid trai...
axes[0].set_xlabel("Actual Values (Train)")
axes[0].set_ylabel("Predicted Values (Tra...
axes[0].set_title("Training Data: Actual ...
axes[0].legend()
axes[0].grid(True)
if y_test_pred_ts_clean is not None and l...
axes[1].scatter(y_test_ts_clean, y_te...
min_val_test = min(np.min(y_test_ts_c...
max_val_test = max(np.max(y_test_ts_c...
axes[1].plot([min_val_test, max_val_t...
else:
axes[1].text(0.5, 0.5, "No valid test...
axes[1].set_xlabel("Actual Values (Test)")
axes[1].set_ylabel("Predicted Values (Tes...
axes[1].set_title("Test Data: Actual vs. ...
axes[1].legend()
axes[1].grid(True)
plt.tight_layout()
plt.show()
else:
print("\nSkipping prediction and plotting...
else:
print("\nSkipping train/test split evaluation...
# Final status messages when either expression is missing.
if 'sympy_expr_full' not in locals() or sympy_expr_fu...
print("\nSymbolic regression (full data) could no...
if 'sympy_expr_train' not in locals() or sympy_expr_t...
print("\nSymbolic regression (train data) could n...
#ref(kekka.png)
分析には以下のフォルダの中にあるExample.ipynbを実行すれば...
ページ名: