From a25cc3b5d64d79c9befd4c6f3a8144ffe68a0dee Mon Sep 17 00:00:00 2001
From: Yvictor <410175015@gms.ndhu.edu.tw>
Date: Thu, 7 Dec 2017 15:40:34 +0800
Subject: [PATCH 1/7] support matplotlib show=False rendering while still
 drawing the figure

---
 trading_env/envs/backtest_v1.py | 12 +++++++-----
 trading_env/envs/training_v1.py | 12 +++++++-----
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/trading_env/envs/backtest_v1.py b/trading_env/envs/backtest_v1.py
index 5306319..72b7860 100644
--- a/trading_env/envs/backtest_v1.py
+++ b/trading_env/envs/backtest_v1.py
@@ -339,7 +339,7 @@ def _plot_trading(self):
                                                c=trade_color_sell, edgecolors=(0,1,0,0.9), zorder=2)
 
-    def render(self, save=False):
+    def render(self, save=False, show=True):
         if self.render_on == 0:
             matplotlib.style.use('dark_background')
             self.render_on = 1
@@ -363,9 +363,10 @@ def render(self, save=False):
             self._plot_trading()
 
             self.ax.set_xlim(0,len(self.price[:self.step_st+self.obs_len])+200)
-            plt.ion()
-            #self.fig.tight_layout()
-            plt.show()
+            if show:
+                plt.ion()
+                #self.fig.tight_layout()
+                plt.show()
             if save:
                 self.fig.savefig('fig/%s.png' % str(self.t_index))
 
@@ -387,4 +388,5 @@ def render(self, save=False):
             self.ax.set_xlim(0,len(self.price[:self.step_st+self.obs_len])+200)
             if save:
                 self.fig.savefig('fig/%s.png' % str(self.t_index))
-            plt.pause(0.0001)
\ No newline at end of file
+            if show:
+                plt.pause(0.0001)
\ No newline at end of file
diff --git a/trading_env/envs/training_v1.py b/trading_env/envs/training_v1.py
index 88caf6e..bf46e6e 100644
--- a/trading_env/envs/training_v1.py
+++ b/trading_env/envs/training_v1.py
@@ -330,7 +330,7 @@ def _plot_trading(self):
                                                c=trade_color_sell, edgecolors=(0,1,0,0.9), zorder=2)
 
-    def render(self, save=False):
+    def render(self, save=False, show=True):
         if self.render_on == 0:
             matplotlib.style.use('dark_background')
             self.render_on = 1
@@ -354,9 +354,10 @@ def render(self, save=False):
             self._plot_trading()
 
             self.ax.set_xlim(0,len(self.price[:self.step_st+self.obs_len])+200)
-            plt.ion()
-            #self.fig.tight_layout()
-            plt.show()
+            if show:
+                plt.ion()
+                #self.fig.tight_layout()
+                plt.show()
             if save:
                 self.fig.savefig('fig/%s.png' % str(self.t_index))
 
@@ -378,6 +379,7 @@ def render(self, save=False):
             self.ax.set_xlim(0,len(self.price[:self.step_st+self.obs_len])+200)
             if save:
                 self.fig.savefig('fig/%s.png' % str(self.t_index))
-            plt.pause(0.0001)
+            if show:
+                plt.pause(0.0001)
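The show flag added above lets render() draw and save the figure without ever opening a window, which is what you want on a headless training box. A minimal sketch of that usage, assuming a display-less worker; the 'Agg' backend choice and the pre-created fig/ directory are assumptions on top of the patch, not part of it:

    import os
    import matplotlib
    matplotlib.use('Agg')               # pick the non-interactive backend before pyplot loads
    import trading_env

    os.makedirs('fig', exist_ok=True)   # render(save=True) writes to fig/<t_index>.png
    # ... build env with trading_env.make(...) and step it as usual ...
    env.render(save=True, show=False)   # draw and save the frame; no window, no plt.pause()
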
From 7d75cc1b65da9678c0ec8505c3041e6ba08fb631 Mon Sep 17 00:00:00 2001
From: Yvictor <410175015@gms.ndhu.edu.tw>
Date: Mon, 11 Dec 2017 02:08:17 +0800
Subject: [PATCH 2/7] customizable return of transaction columns in the state

---
 trading_env/__init__.py         | 25 +++++++++
 trading_env/envs/backtest_v1.py | 66 ++++++++++++++++-------
 trading_env/envs/training_v1.py | 93 +++++++++++++++++++++++----------
 trading_env/test/__main__.py    |  4 ++
 4 files changed, 142 insertions(+), 46 deletions(-)

diff --git a/trading_env/__init__.py b/trading_env/__init__.py
index 75a205c..d4157c3 100644
--- a/trading_env/__init__.py
+++ b/trading_env/__init__.py
@@ -8,7 +8,31 @@ def available_envs():
 def make(env_id, obs_data_len, step_len, 
          df, fee, deal_col_name='price', 
          feature_names=['price', 'volume'], 
+         return_transaction=False,
          *args , **kwargs):
+    """
+    v0: currently not maintained
+
+    v1:
+    # assert df
+    # the deal price column is essential, and the df format must be specified
+    # obs_data_len -> observation data length
+    # step_len -> how far the rolling window advances on each call to step()
+    # df -> dataframe that contains the data for trading (format as below)
+        # price
+        # datetime
+        # serial_number -> serial number of the deal, restarting from 0 each day
+
+    # fee : the fee paid on each deal, set it to match your product
+    # max_position : the max market position for your trading shares
+    # deal_col_name : the column name used to calculate the reward.
+    # feature_names : list containing the feature columns to use in the trading state.
+    # return_transaction : bool, list or dict, default False
+        True returns all columns: 'mkt_pos', 'mkt_pos_var', 'entry_cover', 'avg_hold_price', 'fluc_reward', 'make_real', 'reward'
+        use a list to select which columns to return
+        use a dict to select which columns to return, with each value being a function applied to that column's array
+    # the day-trade option is set as default; if you don't trade intraday this needs modification
+    """
     envs = available_envs()
     assert env_id in envs , "env_id: {} not exist. try one of {}".format(env_id, str(envs).strip('[]'))
     assert deal_col_name in df.columns, "deal_col not in Dataframe please define the correct column name of which column want to calculate the profit."
@@ -22,5 +46,6 @@ def make(env_id, obs_data_len, step_len,
                          step_len=step_len ,df=df, fee=fee,
                          deal_col_name=deal_col_name,
                          feature_names=feature_names,
+                         return_transaction=return_transaction,
                          *args, **kwargs)
     return env
diff --git a/trading_env/envs/backtest_v1.py b/trading_env/envs/backtest_v1.py
index 72b7860..e2d121b 100644
--- a/trading_env/envs/backtest_v1.py
+++ b/trading_env/envs/backtest_v1.py
@@ -14,7 +14,7 @@ class trading_env:
     def __init__(self, env_id, obs_data_len, step_len,
                  df, fee, max_position=5, deal_col_name='price',
                  feature_names=['price', 'volume'],
-                 return_transaction=True,
+                 return_transaction=False,
                  fluc_div=100.0, gameover_limit=5,
                  *args, **kwargs):
         """
@@ -117,17 +117,35 @@ def reset(self):
         self.obs_reward_fluctuant = self.reward_fluctuant_arr[self.step_st: self.step_st+self.obs_len]
         self.obs_makereal = self.reward_makereal_arr[self.step_st: self.step_st+self.obs_len]
         self.obs_reward = self.reward_arr[self.step_st: self.step_st+self.obs_len]
+
+        self.transaction_dict = {'mkt_pos': self.obs_posi[:, np.newaxis],
+                                 'mkt_pos_var': self.obs_posi_var[:, np.newaxis],
+                                 'entry_cover': self.obs_posi_entry_cover[:, np.newaxis],
+                                 'avg_hold_price': self.obs_price_mean[:, np.newaxis],
+                                 'fluc_reward': self.obs_reward_fluctuant[:, np.newaxis],
+                                 'make_real': self.obs_makereal[:, np.newaxis],
+                                 'reward': self.obs_reward[:, np.newaxis],}
 
         if self.return_transaction:
-            self.obs_return = np.concatenate((self.obs_state,
-                                              self.obs_posi[:, np.newaxis],
-                                              self.obs_posi_var[:, np.newaxis],
-                                              self.obs_posi_entry_cover[:, np.newaxis],
-                                              self.obs_price[:, np.newaxis],
-                                              self.obs_price_mean[:, np.newaxis],
-                                              self.obs_reward_fluctuant[:, np.newaxis],
-                                              self.obs_makereal[:, np.newaxis],
-                                              self.obs_reward[:, np.newaxis]), axis=1)
+            if isinstance(self.return_transaction, bool):
+                self.obs_return = np.concatenate((self.obs_state,
+                                                  self.obs_posi[:, np.newaxis],
+                                                  self.obs_posi_var[:, np.newaxis],
+                                                  self.obs_posi_entry_cover[:, np.newaxis],
+                                                  self.obs_price_mean[:, np.newaxis],
+                                                  self.obs_reward_fluctuant[:, np.newaxis],
+                                                  self.obs_makereal[:, np.newaxis],
+                                                  self.obs_reward[:, np.newaxis]), axis=1)
+            elif isinstance(self.return_transaction, list):
+                self.obs_return = np.concatenate((self.obs_state,)+ \
+                                                 tuple(self.transaction_dict[need] \
+                                                       for need in self.return_transaction),
+                                                 axis=1)
+            elif isinstance(self.return_transaction, dict):
+                self.obs_return = np.concatenate((self.obs_state,)+ \
+                                                 tuple(self.return_transaction[need](self.transaction_dict[need]) \
+                                                       for need in self.return_transaction),
+                                                 axis=1)
         else:
             self.obs_return = self.obs_state
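reset() above (and step(), in the next hunk) now dispatches on the type of return_transaction when assembling obs_return. A short sketch of the three call styles through trading_env.make(); the column names come from the docstring, while df, the fee, and the scaling lambda are illustrative and df is assumed to carry the named columns:

    common = dict(env_id='backtest_v1', obs_data_len=256, step_len=128,
                  df=df, fee=0.1, deal_col_name='price',
                  feature_names=['price', 'volume'])
    # bool: append all seven transaction columns after the feature columns
    env_all  = trading_env.make(return_transaction=True, **common)
    # list: append only the named columns, in the order given
    env_some = trading_env.make(return_transaction=['mkt_pos', 'fluc_reward'], **common)
    # dict: append the named columns, each passed through its mapped function first
    env_fn   = trading_env.make(return_transaction={'mkt_pos': lambda col: col / 5.0}, **common)
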
@@ -265,15 +283,25 @@ def step(self, action):
         self.chg_reward_fluctuant[:] = (self.chg_price - self.chg_price_mean)*self.chg_posi - np.abs(self.chg_posi)*self.fee
 
         if self.return_transaction:
-            self.obs_return = np.concatenate((self.obs_state,
-                                              self.obs_posi[:, np.newaxis],
-                                              self.obs_posi_var[:, np.newaxis],
-                                              self.obs_posi_entry_cover[:, np.newaxis],
-                                              self.obs_price[:, np.newaxis],
-                                              self.obs_price_mean[:, np.newaxis],
-                                              self.obs_reward_fluctuant[:, np.newaxis],
-                                              self.obs_makereal[:, np.newaxis],
-                                              self.obs_reward[:, np.newaxis]), axis=1)
+            if isinstance(self.return_transaction, bool):
+                self.obs_return = np.concatenate((self.obs_state,
+                                                  self.obs_posi[:, np.newaxis],
+                                                  self.obs_posi_var[:, np.newaxis],
+                                                  self.obs_posi_entry_cover[:, np.newaxis],
+                                                  self.obs_price_mean[:, np.newaxis],
+                                                  self.obs_reward_fluctuant[:, np.newaxis],
+                                                  self.obs_makereal[:, np.newaxis],
+                                                  self.obs_reward[:, np.newaxis]), axis=1)
+            elif isinstance(self.return_transaction, list):
+                self.obs_return = np.concatenate((self.obs_state,)+ \
+                                                 tuple(self.transaction_dict[need] \
+                                                       for need in self.return_transaction),
+                                                 axis=1)
+            elif isinstance(self.return_transaction, dict):
+                self.obs_return = np.concatenate((self.obs_state,)+ \
+                                                 tuple(self.return_transaction[need](self.transaction_dict[need]) \
+                                                       for need in self.return_transaction),
+                                                 axis=1)
         else:
             self.obs_return = self.obs_state
 
diff --git a/trading_env/envs/training_v1.py b/trading_env/envs/training_v1.py
index bf46e6e..b898849 100644
--- a/trading_env/envs/training_v1.py
+++ b/trading_env/envs/training_v1.py
@@ -13,8 +13,8 @@ class trading_env:
     def __init__(self, env_id, obs_data_len, step_len,
                  df, fee, max_position=5, deal_col_name='price',
                  feature_names=['price', 'volume'],
-                 return_transaction=True,
-                 fluc_div=100.0, gameover_limit=5,
+                 return_transaction=False,
+                 #fluc_div=100.0, gameover_limit=5,
                  *args, **kwargs):
         """
         #assert df
@@ -52,16 +52,14 @@ def __init__(self, env_id, obs_data_len, step_len,
         self.fee = fee
         self.max_position = max_position
 
-        self.fluc_div = fluc_div
-        self.gameover = gameover_limit
+        #self.gameover = gameover_limit
         self.return_transaction = return_transaction
 
         self.begin_fs = self.df[self.df['serial_number']==0]
         self.date_leng = len(self.begin_fs)
 
         self.render_on = 0
-        self.buy_color, self.sell_color = (1, 2)
-        self.new_rotation, self.cover_rotation = (1, 2)
+
         self.transaction_details = pd.DataFrame()
         self.logger.info('Making new env: {}'.format(env_id))
 
@@ -109,16 +107,43 @@ def reset(self):
         self.obs_makereal = self.reward_makereal_arr[self.step_st: self.step_st+self.obs_len]
         self.obs_reward = self.reward_arr[self.step_st: self.step_st+self.obs_len]
 
+        self.transaction_dict = {'mkt_pos': self.obs_posi[:, np.newaxis],
+                                 'mkt_pos_var': self.obs_posi_var[:, np.newaxis],
+                                 'entry_cover': self.obs_posi_entry_cover[:, np.newaxis],
+                                 'avg_hold_price': self.obs_price_mean[:, np.newaxis],
+                                 'fluc_reward': self.obs_reward_fluctuant[:, np.newaxis],
+                                 'make_real': self.obs_makereal[:, np.newaxis],
+                                 'reward': self.obs_reward[:, np.newaxis],}
+        self.transaction_all_dict = {'mkt_pos': self.posi_arr,
+                                     'mkt_pos_var': self.posi_variation_arr,
+                                     'entry_cover': self.posi_entry_cover_arr,
+                                     'avg_hold_price': self.price_mean_arr,
+                                     'fluc_reward': self.reward_fluctuant_arr,
+                                     'make_real': self.reward_makereal_arr,
+                                     'reward': self.reward_arr,}
+
         if self.return_transaction:
-            self.obs_return = np.concatenate((self.obs_state,
-                                              self.obs_posi[:, np.newaxis],
-                                              self.obs_posi_var[:, 
np.newaxis], - self.obs_posi_entry_cover[:, np.newaxis], - self.obs_price[:, np.newaxis], - self.obs_price_mean[:, np.newaxis], - self.obs_reward_fluctuant[:, np.newaxis], - self.obs_makereal[:, np.newaxis], - self.obs_reward[:, np.newaxis]), axis=1) + if isinstance(self.return_transaction, bool): + self.obs_return = np.concatenate((self.obs_state, + self.obs_posi[:, np.newaxis], + self.obs_posi_var[:, np.newaxis], + self.obs_posi_entry_cover[:, np.newaxis], + self.obs_price_mean[:, np.newaxis], + self.obs_reward_fluctuant[:, np.newaxis], + self.obs_makereal[:, np.newaxis], + self.obs_reward[:, np.newaxis]), axis=1) + elif isinstance(self.return_transaction, list): + self.obs_return = np.concatenate((self.obs_state,)+ \ + tuple(self.transaction_dict[need] \ + for need in self.return_transaction), + axis=1) + elif isinstance(self.return_transaction, dict): + self.obs_return = np.concatenate((self.obs_state,)+ \ + tuple(self.return_transaction[need](self.transaction_dict[need]) \ + for need in self.return_transaction), + axis=1) + else: + self.obs_return = self.obs_state else: self.obs_return = self.obs_state @@ -214,6 +239,10 @@ def step(self, action): self.chg_posi_entry_cover[:1] = -2 self.chg_makereal[:1] = 1 self.chg_reward[:] = ((self.chg_price - self.chg_price_mean)*(current_mkt_position) - abs(current_mkt_position)*self.fee)*self.chg_makereal + + transaction_details_index = ['position', 'position_variation', 'entry_cover', + 'price_mean', 'reward_fluctuant', 'reward_makereal', + 'reward'] self.transaction_details = pd.DataFrame([self.posi_arr, self.posi_variation_arr, self.posi_entry_cover_arr, @@ -221,9 +250,7 @@ def step(self, action): self.reward_fluctuant_arr, self.reward_makereal_arr, self.reward_arr], - index=['position', 'position_variation', 'entry_cover', - 'price_mean', 'reward_fluctuant', 'reward_makereal', - 'reward'], + index=transaction_details_index, columns=self.df_sample.index).T self.info = self.df_sample.join(self.transaction_details) @@ -256,15 +283,27 @@ def step(self, action): self.chg_reward_fluctuant[:] = (self.chg_price - self.chg_price_mean)*self.chg_posi - np.abs(self.chg_posi)*self.fee if self.return_transaction: - self.obs_return = np.concatenate((self.obs_state, - self.obs_posi[:, np.newaxis], - self.obs_posi_var[:, np.newaxis], - self.obs_posi_entry_cover[:, np.newaxis], - self.obs_price[:, np.newaxis], - self.obs_price_mean[:, np.newaxis], - self.obs_reward_fluctuant[:, np.newaxis], - self.obs_makereal[:, np.newaxis], - self.obs_reward[:, np.newaxis]), axis=1) + if isinstance(self.return_transaction, bool): + self.obs_return = np.concatenate((self.obs_state, + self.obs_posi[:, np.newaxis], + self.obs_posi_var[:, np.newaxis], + self.obs_posi_entry_cover[:, np.newaxis], + self.obs_price_mean[:, np.newaxis], + self.obs_reward_fluctuant[:, np.newaxis], + self.obs_makereal[:, np.newaxis], + self.obs_reward[:, np.newaxis]), axis=1) + elif isinstance(self.return_transaction, list): + self.obs_return = np.concatenate((self.obs_state,)+ \ + tuple(self.transaction_dict[need] \ + for need in self.return_transaction), + axis=1) + elif isinstance(self.return_transaction, dict): + self.obs_return = np.concatenate((self.obs_state,)+ \ + tuple(self.return_transaction[need](self.transaction_dict[need]) \ + for need in self.return_transaction), + axis=1) + else: + self.obs_return = self.obs_state else: self.obs_return = self.obs_state diff --git a/trading_env/test/__main__.py b/trading_env/test/__main__.py index 251e5f8..3e221d8 100644 --- a/trading_env/test/__main__.py 
+++ b/trading_env/test/__main__.py
@@ -8,12 +8,16 @@
 df = pd.read_csv('trading_env/test/data/SGXTWsample.csv', index_col=0, parse_dates=['datetime'])
 
+def test(x):
+    return x
+
 env = trading_env.make(env_id='training_v1', obs_data_len=256, step_len=128,
                        df=df, fee=0.1, max_position=5, deal_col_name='Price',
                        feature_names=['Price', 'Volume', 
                                       'Ask_price','Bid_price', 
                                       'Ask_deal_vol','Bid_deal_vol', 
                                       'Bid/Ask_deal', 'Updown'], 
+                       return_transaction={'mkt_pos': test},
                        fluc_div=100.0)
 
 env.reset()

From b4c373962b28a3b96516eff77062e7162dfd4c2b Mon Sep 17 00:00:00 2001
From: Yvictor <410175015@gms.ndhu.edu.tw>
Date: Sat, 16 Dec 2017 22:38:07 +0800
Subject: [PATCH 3/7] fix returned transaction feature length: build model
 shapes with return_state_len instead of feature_len

---
 trading_env/envs/backtest_v1.py | 11 ++++++++++-
 trading_env/envs/training_v1.py |  8 ++++++++
 trading_env/test/__main__.py    |  4 +---
 3 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/trading_env/envs/backtest_v1.py b/trading_env/envs/backtest_v1.py
index e2d121b..e334948 100644
--- a/trading_env/envs/backtest_v1.py
+++ b/trading_env/envs/backtest_v1.py
@@ -42,9 +42,18 @@ def __init__(self, env_id, obs_data_len, step_len,
         self.action_describe = {0:'do nothing',
                                 1:'long',
                                 2:'short'}
-        
+
+
+        if isinstance(return_transaction, bool) and return_transaction:
+            transaction_num = 7
+        elif not return_transaction:
+            transaction_num = 0
+        else:
+            transaction_num = len(return_transaction)
+
         self.obs_len = obs_data_len
         self.feature_len = len(feature_names)
+        self.return_state_len = len(feature_names) + transaction_num
         self.observation_space = np.array([self.obs_len*self.feature_len,])
         self.using_feature = feature_names
         self.price_name = deal_col_name
diff --git a/trading_env/envs/training_v1.py b/trading_env/envs/training_v1.py
index b898849..bb6c4c2 100644
--- a/trading_env/envs/training_v1.py
+++ b/trading_env/envs/training_v1.py
@@ -41,9 +41,17 @@ def __init__(self, env_id, obs_data_len, step_len,
         self.action_describe = {0:'do nothing',
                                 1:'long',
                                 2:'short'}
 
+        if isinstance(return_transaction, bool) and return_transaction:
+            transaction_num = 7
+        elif not return_transaction:
+            transaction_num = 0
+        else:
+            transaction_num = len(return_transaction)
 
         self.obs_len = obs_data_len
         self.feature_len = len(feature_names)
+        self.return_state_len = len(feature_names) + transaction_num
         self.observation_space = np.array([self.obs_len*self.feature_len,])
         self.using_feature = feature_names
         self.price_name = deal_col_name
diff --git a/trading_env/test/__main__.py b/trading_env/test/__main__.py
index 3e221d8..7e7be5f 100644
--- a/trading_env/test/__main__.py
+++ b/trading_env/test/__main__.py
@@ -8,8 +8,6 @@
 df = pd.read_csv('trading_env/test/data/SGXTWsample.csv', index_col=0, parse_dates=['datetime'])
 
-def test(x):
-    return x
 
 env = trading_env.make(env_id='training_v1', obs_data_len=256, step_len=128,
                        df=df, fee=0.1, max_position=5, deal_col_name='Price',
@@ -17,7 +15,7 @@
                                       'Ask_deal_vol','Bid_deal_vol', 
                                       'Bid/Ask_deal', 'Updown'], 
-                       return_transaction={'mkt_pos': test},
+                       return_transaction={'mkt_pos': lambda x:x},
                        fluc_div=100.0)
 
 env.reset()
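The point of this patch is that once transaction columns are appended, the observation's second dimension is return_state_len, not feature_len. A small sizing sketch under the dict form used in the test script above (shape check only; the assertions assume the env built there):

    obs = env.reset()
    # feature columns plus one column per returned transaction field
    assert env.return_state_len == env.feature_len + 1         # {'mkt_pos': ...} adds one column
    assert obs.shape == (env.obs_len, env.return_state_len)    # use this to size a model's input
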
From 381f83801f95bc4ef7af5e6773d1dd585d879f5a Mon Sep 17 00:00:00 2001
From: Yvictor <410175015@gms.ndhu.edu.tw>
Date: Wed, 20 Dec 2017 00:31:35 +0800
Subject: [PATCH 4/7] add custom done support to step() for setting a max loss

---
 trading_env/envs/backtest_v1.py | 4 ++--
 trading_env/envs/training_v1.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/trading_env/envs/backtest_v1.py b/trading_env/envs/backtest_v1.py
index e334948..14fde5a 100644
--- a/trading_env/envs/backtest_v1.py
+++ b/trading_env/envs/backtest_v1.py
@@ -210,7 +210,7 @@ def _stayon(self, current_price_mean, current_mkt_position):
         self.chg_posi[:] = current_mkt_position
         self.chg_price_mean[:] = current_price_mean
 
-    def step(self, action):
+    def step(self, action, custom_done=False):
         current_index = self.step_st + self.obs_len -1
         current_price_mean = self.price_mean_arr[current_index]
         current_mkt_position = self.posi_arr[current_index]
@@ -240,7 +240,7 @@ def step(self, action):
 
         done = False
-        if self.step_st+self.obs_len+self.step_len >= len(self.price):
+        if self.step_st+self.obs_len+self.step_len >= len(self.price) or custom_done:
             done = True
             action = -1
             if current_mkt_position != 0:
diff --git a/trading_env/envs/training_v1.py b/trading_env/envs/training_v1.py
index bb6c4c2..556db39 100644
--- a/trading_env/envs/training_v1.py
+++ b/trading_env/envs/training_v1.py
@@ -207,7 +207,7 @@ def _stayon(self, current_price_mean, current_mkt_position):
         self.chg_posi[:] = current_mkt_position
         self.chg_price_mean[:] = current_price_mean
 
-    def step(self, action):
+    def step(self, action, custom_done=False):
         current_index = self.step_st + self.obs_len -1
         current_price_mean = self.price_mean_arr[current_index]
         current_mkt_position = self.posi_arr[current_index]
@@ -237,7 +237,7 @@ def step(self, action):
 
         done = False
-        if self.step_st+self.obs_len+self.step_len >= len(self.price):
+        if self.step_st+self.obs_len+self.step_len >= len(self.price) or custom_done:
             done = True
             action = -1
             if current_mkt_position != 0:

From 94e0a5a2cd6f1d8c22da8f792742e64e08a505d3 Mon Sep 17 00:00:00 2001
From: Yvictor <410175015@gms.ndhu.edu.tw>
Date: Thu, 5 Apr 2018 19:30:49 +0800
Subject: [PATCH 5/7] fix return_transaction columns not being updated when
 step() is called

---
 trading_env/envs/backtest_v1.py | 7 +++++++
 trading_env/envs/training_v1.py | 7 +++++++
 2 files changed, 14 insertions(+)

diff --git a/trading_env/envs/backtest_v1.py b/trading_env/envs/backtest_v1.py
index 14fde5a..b6117af 100644
--- a/trading_env/envs/backtest_v1.py
+++ b/trading_env/envs/backtest_v1.py
@@ -292,6 +292,13 @@ def step(self, action, custom_done=False):
         self.chg_reward_fluctuant[:] = (self.chg_price - self.chg_price_mean)*self.chg_posi - np.abs(self.chg_posi)*self.fee
 
         if self.return_transaction:
+            self.transaction_dict = {'mkt_pos': self.obs_posi[:, np.newaxis],
+                                     'mkt_pos_var': self.obs_posi_var[:, np.newaxis],
+                                     'entry_cover': self.obs_posi_entry_cover[:, np.newaxis],
+                                     'avg_hold_price': self.obs_price_mean[:, np.newaxis],
+                                     'fluc_reward': self.obs_reward_fluctuant[:, np.newaxis],
+                                     'make_real': self.obs_makereal[:, np.newaxis],
+                                     'reward': self.obs_reward[:, np.newaxis],}
             if isinstance(self.return_transaction, bool):
                 self.obs_return = np.concatenate((self.obs_state,
                                                   self.obs_posi[:, np.newaxis],
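This fix matters because the obs_* attributes are re-bound to fresh NumPy slices on every step: the transaction_dict built in reset() keeps views of the old window and goes stale, so it has to be rebuilt per step to pick up the new views. A minimal standalone illustration of the rebinding pitfall (names are illustrative):

    import numpy as np

    arr = np.zeros(8)
    win = arr[0:4]                       # view of the first window
    cache = {'pos': win[:, np.newaxis]}  # cached view, frozen on window [0:4]
    win = arr[2:6]                       # the name is re-bound to a new view...
    # ...but cache['pos'] still reads arr[0:4]; rebuild it from the new `win`
    cache = {'pos': win[:, np.newaxis]}
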
diff --git a/trading_env/envs/training_v1.py b/trading_env/envs/training_v1.py
index 556db39..d228533 100644
--- a/trading_env/envs/training_v1.py
+++ b/trading_env/envs/training_v1.py
@@ -291,6 +291,13 @@
         self.chg_reward_fluctuant[:] = (self.chg_price - self.chg_price_mean)*self.chg_posi - np.abs(self.chg_posi)*self.fee
 
         if self.return_transaction:
+            self.transaction_dict = {'mkt_pos': self.obs_posi[:, np.newaxis],
+                                     'mkt_pos_var': self.obs_posi_var[:, np.newaxis],
+                                     'entry_cover': self.obs_posi_entry_cover[:, np.newaxis],
+                                     'avg_hold_price': self.obs_price_mean[:, np.newaxis],
+                                     'fluc_reward': self.obs_reward_fluctuant[:, np.newaxis],
+                                     'make_real': self.obs_makereal[:, np.newaxis],
+                                     'reward': self.obs_reward[:, np.newaxis],}
             if isinstance(self.return_transaction, bool):
                 self.obs_return = np.concatenate((self.obs_state,
                                                   self.obs_posi[:, np.newaxis],

From 6d2bd102ee49a72d657bea1e415ee9a4d135a9a5 Mon Sep 17 00:00:00 2001
From: Yvictor <410175015@gms.ndhu.edu.tw>
Date: Thu, 5 Apr 2018 19:31:26 +0800
Subject: [PATCH 6/7] v2 init

---
 trading_env/envs/__init__.py    |   4 +-
 trading_env/envs/backtest_v2.py | 440 +++++++++++++++++++++++++++++++
 trading_env/envs/training_v2.py | 445 ++++++++++++++++++++++++++++++++
 3 files changed, 888 insertions(+), 1 deletion(-)
 create mode 100644 trading_env/envs/backtest_v2.py
 create mode 100644 trading_env/envs/training_v2.py

diff --git a/trading_env/envs/__init__.py b/trading_env/envs/__init__.py
index 1a20416..97c0f4a 100644
--- a/trading_env/envs/__init__.py
+++ b/trading_env/envs/__init__.py
@@ -1,6 +1,8 @@
 from . import training_v0
 from . import training_v1
+from . import training_v2
 from . import backtest_v0
 from . import backtest_v1
+from . import backtest_v2
 
-available_envs_module = [training_v0, training_v1, backtest_v0, backtest_v1]
\ No newline at end of file
+available_envs_module = [training_v0, training_v1, training_v2, backtest_v0, backtest_v1, backtest_v2]
\ No newline at end of file
diff --git a/trading_env/envs/backtest_v2.py b/trading_env/envs/backtest_v2.py
new file mode 100644
index 0000000..7e4f06e
--- /dev/null
+++ b/trading_env/envs/backtest_v2.py
@@ -0,0 +1,440 @@
+
+import os
+import logging
+
+import numpy as np
+import pandas as pd
+import matplotlib
+import matplotlib.pyplot as plt
+import matplotlib.patches as patches
+from colour import Color
+
+
+class trading_env:
+    def __init__(self, env_id, obs_data_len, step_len,
+                 df, fee, max_position=5, deal_col_name='price',
+                 feature_names=['price', 'volume'],
+                 return_transaction=False,
+                 action_first=False,
+                 *args, **kwargs):
+        """
+        #assert df
+        # the deal price column is essential, and the df format must be specified
+        # obs_data_len -> observation data length
+        # step_len -> how far the rolling window advances on each call to step()
+        # df -> dataframe that contains the data for trading (format as below)
+            # price
+            # datetime
+            # serial_number -> serial number of the deal, restarting from 0 each day
+
+        # fee -> the fee paid on each deal, set it to match your product
+        # max_position -> the max market position for your trading shares
+        # deal_col_name -> the column name used to calculate the reward.
+        # feature_names -> list containing the feature columns to use in the trading state.
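+        # return_transaction -> bool, list or dict, same meaning as in trading_env.make
+        # action_first -> if True, step() applies the action before the observation
+        #                 window rolls forward; if False the window rolls first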
+ # ?day trade option set as default if don't use this need modify + """ + logging.basicConfig(level=logging.INFO, format='[%(asctime)s] %(message)s') + self.logger = logging.getLogger(env_id) + #self.file_loc_path = os.environ.get('FILEPATH', '') + + self.df = df + self.action_space = 3 + self.action_describe = {0:'do nothing', + 1:'long', + 2:'short'} + + + if isinstance(return_transaction, bool) and return_transaction: + transaction_num = 7 + elif not return_transaction: + transaction_num = 0 + else: + transaction_num = len(return_transaction) + + self.obs_len = obs_data_len + self.feature_len = len(feature_names) + self.return_state_len = len(feature_names) + transaction_num + self.observation_space = np.array([self.obs_len*self.feature_len,]) + self.using_feature = feature_names + self.price_name = deal_col_name + + self.step_len = step_len + self.fee = fee + self.max_position = max_position + + self.return_transaction = return_transaction + self.action_first = action_first + + self.begin_fs = self.df[self.df['serial_number']==0] + self.date_leng = len(self.begin_fs) + + self.date_record = 0 + self.backtest_done = False + + self.render_on = 0 + self.buy_color, self.sell_color = (1, 2) + self.new_rotation, self.cover_rotation = (1, 2) + self.transaction_details = pd.DataFrame() + self.logger.info('Making new env: {}'.format(env_id)) + + def _choice_section(self): + assert self.date_record < self.date_leng, 'Backtest Done.' + section_int = self.date_record + if section_int == self.date_leng - 1: + begin_point = self.begin_fs.index[section_int] + end_point = None + else: + begin_point, end_point = self.begin_fs.index[section_int: section_int+2] + df_section = self.df.iloc[begin_point: end_point] + self.date_record += 1 + if self.date_record >= self.date_leng: + self.backtest_done = True + return df_section + + def reset(self): + self.render_on = 0 + self.df_sample = self._choice_section() + self.step_st = 0 + # define the price to calculate the reward + self.price = self.df_sample[self.price_name].as_matrix() + # define the observation feature + self.obs_features = self.df_sample[self.using_feature].as_matrix() + #maybe make market position feature in final feature, set as option + self.posi_arr = np.zeros_like(self.price) + # position variation + self.posi_variation_arr = np.zeros_like(self.posi_arr) + # position entry or cover :new_entry->1 increase->2 cover->-1 decrease->-2 + self.posi_entry_cover_arr = np.zeros_like(self.posi_arr) + # self.position_feature = np.array(self.posi_l[self.step_st:self.step_st+self.obs_len])/(self.max_position*2)+0.5 + + self.price_mean_arr = self.price.copy() + self.reward_fluctuant_arr = (self.price - self.price_mean_arr)*self.posi_arr + self.reward_makereal_arr = self.posi_arr.copy() + self.reward_arr = self.reward_fluctuant_arr*self.reward_makereal_arr + + self.info = None + self.transaction_details = pd.DataFrame() + + # observation part + self.obs_state = self.obs_features[self.step_st: self.step_st+self.obs_len] + self.obs_posi = self.posi_arr[self.step_st: self.step_st+self.obs_len] + self.obs_posi_var = self.posi_variation_arr[self.step_st: self.step_st+self.obs_len] + self.obs_posi_entry_cover = self.posi_entry_cover_arr[self.step_st: self.step_st+self.obs_len] + self.obs_price = self.price[self.step_st: self.step_st+self.obs_len] + self.obs_price_mean = self.price_mean_arr[self.step_st: self.step_st+self.obs_len] + self.obs_reward_fluctuant = self.reward_fluctuant_arr[self.step_st: self.step_st+self.obs_len] + self.obs_makereal = 
self.reward_makereal_arr[self.step_st: self.step_st+self.obs_len] + self.obs_reward = self.reward_arr[self.step_st: self.step_st+self.obs_len] + + self.transaction_dict = {'mkt_pos': self.obs_posi[:, np.newaxis], + 'mkt_pos_var': self.obs_posi_var[:, np.newaxis], + 'entry_cover': self.obs_posi_entry_cover[:, np.newaxis], + 'avg_hold_price': self.obs_price_mean[:, np.newaxis], + 'fluc_reward': self.obs_reward_fluctuant[:, np.newaxis], + 'make_real': self.obs_makereal[:, np.newaxis], + 'reward': self.obs_reward[:, np.newaxis],} + + if self.return_transaction: + if isinstance(self.return_transaction, bool): + self.obs_return = np.concatenate((self.obs_state, + self.obs_posi[:, np.newaxis], + self.obs_posi_var[:, np.newaxis], + self.obs_posi_entry_cover[:, np.newaxis], + self.obs_price_mean[:, np.newaxis], + self.obs_reward_fluctuant[:, np.newaxis], + self.obs_makereal[:, np.newaxis], + self.obs_reward[:, np.newaxis]), axis=1) + elif isinstance(self.return_transaction, list): + self.obs_return = np.concatenate((self.obs_state,)+ \ + tuple(self.transaction_dict[need] \ + for need in self.return_transaction), + axis=1) + elif isinstance(self.return_transaction, dict): + self.obs_return = np.concatenate((self.obs_state,)+ \ + tuple(self.return_transaction[need](self.transaction_dict[need]) \ + for need in self.return_transaction), + axis=1) + else: + self.obs_return = self.obs_state + + self.t_index = 0 + return self.obs_return + + + def _long(self, open_posi, enter_price, current_mkt_position, current_price_mean): + if open_posi: + self.chg_price_mean[:] = enter_price + self.chg_posi[:] = 1 + self.chg_posi_var[:1] = 1 + self.chg_posi_entry_cover[:1] = 1 + else: + after_act_mkt_position = current_mkt_position + 1 + self.chg_price_mean[:] = (current_price_mean*current_mkt_position + \ + enter_price)/after_act_mkt_position + self.chg_posi[:] = after_act_mkt_position + self.chg_posi_var[:1] = 1 + self.chg_posi_entry_cover[:1] = 2 + + def _short(self, open_posi, enter_price, current_mkt_position, current_price_mean): + if open_posi: + self.chg_price_mean[:] = enter_price + self.chg_posi[:] = -1 + self.chg_posi_var[:1] = -1 + self.chg_posi_entry_cover[:1] = 1 + else: + after_act_mkt_position = current_mkt_position - 1 + self.chg_price_mean[:] = (current_price_mean*abs(current_mkt_position) + \ + enter_price)/abs(after_act_mkt_position) + self.chg_posi[:] = after_act_mkt_position + self.chg_posi_var[:1] = -1 + self.chg_posi_entry_cover[:1] = 2 + + def _short_cover(self, current_price_mean, current_mkt_position): + self.chg_price_mean[:] = current_price_mean + self.chg_posi[:] = current_mkt_position + 1 + self.chg_makereal[:1] = 1 + self.chg_reward[:] = ((self.chg_price - self.chg_price_mean)*(-1) - self.fee)*self.chg_makereal + self.chg_posi_var[:1] = 1 + self.chg_posi_entry_cover[:1] = -1 + + def _long_cover(self, current_price_mean, current_mkt_position): + self.chg_price_mean[:] = current_price_mean + self.chg_posi[:] = current_mkt_position - 1 + self.chg_makereal[:1] = 1 + self.chg_reward[:] = ((self.chg_price - self.chg_price_mean)*(1) - self.fee)*self.chg_makereal + self.chg_posi_var[:1] = -1 + self.chg_posi_entry_cover[:1] = -1 + + def _stayon(self, current_price_mean, current_mkt_position): + self.chg_posi[:] = current_mkt_position + self.chg_price_mean[:] = current_price_mean + + def step(self, action, custom_done=False): + current_index = self.step_st + self.obs_len - (1 + self.action_first) + current_price_mean = self.price_mean_arr[current_index] + current_mkt_position = 
self.posi_arr[current_index] + + if not self.action_first: + self.t_index += 1 + self.step_st += self.step_len + # observation part + self.obs_state = self.obs_features[self.step_st: self.step_st+self.obs_len] + self.obs_posi = self.posi_arr[self.step_st: self.step_st+self.obs_len] + # position variation + self.obs_posi_var = self.posi_variation_arr[self.step_st: self.step_st+self.obs_len] + # position entry or cover :new_entry->1 increase->2 cover->-1 decrease->-2 + self.obs_posi_entry_cover = self.posi_entry_cover_arr[self.step_st: self.step_st+self.obs_len] + self.obs_price = self.price[self.step_st: self.step_st+self.obs_len] + self.obs_price_mean = self.price_mean_arr[self.step_st: self.step_st+self.obs_len] + self.obs_reward_fluctuant = self.reward_fluctuant_arr[self.step_st: self.step_st+self.obs_len] + self.obs_makereal = self.reward_makereal_arr[self.step_st: self.step_st+self.obs_len] + self.obs_reward = self.reward_arr[self.step_st: self.step_st+self.obs_len] + # change part + self.chg_posi = self.obs_posi[-self.step_len:] + self.chg_posi_var = self.obs_posi_var[-self.step_len:] + self.chg_posi_entry_cover = self.obs_posi_entry_cover[-self.step_len:] + self.chg_price = self.obs_price[-self.step_len:] + self.chg_price_mean = self.obs_price_mean[-self.step_len:] + self.chg_reward_fluctuant = self.obs_reward_fluctuant[-self.step_len:] + self.chg_makereal = self.obs_makereal[-self.step_len:] + self.chg_reward = self.obs_reward[-self.step_len:] + + done = False + if self.step_st+self.obs_len+self.step_len >= len(self.price) or custom_done: + done = True + action = -1 + if current_mkt_position != 0: + self.chg_price_mean[:] = current_price_mean + self.chg_posi[:] = 0 + self.chg_posi_var[:1] = -current_mkt_position + self.chg_posi_entry_cover[:1] = -2 + self.chg_makereal[:1] = 1 + self.chg_reward[:] = ((self.chg_price - self.chg_price_mean)*(current_mkt_position) - abs(current_mkt_position)*self.fee)*self.chg_makereal + self.transaction_details = pd.DataFrame([self.posi_arr, + self.posi_variation_arr, + self.posi_entry_cover_arr, + self.price_mean_arr, + self.reward_fluctuant_arr, + self.reward_makereal_arr, + self.reward_arr], + index=['position', 'position_variation', 'entry_cover', + 'price_mean', 'reward_fluctuant', 'reward_makereal', + 'reward'], + columns=self.df_sample.index).T + self.info = self.df_sample.join(self.transaction_details) + + + # use next tick, maybe choice avg in first 10 tick will be better to real backtest + enter_price = self.chg_price[0] + if action == 1 and self.max_position > current_mkt_position >= 0: + open_posi = (current_mkt_position == 0) + self._long(open_posi, enter_price, current_mkt_position, current_price_mean) + + elif action == 2 and -self.max_position < current_mkt_position <= 0: + open_posi = (current_mkt_position == 0) + self._short(open_posi, enter_price, current_mkt_position, current_price_mean) + + elif action == 1 and current_mkt_position<0: + self._short_cover(current_price_mean, current_mkt_position) + + elif action == 2 and current_mkt_position>0: + self._long_cover(current_price_mean, current_mkt_position) + + elif action == 1 and current_mkt_position==self.max_position: + action = 0 + elif action == 2 and current_mkt_position==-self.max_position: + action = 0 + + if action == 0: + if current_mkt_position != 0: + self._stayon(current_price_mean, current_mkt_position) + + self.chg_reward_fluctuant[:] = (self.chg_price - self.chg_price_mean)*self.chg_posi - np.abs(self.chg_posi)*self.fee + + if self.return_transaction: + 
self.transaction_dict = {'mkt_pos': self.obs_posi[:, np.newaxis], + 'mkt_pos_var': self.obs_posi_var[:, np.newaxis], + 'entry_cover': self.obs_posi_entry_cover[:, np.newaxis], + 'avg_hold_price': self.obs_price_mean[:, np.newaxis], + 'fluc_reward': self.obs_reward_fluctuant[:, np.newaxis], + 'make_real': self.obs_makereal[:, np.newaxis], + 'reward': self.obs_reward[:, np.newaxis],} + if isinstance(self.return_transaction, bool): + self.obs_return = np.concatenate((self.obs_state, + self.obs_posi[:, np.newaxis], + self.obs_posi_var[:, np.newaxis], + self.obs_posi_entry_cover[:, np.newaxis], + self.obs_price_mean[:, np.newaxis], + self.obs_reward_fluctuant[:, np.newaxis], + self.obs_makereal[:, np.newaxis], + self.obs_reward[:, np.newaxis]), axis=1) + elif isinstance(self.return_transaction, list): + self.obs_return = np.concatenate((self.obs_state,)+ \ + tuple(self.transaction_dict[need] \ + for need in self.return_transaction), + axis=1) + elif isinstance(self.return_transaction, dict): + self.obs_return = np.concatenate((self.obs_state,)+ \ + tuple(self.return_transaction[need](self.transaction_dict[need]) \ + for need in self.return_transaction), + axis=1) + else: + self.obs_return = self.obs_state + + if self.action_first: + self.t_index += 1 + self.step_st += self.step_len + + return self.obs_return, self.obs_reward.sum(), done, self.info + + def _gen_trade_color(self, ind, long_entry=(1, 0, 0, 0.5), long_cover=(1, 1, 1, 0.5), + short_entry=(0, 1, 0, 0.5), short_cover=(1, 1, 1, 0.5)): + if self.posi_variation_arr[ind]>0 and self.posi_entry_cover_arr[ind]>0: + return long_entry + elif self.posi_variation_arr[ind]>0 and self.posi_entry_cover_arr[ind]<0: + return long_cover + elif self.posi_variation_arr[ind]<0 and self.posi_entry_cover_arr[ind]>0: + return short_entry + elif self.posi_variation_arr[ind]<0 and self.posi_entry_cover_arr[ind]<0: + return short_cover + + def _plot_trading(self): + price_x = list(range(len(self.price[:self.step_st+self.obs_len]))) + self.price_plot = self.ax.plot(price_x, self.price[:self.step_st+self.obs_len], c=(0, 0.68, 0.95, 0.9),zorder=1) + # maybe seperate up down color + #self.price_plot = self.ax.plot(price_x, self.price[:self.step_st+self.obs_len], c=(0, 0.75, 0.95, 0.9),zorder=1) + self.features_plot = [self.ax3.plot(price_x, self.obs_features[:self.step_st+self.obs_len, i], + c=self.features_color[i])[0] for i in range(self.feature_len)] + rect_high = self.obs_price.max() - self.obs_price.min() + self.target_box = self.ax.add_patch( + patches.Rectangle( + (self.step_st, self.obs_price.min()), self.obs_len, rect_high, + label='observation',edgecolor=(0.9, 1, 0.2, 0.8),facecolor=(0.95,1,0.1,0.3), + linestyle='-',linewidth=1.5, + fill=True) + ) # remove background) + self.fluc_reward_plot_p = self.ax2.fill_between(price_x, 0, self.reward_fluctuant_arr[:self.step_st+self.obs_len], + where=self.reward_fluctuant_arr[:self.step_st+self.obs_len]>=0, + facecolor=(1, 0.8, 0, 0.2), edgecolor=(1, 0.8, 0, 0.9), linewidth=0.8) + self.fluc_reward_plot_n = self.ax2.fill_between(price_x, 0, self.reward_fluctuant_arr[:self.step_st+self.obs_len], + where=self.reward_fluctuant_arr[:self.step_st+self.obs_len]<=0, + facecolor=(0, 1, 0.8, 0.2), edgecolor=(0, 1, 0.8, 0.9), linewidth=0.8) + self.posi_plot_long = self.ax2.fill_between(price_x, 0, self.posi_arr[:self.step_st+self.obs_len], + where=self.posi_arr[:self.step_st+self.obs_len]>=0, + facecolor=(1, 0.5, 0, 0.2), edgecolor=(1, 0.5, 0, 0.9), linewidth=1) + self.posi_plot_short = self.ax2.fill_between(price_x, 0, 
self.posi_arr[:self.step_st+self.obs_len], + where=self.posi_arr[:self.step_st+self.obs_len]<=0, + facecolor=(0, 0.5, 1, 0.2), edgecolor=(0, 0.5, 1, 0.9), linewidth=1) + self.reward_plot_p = self.ax2.fill_between(price_x, 0, + self.reward_arr[:self.step_st+self.obs_len].cumsum(), + where=self.reward_arr[:self.step_st+self.obs_len].cumsum()>=0, + facecolor=(1, 0, 0, 0.2), edgecolor=(1, 0, 0, 0.9), linewidth=1) + self.reward_plot_n = self.ax2.fill_between(price_x, 0, + self.reward_arr[:self.step_st+self.obs_len].cumsum(), + where=self.reward_arr[:self.step_st+self.obs_len].cumsum()<=0, + facecolor=(0, 1, 0, 0.2), edgecolor=(0, 1, 0, 0.9), linewidth=1) + + trade_x = self.posi_variation_arr.nonzero()[0] + trade_x_buy = [i for i in trade_x if self.posi_variation_arr[i]>0] + trade_x_sell = [i for i in trade_x if self.posi_variation_arr[i]<0] + trade_y_buy = [self.price[i] for i in trade_x_buy] + trade_y_sell = [self.price[i] for i in trade_x_sell] + trade_color_buy = [self._gen_trade_color(i) for i in trade_x_buy] + trade_color_sell = [self._gen_trade_color(i) for i in trade_x_sell] + self.trade_plot_buy = self.ax.scatter(x=trade_x_buy, y=trade_y_buy, s=100, marker='^', + c=trade_color_buy, edgecolors=(1,0,0,0.9), zorder=2) + self.trade_plot_sell = self.ax.scatter(x=trade_x_sell, y=trade_y_sell, s=100, marker='v', + c=trade_color_sell, edgecolors=(0,1,0,0.9), zorder=2) + + + def render(self, save=False, show=True): + if self.render_on == 0: + matplotlib.style.use('dark_background') + self.render_on = 1 + + left, width = 0.1, 0.8 + rect1 = [left, 0.4, width, 0.55] + rect2 = [left, 0.2, width, 0.2] + rect3 = [left, 0.05, width, 0.15] + + self.fig = plt.figure(figsize=(15,8)) + self.fig.suptitle('%s'%self.df_sample['datetime'].iloc[0].date(), fontsize=14, fontweight='bold') + #self.ax = self.fig.add_subplot(1,1,1) + self.ax = self.fig.add_axes(rect1) # left, bottom, width, height + self.ax2 = self.fig.add_axes(rect2, sharex=self.ax) + self.ax3 = self.fig.add_axes(rect3, sharex=self.ax) + self.ax.grid(color='gray', linestyle='-', linewidth=0.5) + self.ax2.grid(color='gray', linestyle='-', linewidth=0.5) + self.ax3.grid(color='gray', linestyle='-', linewidth=0.5) + self.features_color = [c.rgb+(0.9,) for c in Color('yellow').range_to(Color('cyan'), self.feature_len)] + #fig, ax = plt.subplots() + self._plot_trading() + + self.ax.set_xlim(0,len(self.price[:self.step_st+self.obs_len])+200) + if show: + plt.ion() + #self.fig.tight_layout() + plt.show() + if save: + self.fig.savefig('fig/%s.png' % str(self.t_index)) + + elif self.render_on == 1: + self.ax.lines.remove(self.price_plot[0]) + [self.ax3.lines.remove(plot) for plot in self.features_plot] + self.fluc_reward_plot_p.remove() + self.fluc_reward_plot_n.remove() + self.target_box.remove() + self.reward_plot_p.remove() + self.reward_plot_n.remove() + self.posi_plot_long.remove() + self.posi_plot_short.remove() + self.trade_plot_buy.remove() + self.trade_plot_sell.remove() + + self._plot_trading() + + self.ax.set_xlim(0,len(self.price[:self.step_st+self.obs_len])+200) + if save: + self.fig.savefig('fig/%s.png' % str(self.t_index)) + if show: + plt.pause(0.0001) \ No newline at end of file diff --git a/trading_env/envs/training_v2.py b/trading_env/envs/training_v2.py new file mode 100644 index 0000000..997cc4f --- /dev/null +++ b/trading_env/envs/training_v2.py @@ -0,0 +1,445 @@ +import os +import logging + +import numpy as np +import pandas as pd +import matplotlib +import matplotlib.pyplot as plt +import matplotlib.patches as patches +from colour 
import Color

+class trading_env:
+    def __init__(self, env_id, obs_data_len, step_len,
+                 df, fee, max_position=5, deal_col_name='price',
+                 feature_names=['price', 'volume'],
+                 return_transaction=False,
+                 action_first=False,
+                 *args, **kwargs):
+        """
+        #assert df
+        # the deal price column is essential, and the df format must be specified
+        # obs_data_len -> observation data length
+        # step_len -> how far the rolling window advances on each call to step()
+        # df -> dataframe that contains the data for trading (format as below)
+            # price
+            # datetime
+            # serial_number -> serial number of the deal, restarting from 0 each day
+
+        # fee -> the fee paid on each deal, set it to match your product
+        # max_position -> the max market position for your trading shares
+        # deal_col_name -> the column name used to calculate the reward.
+        # feature_names -> list containing the feature columns to use in the trading state.
+        # the day-trade option is set as default; if you don't trade intraday this needs modification
+        """
+        logging.basicConfig(level=logging.INFO, format='[%(asctime)s] %(message)s')
+        self.logger = logging.getLogger(env_id)
+        #self.file_loc_path = os.environ.get('FILEPATH', '')
+
+        self.df = df
+        self.action_space = 3
+        self.action_describe = {0:'do nothing',
+                                1:'long',
+                                2:'short'}
+
+        if isinstance(return_transaction, bool) and return_transaction:
+            transaction_num = 7
+        elif not return_transaction:
+            transaction_num = 0
+        else:
+            transaction_num = len(return_transaction)
+
+        self.obs_len = obs_data_len
+        self.feature_len = len(feature_names)
+        self.return_state_len = len(feature_names) + transaction_num
+        self.observation_space = np.array([self.obs_len*self.feature_len,])
+        self.using_feature = feature_names
+        self.price_name = deal_col_name
+
+        self.step_len = step_len
+        self.fee = fee
+        self.max_position = max_position
+
+        #self.gameover = gameover_limit
+        self.return_transaction = return_transaction
+        self.action_first = action_first
+
+        self.begin_fs = self.df[self.df['serial_number']==0]
+        self.date_leng = len(self.begin_fs)
+
+        self.render_on = 0
+
+        self.transaction_details = pd.DataFrame()
+        self.logger.info('Making new env: {}'.format(env_id))
+
+    def _random_choice_section(self):
+        random_int = np.random.randint(self.date_leng)
+        if random_int == self.date_leng - 1:
+            begin_point = self.begin_fs.index[random_int]
+            end_point = None
+        else:
+            begin_point, end_point = self.begin_fs.index[random_int: random_int+2]
+        df_section = self.df.iloc[begin_point: end_point]
+        return df_section
+
+    def reset(self):
+        self.df_sample = self._random_choice_section()
+        self.step_st = 0
+        # define the price to calculate the reward
+        self.price = self.df_sample[self.price_name].as_matrix()
+        # define the observation feature
+        self.obs_features = self.df_sample[self.using_feature].as_matrix()
+        #maybe make market position feature in final feature, set as option
+        self.posi_arr = np.zeros_like(self.price)
+        # position variation
+        self.posi_variation_arr = np.zeros_like(self.posi_arr)
+        # position entry or cover :new_entry->1 increase->2 cover->-1 decrease->-2
+        self.posi_entry_cover_arr = np.zeros_like(self.posi_arr)
+        # self.position_feature = np.array(self.posi_l[self.step_st:self.step_st+self.obs_len])/(self.max_position*2)+0.5
+
+        self.price_mean_arr = self.price.copy()
+        self.reward_fluctuant_arr = (self.price - self.price_mean_arr)*self.posi_arr
+        self.reward_makereal_arr = self.posi_arr.copy()
+        self.reward_arr = self.reward_fluctuant_arr*self.reward_makereal_arr
+
+        self.info = None
+        self.transaction_details = pd.DataFrame()
+
+        # observation part
+        self.obs_state = 
self.obs_features[self.step_st: self.step_st+self.obs_len] + self.obs_posi = self.posi_arr[self.step_st: self.step_st+self.obs_len] + self.obs_posi_var = self.posi_variation_arr[self.step_st: self.step_st+self.obs_len] + self.obs_posi_entry_cover = self.posi_entry_cover_arr[self.step_st: self.step_st+self.obs_len] + self.obs_price = self.price[self.step_st: self.step_st+self.obs_len] + self.obs_price_mean = self.price_mean_arr[self.step_st: self.step_st+self.obs_len] + self.obs_reward_fluctuant = self.reward_fluctuant_arr[self.step_st: self.step_st+self.obs_len] + self.obs_makereal = self.reward_makereal_arr[self.step_st: self.step_st+self.obs_len] + self.obs_reward = self.reward_arr[self.step_st: self.step_st+self.obs_len] + + self.transaction_dict = {'mkt_pos': self.obs_posi[:, np.newaxis], + 'mkt_pos_var': self.obs_posi_var[:, np.newaxis], + 'entry_cover': self.obs_posi_entry_cover[:, np.newaxis], + 'avg_hold_price': self.obs_price_mean[:, np.newaxis], + 'fluc_reward': self.obs_reward_fluctuant[:, np.newaxis], + 'make_real': self.obs_makereal[:, np.newaxis], + 'reward': self.obs_reward[:, np.newaxis],} + self.transaction_all_dict = {'mkt_pos': self.posi_arr, + 'mkt_pos_var': self.posi_variation_arr, + 'entry_cover': self.posi_entry_cover_arr, + 'avg_hold_price': self.price_mean_arr, + 'fluc_reward': self.reward_fluctuant_arr, + 'make_real': self.reward_makereal_arr, + 'reward': self.reward_arr,} + + if self.return_transaction: + if isinstance(self.return_transaction, bool): + self.obs_return = np.concatenate((self.obs_state, + self.obs_posi[:, np.newaxis], + self.obs_posi_var[:, np.newaxis], + self.obs_posi_entry_cover[:, np.newaxis], + self.obs_price_mean[:, np.newaxis], + self.obs_reward_fluctuant[:, np.newaxis], + self.obs_makereal[:, np.newaxis], + self.obs_reward[:, np.newaxis]), axis=1) + elif isinstance(self.return_transaction, list): + self.obs_return = np.concatenate((self.obs_state,)+ \ + tuple(self.transaction_dict[need] \ + for need in self.return_transaction), + axis=1) + elif isinstance(self.return_transaction, dict): + self.obs_return = np.concatenate((self.obs_state,)+ \ + tuple(self.return_transaction[need](self.transaction_dict[need]) \ + for need in self.return_transaction), + axis=1) + else: + self.obs_return = self.obs_state + else: + self.obs_return = self.obs_state + + self.t_index = 0 + return self.obs_return + + + def _long(self, open_posi, enter_price, current_mkt_position, current_price_mean): + if open_posi: + self.chg_price_mean[:] = enter_price + self.chg_posi[:] = 1 + self.chg_posi_var[:1] = 1 + self.chg_posi_entry_cover[:1] = 1 + else: + after_act_mkt_position = current_mkt_position + 1 + self.chg_price_mean[:] = (current_price_mean*current_mkt_position + \ + enter_price)/after_act_mkt_position + self.chg_posi[:] = after_act_mkt_position + self.chg_posi_var[:1] = 1 + self.chg_posi_entry_cover[:1] = 2 + + def _short(self, open_posi, enter_price, current_mkt_position, current_price_mean): + if open_posi: + self.chg_price_mean[:] = enter_price + self.chg_posi[:] = -1 + self.chg_posi_var[:1] = -1 + self.chg_posi_entry_cover[:1] = 1 + else: + after_act_mkt_position = current_mkt_position - 1 + self.chg_price_mean[:] = (current_price_mean*abs(current_mkt_position) + \ + enter_price)/abs(after_act_mkt_position) + self.chg_posi[:] = after_act_mkt_position + self.chg_posi_var[:1] = -1 + self.chg_posi_entry_cover[:1] = 2 + + def _short_cover(self, current_price_mean, current_mkt_position): + self.chg_price_mean[:] = current_price_mean + self.chg_posi[:] = 
current_mkt_position + 1 + self.chg_makereal[:1] = 1 + self.chg_reward[:] = ((self.chg_price - self.chg_price_mean)*(-1) - self.fee)*self.chg_makereal + self.chg_posi_var[:1] = 1 + self.chg_posi_entry_cover[:1] = -1 + + def _long_cover(self, current_price_mean, current_mkt_position): + self.chg_price_mean[:] = current_price_mean + self.chg_posi[:] = current_mkt_position - 1 + self.chg_makereal[:1] = 1 + self.chg_reward[:] = ((self.chg_price - self.chg_price_mean)*(1) - self.fee)*self.chg_makereal + self.chg_posi_var[:1] = -1 + self.chg_posi_entry_cover[:1] = -1 + + def _stayon(self, current_price_mean, current_mkt_position): + self.chg_posi[:] = current_mkt_position + self.chg_price_mean[:] = current_price_mean + + def step(self, action, custom_done=False): + current_index = self.step_st + self.obs_len - (1 + self.action_first) + current_price_mean = self.price_mean_arr[current_index] + current_mkt_position = self.posi_arr[current_index] + + if not self.action_first: + self.t_index += 1 + self.step_st += self.step_len + + # observation part + self.obs_state = self.obs_features[self.step_st: self.step_st+self.obs_len] + self.obs_posi = self.posi_arr[self.step_st: self.step_st+self.obs_len] + # position variation + self.obs_posi_var = self.posi_variation_arr[self.step_st: self.step_st+self.obs_len] + # position entry or cover :new_entry->1 increase->2 cover->-1 decrease->-2 + self.obs_posi_entry_cover = self.posi_entry_cover_arr[self.step_st: self.step_st+self.obs_len] + self.obs_price = self.price[self.step_st: self.step_st+self.obs_len] + self.obs_price_mean = self.price_mean_arr[self.step_st: self.step_st+self.obs_len] + self.obs_reward_fluctuant = self.reward_fluctuant_arr[self.step_st: self.step_st+self.obs_len] + self.obs_makereal = self.reward_makereal_arr[self.step_st: self.step_st+self.obs_len] + self.obs_reward = self.reward_arr[self.step_st: self.step_st+self.obs_len] + # change part + self.chg_posi = self.obs_posi[-self.step_len:] + self.chg_posi_var = self.obs_posi_var[-self.step_len:] + self.chg_posi_entry_cover = self.obs_posi_entry_cover[-self.step_len:] + self.chg_price = self.obs_price[-self.step_len:] + self.chg_price_mean = self.obs_price_mean[-self.step_len:] + self.chg_reward_fluctuant = self.obs_reward_fluctuant[-self.step_len:] + self.chg_makereal = self.obs_makereal[-self.step_len:] + self.chg_reward = self.obs_reward[-self.step_len:] + + done = False + if self.step_st+self.obs_len+self.step_len >= len(self.price) or custom_done: + done = True + action = -1 + if current_mkt_position != 0: + self.chg_price_mean[:] = current_price_mean + self.chg_posi[:] = 0 + self.chg_posi_var[:1] = -current_mkt_position + self.chg_posi_entry_cover[:1] = -2 + self.chg_makereal[:1] = 1 + self.chg_reward[:] = ((self.chg_price - self.chg_price_mean)*(current_mkt_position) - abs(current_mkt_position)*self.fee)*self.chg_makereal + + transaction_details_index = ['position', 'position_variation', 'entry_cover', + 'price_mean', 'reward_fluctuant', 'reward_makereal', + 'reward'] + self.transaction_details = pd.DataFrame([self.posi_arr, + self.posi_variation_arr, + self.posi_entry_cover_arr, + self.price_mean_arr, + self.reward_fluctuant_arr, + self.reward_makereal_arr, + self.reward_arr], + index=transaction_details_index, + columns=self.df_sample.index).T + self.info = self.df_sample.join(self.transaction_details) + + + # use next tick, maybe choice avg in first 10 tick will be better to real backtest + enter_price = self.chg_price[0] + if action == 1 and self.max_position > 
current_mkt_position >= 0: + open_posi = (current_mkt_position == 0) + self._long(open_posi, enter_price, current_mkt_position, current_price_mean) + + elif action == 2 and -self.max_position < current_mkt_position <= 0: + open_posi = (current_mkt_position == 0) + self._short(open_posi, enter_price, current_mkt_position, current_price_mean) + + elif action == 1 and current_mkt_position<0: + self._short_cover(current_price_mean, current_mkt_position) + + elif action == 2 and current_mkt_position>0: + self._long_cover(current_price_mean, current_mkt_position) + + elif action == 1 and current_mkt_position==self.max_position: + action = 0 + elif action == 2 and current_mkt_position==-self.max_position: + action = 0 + + if action == 0: + if current_mkt_position != 0: + self._stayon(current_price_mean, current_mkt_position) + + self.chg_reward_fluctuant[:] = (self.chg_price - self.chg_price_mean)*self.chg_posi - np.abs(self.chg_posi)*self.fee + + if self.return_transaction: + self.transaction_dict = {'mkt_pos': self.obs_posi[:, np.newaxis], + 'mkt_pos_var': self.obs_posi_var[:, np.newaxis], + 'entry_cover': self.obs_posi_entry_cover[:, np.newaxis], + 'avg_hold_price': self.obs_price_mean[:, np.newaxis], + 'fluc_reward': self.obs_reward_fluctuant[:, np.newaxis], + 'make_real': self.obs_makereal[:, np.newaxis], + 'reward': self.obs_reward[:, np.newaxis],} + if isinstance(self.return_transaction, bool): + self.obs_return = np.concatenate((self.obs_state, + self.obs_posi[:, np.newaxis], + self.obs_posi_var[:, np.newaxis], + self.obs_posi_entry_cover[:, np.newaxis], + self.obs_price_mean[:, np.newaxis], + self.obs_reward_fluctuant[:, np.newaxis], + self.obs_makereal[:, np.newaxis], + self.obs_reward[:, np.newaxis]), axis=1) + elif isinstance(self.return_transaction, list): + self.obs_return = np.concatenate((self.obs_state,)+ \ + tuple(self.transaction_dict[need] \ + for need in self.return_transaction), + axis=1) + elif isinstance(self.return_transaction, dict): + self.obs_return = np.concatenate((self.obs_state,)+ \ + tuple(self.return_transaction[need](self.transaction_dict[need]) \ + for need in self.return_transaction), + axis=1) + else: + self.obs_return = self.obs_state + else: + self.obs_return = self.obs_state + + if self.action_first: + self.t_index += 1 + self.step_st += self.step_len + + return self.obs_return, self.obs_reward.sum(), done, self.info + + def _gen_trade_color(self, ind, long_entry=(1, 0, 0, 0.5), long_cover=(1, 1, 1, 0.5), + short_entry=(0, 1, 0, 0.5), short_cover=(1, 1, 1, 0.5)): + if self.posi_variation_arr[ind]>0 and self.posi_entry_cover_arr[ind]>0: + return long_entry + elif self.posi_variation_arr[ind]>0 and self.posi_entry_cover_arr[ind]<0: + return long_cover + elif self.posi_variation_arr[ind]<0 and self.posi_entry_cover_arr[ind]>0: + return short_entry + elif self.posi_variation_arr[ind]<0 and self.posi_entry_cover_arr[ind]<0: + return short_cover + + def _plot_trading(self): + price_x = list(range(len(self.price[:self.step_st+self.obs_len]))) + self.price_plot = self.ax.plot(price_x, self.price[:self.step_st+self.obs_len], c=(0, 0.68, 0.95, 0.9),zorder=1) + # maybe seperate up down color + #self.price_plot = self.ax.plot(price_x, self.price[:self.step_st+self.obs_len], c=(0, 0.75, 0.95, 0.9),zorder=1) + self.features_plot = [self.ax3.plot(price_x, self.obs_features[:self.step_st+self.obs_len, i], + c=self.features_color[i])[0] for i in range(self.feature_len)] + rect_high = self.obs_price.max() - self.obs_price.min() + self.target_box = self.ax.add_patch( + 
patches.Rectangle( + (self.step_st, self.obs_price.min()), self.obs_len, rect_high, + label='observation',edgecolor=(0.9, 1, 0.2, 0.8),facecolor=(0.95,1,0.1,0.3), + linestyle='-',linewidth=1.5, + fill=True) + ) # remove background) + self.fluc_reward_plot_p = self.ax2.fill_between(price_x, 0, self.reward_fluctuant_arr[:self.step_st+self.obs_len], + where=self.reward_fluctuant_arr[:self.step_st+self.obs_len]>=0, + facecolor=(1, 0.8, 0, 0.2), edgecolor=(1, 0.8, 0, 0.9), linewidth=0.8) + self.fluc_reward_plot_n = self.ax2.fill_between(price_x, 0, self.reward_fluctuant_arr[:self.step_st+self.obs_len], + where=self.reward_fluctuant_arr[:self.step_st+self.obs_len]<=0, + facecolor=(0, 1, 0.8, 0.2), edgecolor=(0, 1, 0.8, 0.9), linewidth=0.8) + self.posi_plot_long = self.ax2.fill_between(price_x, 0, self.posi_arr[:self.step_st+self.obs_len], + where=self.posi_arr[:self.step_st+self.obs_len]>=0, + facecolor=(1, 0.5, 0, 0.2), edgecolor=(1, 0.5, 0, 0.9), linewidth=1) + self.posi_plot_short = self.ax2.fill_between(price_x, 0, self.posi_arr[:self.step_st+self.obs_len], + where=self.posi_arr[:self.step_st+self.obs_len]<=0, + facecolor=(0, 0.5, 1, 0.2), edgecolor=(0, 0.5, 1, 0.9), linewidth=1) + self.reward_plot_p = self.ax2.fill_between(price_x, 0, + self.reward_arr[:self.step_st+self.obs_len].cumsum(), + where=self.reward_arr[:self.step_st+self.obs_len].cumsum()>=0, + facecolor=(1, 0, 0, 0.2), edgecolor=(1, 0, 0, 0.9), linewidth=1) + self.reward_plot_n = self.ax2.fill_between(price_x, 0, + self.reward_arr[:self.step_st+self.obs_len].cumsum(), + where=self.reward_arr[:self.step_st+self.obs_len].cumsum()<=0, + facecolor=(0, 1, 0, 0.2), edgecolor=(0, 1, 0, 0.9), linewidth=1) + + trade_x = self.posi_variation_arr.nonzero()[0] + trade_x_buy = [i for i in trade_x if self.posi_variation_arr[i]>0] + trade_x_sell = [i for i in trade_x if self.posi_variation_arr[i]<0] + trade_y_buy = [self.price[i] for i in trade_x_buy] + trade_y_sell = [self.price[i] for i in trade_x_sell] + trade_color_buy = [self._gen_trade_color(i) for i in trade_x_buy] + trade_color_sell = [self._gen_trade_color(i) for i in trade_x_sell] + self.trade_plot_buy = self.ax.scatter(x=trade_x_buy, y=trade_y_buy, s=100, marker='^', + c=trade_color_buy, edgecolors=(1,0,0,0.9), zorder=2) + self.trade_plot_sell = self.ax.scatter(x=trade_x_sell, y=trade_y_sell, s=100, marker='v', + c=trade_color_sell, edgecolors=(0,1,0,0.9), zorder=2) + + + def render(self, save=False, show=True): + if self.render_on == 0: + matplotlib.style.use('dark_background') + self.render_on = 1 + + left, width = 0.1, 0.8 + rect1 = [left, 0.4, width, 0.55] + rect2 = [left, 0.2, width, 0.2] + rect3 = [left, 0.05, width, 0.15] + + self.fig = plt.figure(figsize=(15,8)) + self.fig.suptitle('%s'%self.df_sample['datetime'].iloc[0].date(), fontsize=14, fontweight='bold') + #self.ax = self.fig.add_subplot(1,1,1) + self.ax = self.fig.add_axes(rect1) # left, bottom, width, height + self.ax2 = self.fig.add_axes(rect2, sharex=self.ax) + self.ax3 = self.fig.add_axes(rect3, sharex=self.ax) + self.ax.grid(color='gray', linestyle='-', linewidth=0.5) + self.ax2.grid(color='gray', linestyle='-', linewidth=0.5) + self.ax3.grid(color='gray', linestyle='-', linewidth=0.5) + self.features_color = [c.rgb+(0.9,) for c in Color('yellow').range_to(Color('cyan'), self.feature_len)] + #fig, ax = plt.subplots() + self._plot_trading() + + self.ax.set_xlim(0,len(self.price[:self.step_st+self.obs_len])+200) + if show: + plt.ion() + #self.fig.tight_layout() + plt.show() + if save: + self.fig.savefig('fig/%s.png' 
From dc2e1735b88849473c4f0c4979bb027987e9f6e8 Mon Sep 17 00:00:00 2001
From: Yvictor <410175015@gms.ndhu.edu.tw>
Date: Fri, 6 Apr 2018 14:48:59 +0800
Subject: [PATCH 7/7] add action 3 for cover, modify action long and short
 include cover and new

---
 trading_env/envs/backtest_v2.py | 79 +++++++++++++++++++++------------
 1 file changed, 50 insertions(+), 29 deletions(-)
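Note below the diffstat (git am ignores text placed here): the action map after this patch is 0 do nothing, 1 long, 2 short, 3 cover the open position, and actions 1 and 2 taken against an opposite position now realize it and re-open one unit the other way (entry_cover code 3). A minimal agent-loop sketch under that mapping, assuming the make() API from patch 2; the env id, the dataset path, the max_position pass-through, and the 4-tuple step() return are assumptions:

    import random
    import pandas as pd
    import trading_env

    df = pd.read_hdf('dataset/SGXTW.h5', 'STW')   # hypothetical sample data
    env = trading_env.make(env_id='backtest_v2', obs_data_len=256, step_len=128,
                           df=df, fee=0.1, max_position=5, deal_col_name='Price',
                           feature_names=['Price', 'Volume'])

    state = env.reset()
    done = False
    while not done:
        action = random.randrange(4)                   # 0: nothing, 1: long, 2: short, 3: cover
        state, reward, done, info = env.step(action)   # assuming a gym-style 4-tuple return
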
diff --git a/trading_env/envs/backtest_v2.py b/trading_env/envs/backtest_v2.py
index 7e4f06e..e041e20 100644
--- a/trading_env/envs/backtest_v2.py
+++ b/trading_env/envs/backtest_v2.py
@@ -41,7 +41,8 @@ def __init__(self, env_id, obs_data_len, step_len,
-        self.action_space = 3
+        self.action_space = 4
         self.action_describe = {0:'do nothing',
                                 1:'long',
-                                2:'short'}
+                                2:'short',
+                                3:'cover'}
 
 
         if isinstance(return_transaction, bool) and return_transaction:
@@ -168,12 +169,23 @@ def _long(self, open_posi, enter_price, current_mkt_position, current_price_mean
             self.chg_posi_var[:1] = 1
             self.chg_posi_entry_cover[:1] = 1
         else:
-            after_act_mkt_position = current_mkt_position + 1
-            self.chg_price_mean[:] = (current_price_mean*current_mkt_position + \
-                                      enter_price)/after_act_mkt_position
-            self.chg_posi[:] = after_act_mkt_position
-            self.chg_posi_var[:1] = 1
-            self.chg_posi_entry_cover[:1] = 2
+            if current_mkt_position > 0:
+                after_act_mkt_position = current_mkt_position + 1
+                self.chg_price_mean[:] = (current_price_mean*current_mkt_position + \
+                                          enter_price)/after_act_mkt_position
+                self.chg_posi[:] = after_act_mkt_position
+                self.chg_posi_var[:1] = 1
+                self.chg_posi_entry_cover[:1] = 2
+            else:
+                # make real the open short position
+                self.chg_makereal[:1] = 1
+                self.chg_reward[:] = ((self.chg_price - current_price_mean)*(current_mkt_position) - self.fee*abs(current_mkt_position))*self.chg_makereal
+                # open a new long position
+                self.chg_price_mean[:] = enter_price
+                self.chg_posi[:] = 1
+                self.chg_posi_var[:1] = -current_mkt_position + 1
+                self.chg_posi_entry_cover[:1] = 3
+
 
     def _short(self, open_posi, enter_price, current_mkt_position, current_price_mean):
         if open_posi:
@@ -182,12 +194,22 @@ def _short(self, open_posi, enter_price, current_mkt_position, current_price_mea
             self.chg_price_mean[:] = enter_price
             self.chg_posi[:] = -1
             self.chg_posi_var[:1] = -1
             self.chg_posi_entry_cover[:1] = 1
         else:
-            after_act_mkt_position = current_mkt_position - 1
-            self.chg_price_mean[:] = (current_price_mean*abs(current_mkt_position) + \
-                                      enter_price)/abs(after_act_mkt_position)
-            self.chg_posi[:] = after_act_mkt_position
-            self.chg_posi_var[:1] = -1
-            self.chg_posi_entry_cover[:1] = 2
+            if current_mkt_position < 0:
+                after_act_mkt_position = current_mkt_position - 1
+                self.chg_price_mean[:] = (current_price_mean*abs(current_mkt_position) + \
+                                          enter_price)/abs(after_act_mkt_position)
+                self.chg_posi[:] = after_act_mkt_position
+                self.chg_posi_var[:1] = -1
+                self.chg_posi_entry_cover[:1] = 2
+            else:
+                # make real the open long position
+                self.chg_makereal[:1] = 1
+                self.chg_reward[:] = ((self.chg_price - current_price_mean)*(current_mkt_position) - self.fee*abs(current_mkt_position))*self.chg_makereal
+                # open a new short position
+                self.chg_price_mean[:] = enter_price
+                self.chg_posi[:] = -1
+                self.chg_posi_var[:1] = -current_mkt_position - 1
+                self.chg_posi_entry_cover[:1] = 3
 
     def _short_cover(self, current_price_mean, current_mkt_position):
         self.chg_price_mean[:] = current_price_mean
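The _long/_short branches above share two pieces of arithmetic: scaling in keeps a position-weighted average entry price, while reversing realizes the whole open position, net of fee on its absolute size, before re-opening one unit the other way. Checked with plain numbers as an illustration (a sketch only, not part of the patch):

    fee = 0.1

    # scaling in: long 2 units at an average of 100.0, add one more at 103.0
    price_mean, mkt_position = 100.0, 2
    enter_price = 103.0
    price_mean = (price_mean*mkt_position + enter_price)/(mkt_position + 1)
    assert price_mean == 101.0                    # (100*2 + 103) / 3

    # reversing: action 2 against the +3 long at 104.0 realizes it first
    chg_price, mkt_position = 104.0, 3
    reward = (chg_price - price_mean)*mkt_position - fee*abs(mkt_position)
    assert abs(reward - 8.7) < 1e-9               # (104 - 101)*3 - 0.1*3; then posi becomes -1
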
@@ -266,25 +288,24 @@ def step(self, action, custom_done=False):
         # use next tick, maybe choice avg in first 10 tick will be better to real backtest
         enter_price = self.chg_price[0]
-        if action == 1 and self.max_position > current_mkt_position >= 0:
+        if action == 1 and self.max_position > current_mkt_position:
             open_posi = (current_mkt_position == 0)
             self._long(open_posi, enter_price, current_mkt_position, current_price_mean)
 
-        elif action == 2 and -self.max_position < current_mkt_position <= 0:
+        elif action == 2 and -self.max_position < current_mkt_position:
             open_posi = (current_mkt_position == 0)
             self._short(open_posi, enter_price, current_mkt_position, current_price_mean)
-
-        elif action == 1 and current_mkt_position<0:
-            self._short_cover(current_price_mean, current_mkt_position)
-
-        elif action == 2 and current_mkt_position>0:
-            self._long_cover(current_price_mean, current_mkt_position)
 
         elif action == 1 and current_mkt_position==self.max_position:
             action = 0
         elif action == 2 and current_mkt_position==-self.max_position:
             action = 0
 
+        elif action == 3 and current_mkt_position<0:
+            self._short_cover(current_price_mean, current_mkt_position)
+        elif action == 3 and current_mkt_position>0:
+            self._long_cover(current_price_mean, current_mkt_position)
+
         if action == 0:
             if current_mkt_position != 0:
                 self._stayon(current_price_mean, current_mkt_position)
@@ -338,7 +359,7 @@ def _gen_trade_color(self, ind, long_entry=(1, 0, 0, 0.5), long_cover=(1, 1, 1,
         elif self.posi_variation_arr[ind]<0 and self.posi_entry_cover_arr[ind]<0:
             return short_cover
 
-    def _plot_trading(self):
+    def _plot_trading(self, reward_divinplot):
         price_x = list(range(len(self.price[:self.step_st+self.obs_len])))
         self.price_plot = self.ax.plot(price_x, self.price[:self.step_st+self.obs_len],
                                        c=(0, 0.68, 0.95, 0.9),zorder=1)    # maybe seperate up down color
@@ -353,10 +374,10 @@ def _plot_trading(self):
                           patches.Rectangle(
                           (self.step_st, self.obs_price.min()), self.obs_len, rect_high,
                           label='observation',edgecolor=(0.9, 1, 0.2, 0.8),facecolor=(0.95,1,0.1,0.3),
                           linestyle='-',linewidth=1.5,
                           fill=True)
                           )     # remove background)
-        self.fluc_reward_plot_p = self.ax2.fill_between(price_x, 0, self.reward_fluctuant_arr[:self.step_st+self.obs_len],
+        self.fluc_reward_plot_p = self.ax2.fill_between(price_x, 0, self.reward_fluctuant_arr[:self.step_st+self.obs_len]/reward_divinplot,
                                                         where=self.reward_fluctuant_arr[:self.step_st+self.obs_len]>=0,
                                                         facecolor=(1, 0.8, 0, 0.2), edgecolor=(1, 0.8, 0, 0.9), linewidth=0.8)
-        self.fluc_reward_plot_n = self.ax2.fill_between(price_x, 0, self.reward_fluctuant_arr[:self.step_st+self.obs_len],
+        self.fluc_reward_plot_n = self.ax2.fill_between(price_x, 0, self.reward_fluctuant_arr[:self.step_st+self.obs_len]/reward_divinplot,
                                                         where=self.reward_fluctuant_arr[:self.step_st+self.obs_len]<=0,
                                                         facecolor=(0, 1, 0.8, 0.2), edgecolor=(0, 1, 0.8, 0.9), linewidth=0.8)
         self.posi_plot_long = self.ax2.fill_between(price_x, 0, self.posi_arr[:self.step_st+self.obs_len],
                                                     where=self.posi_arr[:self.step_st+self.obs_len]>=0,
                                                     facecolor=(1, 0.5, 0, 0.2), edgecolor=(1, 0.5, 0, 0.9), linewidth=1)
         self.posi_plot_short = self.ax2.fill_between(price_x, 0, self.posi_arr[:self.step_st+self.obs_len],
                                                      where=self.posi_arr[:self.step_st+self.obs_len]<=0,
                                                      facecolor=(0, 0.5, 1, 0.2), edgecolor=(0, 0.5, 1, 0.9), linewidth=1)
         self.reward_plot_p = self.ax2.fill_between(price_x, 0,
-                                                   self.reward_arr[:self.step_st+self.obs_len].cumsum(),
+                                                   self.reward_arr[:self.step_st+self.obs_len].cumsum()/reward_divinplot,
                                                    where=self.reward_arr[:self.step_st+self.obs_len].cumsum()>=0,
                                                    facecolor=(1, 0, 0, 0.2), edgecolor=(1, 0, 0, 0.9), linewidth=1)
         self.reward_plot_n = self.ax2.fill_between(price_x, 0,
-                                                   self.reward_arr[:self.step_st+self.obs_len].cumsum(),
+                                                   self.reward_arr[:self.step_st+self.obs_len].cumsum()/reward_divinplot,
                                                    where=self.reward_arr[:self.step_st+self.obs_len].cumsum()<=0,
                                                    facecolor=(0, 1, 0, 0.2), edgecolor=(0, 1, 0, 0.9), linewidth=1)
@@ -387,7 +408,7 @@ def _plot_trading(self):
                                                c=trade_color_sell, edgecolors=(0,1,0,0.9), zorder=2)
 
 
-    def render(self, save=False, show=True):
+    def render(self, save=False, show=True, reward_divinplot=1):
         if self.render_on == 0:
             matplotlib.style.use('dark_background')
             self.render_on = 1
@@ -408,7 +429,7 @@ def render(self, save=False, show=True):
             self.ax3.grid(color='gray', linestyle='-', linewidth=0.5)
             self.features_color = [c.rgb+(0.9,) for c in Color('yellow').range_to(Color('cyan'), self.feature_len)]
             #fig, ax = plt.subplots()
-            self._plot_trading()
+            self._plot_trading(reward_divinplot)
 
             self.ax.set_xlim(0,len(self.price[:self.step_st+self.obs_len])+200)
             if show:
@@ -431,7 +452,7 @@ def render(self, save=False, show=True):
             self.trade_plot_buy.remove()
             self.trade_plot_sell.remove()
 
-            self._plot_trading()
+            self._plot_trading(reward_divinplot)
 
             self.ax.set_xlim(0,len(self.price[:self.step_st+self.obs_len])+200)
             if save: