Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Devs/restruc #5

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions trading_env/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,31 @@ def available_envs():
def make(env_id, obs_data_len, step_len,
df, fee, deal_col_name='price',
feature_names=['price', 'volume'],
return_transaction=False,
*args , **kwargs):
"""
v0: currently not maintained

v1:
# assert df
# need deal price as essential and specified the df format
# obs_data_len -> observation data length
# step_len -> when call step rolling windows will + step_len
# df -> dataframe that contain data for trading(format as...)
# price
# datetime
# serial_number -> serial num of deal at each day recalculating

# fee : the fee paid on each deal; set it according to your product
# max_position : the maximum market position for your traded shares
# deal_col_name : the column name used to calculate the reward.
# feature_names : list containing the feature columns to use in the trading state.
# return_transaction : bool, list or dict, default False
True returns all transaction columns: 'mkt_pos', 'mkt_pos_var', 'entry_cover', 'avg_hold_price', 'fluc_reward', 'make_real', 'reward'
use a list to select which columns to return
use a dict to select which columns to return, where each dict value is a function applied to that column's array
# NOTE: the day-trade option is enabled by default; modify this if you do not use it
"""
envs = available_envs()
assert env_id in envs , "env_id: {} not exist. try one of {}".format(env_id, str(envs).strip('[]'))
assert deal_col_name in df.columns, "deal_col not in Dataframe please define the correct column name of which column want to calculate the profit."
Expand All @@ -22,5 +46,6 @@ def make(env_id, obs_data_len, step_len,
step_len=step_len ,df=df, fee=fee,
deal_col_name=deal_col_name,
feature_names=feature_names,
return_transaction=return_transaction,
*args, **kwargs)
return env
4 changes: 3 additions & 1 deletion trading_env/envs/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from . import training_v0
from . import training_v1
from . import training_v2
from . import backtest_v0
from . import backtest_v1
from . import backtest_v2

available_envs_module = [training_v0, training_v1, backtest_v0, backtest_v1]
available_envs_module = [training_v0, training_v1, training_v2, backtest_v0, backtest_v1, backtest_v2]
100 changes: 73 additions & 27 deletions trading_env/envs/backtest_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class trading_env:
def __init__(self, env_id, obs_data_len, step_len,
df, fee, max_position=5, deal_col_name='price',
feature_names=['price', 'volume'],
return_transaction=True,
return_transaction=False,
fluc_div=100.0, gameover_limit=5,
*args, **kwargs):
"""
Expand Down Expand Up @@ -42,9 +42,18 @@ def __init__(self, env_id, obs_data_len, step_len,
self.action_describe = {0:'do nothing',
1:'long',
2:'short'}



if isinstance(return_transaction, bool) and return_transaction:
transaction_num = 7
elif not return_transaction:
transaction_num = 0
else:
transaction_num = len(return_transaction)

self.obs_len = obs_data_len
self.feature_len = len(feature_names)
self.return_state_len = len(feature_names) + transaction_num
self.observation_space = np.array([self.obs_len*self.feature_len,])
self.using_feature = feature_names
self.price_name = deal_col_name
Expand Down Expand Up @@ -117,17 +126,35 @@ def reset(self):
self.obs_reward_fluctuant = self.reward_fluctuant_arr[self.step_st: self.step_st+self.obs_len]
self.obs_makereal = self.reward_makereal_arr[self.step_st: self.step_st+self.obs_len]
self.obs_reward = self.reward_arr[self.step_st: self.step_st+self.obs_len]

self.transaction_dict = {'mkt_pos': self.obs_posi[:, np.newaxis],
'mkt_pos_var': self.obs_posi_var[:, np.newaxis],
'entry_cover': self.obs_posi_entry_cover[:, np.newaxis],
'avg_hold_price': self.obs_price_mean[:, np.newaxis],
'fluc_reward': self.obs_reward_fluctuant[:, np.newaxis],
'make_real': self.obs_makereal[:, np.newaxis],
'reward': self.obs_reward[:, np.newaxis],}

if self.return_transaction:
self.obs_return = np.concatenate((self.obs_state,
self.obs_posi[:, np.newaxis],
self.obs_posi_var[:, np.newaxis],
self.obs_posi_entry_cover[:, np.newaxis],
self.obs_price[:, np.newaxis],
self.obs_price_mean[:, np.newaxis],
self.obs_reward_fluctuant[:, np.newaxis],
self.obs_makereal[:, np.newaxis],
self.obs_reward[:, np.newaxis]), axis=1)
if isinstance(self.return_transaction, bool):
self.obs_return = np.concatenate((self.obs_state,
self.obs_posi[:, np.newaxis],
self.obs_posi_var[:, np.newaxis],
self.obs_posi_entry_cover[:, np.newaxis],
self.obs_price_mean[:, np.newaxis],
self.obs_reward_fluctuant[:, np.newaxis],
self.obs_makereal[:, np.newaxis],
self.obs_reward[:, np.newaxis]), axis=1)
elif isinstance(self.return_transaction, list):
self.obs_return = np.concatenate((self.obs_state,)+ \
tuple(self.transaction_dict[need] \
for need in self.return_transaction),
axis=1)
elif isinstance(self.return_transaction, dict):
self.obs_return = np.concatenate((self.obs_state,)+ \
tuple(self.return_transaction[need](self.transaction_dict[need]) \
for need in self.return_transaction),
axis=1)
else:
self.obs_return = self.obs_state

Expand Down Expand Up @@ -183,7 +210,7 @@ def _stayon(self, current_price_mean, current_mkt_position):
self.chg_posi[:] = current_mkt_position
self.chg_price_mean[:] = current_price_mean

def step(self, action):
def step(self, action, custom_done=False):
current_index = self.step_st + self.obs_len -1
current_price_mean = self.price_mean_arr[current_index]
current_mkt_position = self.posi_arr[current_index]
Expand Down Expand Up @@ -213,7 +240,7 @@ def step(self, action):
self.chg_reward = self.obs_reward[-self.step_len:]

done = False
if self.step_st+self.obs_len+self.step_len >= len(self.price):
if self.step_st+self.obs_len+self.step_len >= len(self.price) or custom_done:
done = True
action = -1
if current_mkt_position != 0:
Expand Down Expand Up @@ -265,15 +292,32 @@ def step(self, action):
self.chg_reward_fluctuant[:] = (self.chg_price - self.chg_price_mean)*self.chg_posi - np.abs(self.chg_posi)*self.fee

if self.return_transaction:
self.obs_return = np.concatenate((self.obs_state,
self.obs_posi[:, np.newaxis],
self.obs_posi_var[:, np.newaxis],
self.obs_posi_entry_cover[:, np.newaxis],
self.obs_price[:, np.newaxis],
self.obs_price_mean[:, np.newaxis],
self.obs_reward_fluctuant[:, np.newaxis],
self.obs_makereal[:, np.newaxis],
self.obs_reward[:, np.newaxis]), axis=1)
self.transaction_dict = {'mkt_pos': self.obs_posi[:, np.newaxis],
'mkt_pos_var': self.obs_posi_var[:, np.newaxis],
'entry_cover': self.obs_posi_entry_cover[:, np.newaxis],
'avg_hold_price': self.obs_price_mean[:, np.newaxis],
'fluc_reward': self.obs_reward_fluctuant[:, np.newaxis],
'make_real': self.obs_makereal[:, np.newaxis],
'reward': self.obs_reward[:, np.newaxis],}
if isinstance(self.return_transaction, bool):
self.obs_return = np.concatenate((self.obs_state,
self.obs_posi[:, np.newaxis],
self.obs_posi_var[:, np.newaxis],
self.obs_posi_entry_cover[:, np.newaxis],
self.obs_price_mean[:, np.newaxis],
self.obs_reward_fluctuant[:, np.newaxis],
self.obs_makereal[:, np.newaxis],
self.obs_reward[:, np.newaxis]), axis=1)
elif isinstance(self.return_transaction, list):
self.obs_return = np.concatenate((self.obs_state,)+ \
tuple(self.transaction_dict[need] \
for need in self.return_transaction),
axis=1)
elif isinstance(self.return_transaction, dict):
self.obs_return = np.concatenate((self.obs_state,)+ \
tuple(self.return_transaction[need](self.transaction_dict[need]) \
for need in self.return_transaction),
axis=1)
else:
self.obs_return = self.obs_state

Expand Down Expand Up @@ -339,7 +383,7 @@ def _plot_trading(self):
c=trade_color_sell, edgecolors=(0,1,0,0.9), zorder=2)


def render(self, save=False):
def render(self, save=False, show=True):
if self.render_on == 0:
matplotlib.style.use('dark_background')
self.render_on = 1
Expand All @@ -363,9 +407,10 @@ def render(self, save=False):
self._plot_trading()

self.ax.set_xlim(0,len(self.price[:self.step_st+self.obs_len])+200)
plt.ion()
#self.fig.tight_layout()
plt.show()
if show:
plt.ion()
#self.fig.tight_layout()
plt.show()
if save:
self.fig.savefig('fig/%s.png' % str(self.t_index))

Expand All @@ -387,4 +432,5 @@ def render(self, save=False):
self.ax.set_xlim(0,len(self.price[:self.step_st+self.obs_len])+200)
if save:
self.fig.savefig('fig/%s.png' % str(self.t_index))
plt.pause(0.0001)
if show:
plt.pause(0.0001)
Loading