-
Notifications
You must be signed in to change notification settings - Fork 25
/
Copy pathmain.py
118 lines (89 loc) · 2.91 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import matplotlib.pyplot as plt
import numpy as np
import random
import sys
from frl.data import Data
from frl.rl import RL
from frl.env import Env
from frl.agent import Agent
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old names, so prefer the new name when this
# install provides it and fall back to the legacy name on older versions.
plt.style.use(
    'seaborn-v0_8-paper'
    if 'seaborn-v0_8-paper' in plt.style.available
    else 'seaborn-paper'
)
def main():
    """Train the agent on randomly sampled episodes, then evaluate and plot.

    The CSV path is read from the first command-line argument; when none is
    given, './bitcoin-historical-data/coinbaseUSD_1-min_data.csv' is used.
    The first 70% of rows form the training split and the rest the test
    split.

    Raises:
        ValueError: If the training split has fewer rows than one episode.
    """
    episode_len = 96      # rows per sampled training episode
    num_episodes = 5000   # number of training episodes

    # Read CSV from file and perform data preprocessing / feature extraction
    default_path = './bitcoin-historical-data/coinbaseUSD_1-min_data.csv'
    data_path = sys.argv[1] if len(sys.argv) > 1 else default_path
    data_src = Data(data_path)
    data_src.preprocess()
    data_src.extract_feature()

    # Train-test split the data by 70% and 30%
    cut = int(len(data_src) * 0.7)
    train_feat = data_src.feat.iloc[:cut]
    train_data = data_src.data.iloc[:cut]
    train_len = len(train_data)
    test_feat = data_src.feat.iloc[cut:]
    test_data = data_src.data.iloc[cut:]

    # Fail early with a clear message: random.randint() below would otherwise
    # raise an opaque "empty range" ValueError on a too-short training split.
    if train_len < episode_len:
        raise ValueError(
            f'training split has {train_len} rows, '
            f'but at least {episode_len} are required to sample an episode'
        )

    # Create agent
    agent = Agent(
        in_features=28,
        num_layers=1,
        hidden_size=32,
        out_features=5,
        lr=1e-3,
        weight_decay=1e-6,
        discount_factor=0.99
    )

    # Training
    for i_episode in range(num_episodes):
        print(f'Episode {i_episode+1}', end=' ')

        # Sample an episode of fixed length; randint is inclusive on both
        # ends, so the last valid start index is train_len - episode_len.
        idx = random.randint(0, train_len - episode_len)
        _train_feat = train_feat.iloc[idx: idx + episode_len]
        _train_data = train_data.iloc[idx: idx + episode_len]

        train_env = Env(_train_feat, _train_data)
        train_rl = RL(agent, train_env)
        train_rl.train()
        # Report the summed rewards of one rollout on the sampled episode.
        train_rwd = sum(train_rl.rollout())
        print(f'{train_rwd:.3f}')

    # Testing
    test_env = Env(test_feat, test_data)
    test_rl = RL(agent, test_env)
    test_rl.eval()
    test_rwd = test_rl.rollout()

    # Plotting
    plot_result(test_rwd, test_data['Open'], test_data['Close'])
def plot_result(rwd_lst, open_price, close_price):
    """Plot cumulative log-returns and drawdowns for the agent and baselines.

    The upper panel shows three cumulative curves (buy-and-hold, momentum and
    the RL agent); the lower panel shows, for each curve, its distance below
    its own running peak.

    Args:
        rwd_lst: Per-step rewards of the RL agent, accumulated with np.cumsum.
        open_price: pandas Series of opening prices.
        close_price: pandas Series of closing prices.
    """
    def mdd(x):
        """Return, for each step, the value minus the running maximum so far."""
        peak = None
        gaps = []
        for value in x:
            if peak is None or value > peak:
                peak = value
            gaps.append(value - peak)
        return gaps

    # Baseline1: BnH -- cumulative close-to-close log-returns, scaled by 2.
    # The first step has no previous close, so its return is set to 0.
    # NOTE(review): the factor 2 is presumably a position size -- confirm.
    close_to_close = np.log(close_price / close_price.shift(1)).fillna(0)
    bnh = np.cumsum(close_to_close.values) * 2

    # Baseline2: Momentum -- hold a position of 4 whenever the previous close
    # was above its 30-step simple moving average, otherwise stay flat.
    # NOTE(review): the factor 4 is presumably a position size -- confirm.
    open_to_close = np.log(close_price / open_price)
    moving_avg = close_price.rolling(30, min_periods=1).mean()
    above_avg = close_price > moving_avg
    # shift by 1 since we trade on the next opening price
    position = (above_avg.shift(1).astype(float) * 4).fillna(0)
    # Cumulative sum of the position-weighted open-to-close log-returns.
    mmt = np.cumsum(open_to_close.values * position.values)

    # RL agent performance
    rl = np.cumsum(rwd_lst)

    # Upper panel: cumulative curves.
    plt.subplot(211)
    plt.plot(bnh, label='BnH')
    plt.plot(mmt, label='MMT')
    plt.xticks(())
    plt.ylabel('Cumulative Log-Returns')
    plt.plot(rl, label='RL')
    plt.legend()

    # Lower panel: drawdown curves, plotted in the same order as above.
    plt.subplot(212)
    plt.ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
    plt.xlabel('Timesteps')
    plt.ylabel('MDD')
    for curve in (bnh, mmt, rl):
        plt.plot(mdd(curve))

    plt.show()
# Run the train / evaluate / plot pipeline only when executed as a script.
if __name__ == "__main__":
    main()