-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpit_mcts.py
54 lines (41 loc) · 1.35 KB
/
pit_mcts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import Arena
from MCTS import MCTS
from tictactoe.TicTacToeGame import TicTacToeGame, display
from tictactoe.TicTacToePlayers import HumanTicTacToePlayer
from NeuralNet import NeuralNet
from Game import Game
import numpy as np
"""
Use this script to pit any two agents against each other, or to play manually
against any agent.
"""
# Game instance shared by every player created in this script (the argument 3
# is presumably the board size -- confirm against TicTacToeGame).
g = TicTacToeGame(3)

# All players.
# The two disabled players below rely on RandomPlayer / GreedyOthelloPlayer,
# neither of which appears in this script's imports.
# rp = RandomPlayer(g).play
# gp = GreedyOthelloPlayer(g).play
_human_player = HumanTicTacToePlayer(g)
hp = _human_player.play  # bound `play` method, used as the human player

# nnet players
class Config(object):
    """Bundle of search settings handed to ``MCTS`` by this script.

    Every setting is a plain instance attribute populated in ``__init__``;
    the constructor takes no arguments.
    """

    def __init__(self):
        # Ordered table of (attribute name -> value); applied verbatim below.
        settings = {
            "num_sampling_moves": 30,
            # 512 for chess and shogi, 722 for Go.
            "max_moves": 512,
            "num_mcts_sims": 5000,
            # Root prior exploration noise:
            # alpha is 0.3 for chess, 0.03 for Go and 0.15 for shogi.
            "root_dirichlet_alpha": 0.3,
            "root_exploration_fraction": 0.0,
            # UCB formula constants.
            "pb_c_base": 19652,
            "pb_c_init": 1.25,
            # Model loading: flag plus (folder, file name) pair.
            "load_model": True,
            "load_folder_file": ('./models/', 'checkpoint_25.pth.tar'),
        }
        for attr_name, attr_value in settings.items():
            setattr(self, attr_name, attr_value)
class NN(NeuralNet):
    """Stand-in network that rates every action equally.

    No model is evaluated: ``predict`` ignores the board and derives its
    output solely from the game's ``action_size()``.
    """

    def __init__(self, game: Game):
        # Only the game handle is stored; the base-class initialiser is not
        # invoked here (NOTE(review): confirm NeuralNet needs no setup).
        self.game = game

    def predict(self, board):
        """Return ``(policy, value)`` for ``board``.

        ``policy`` is a 1-D array of length ``action_size()`` whose entries
        are all ``1 / action_size()``; ``value`` is always ``0``. The
        ``board`` argument is not inspected.
        """
        uniform_policy = np.ones(self.game.action_size())
        uniform_policy /= self.game.action_size()
        return uniform_policy, 0
# Wire the stand-in network into a tree search over the shared game.
nn = NN(g)
mcts1 = MCTS(g, nn, Config())


def n1p(x):
    """Choose a move for position ``x`` using ``mcts1``.

    Takes the first element of what ``mcts1.get_action_prob(x)`` returns and
    yields the index of its largest entry.
    """
    search_output = mcts1.get_action_prob(x)
    return np.argmax(search_output[0])


# Search-driven player goes first, the human player second.
arena = Arena.Arena(n1p, hp, g, display=display)
outcome = arena.play_games(20, verbose=True)
print(outcome)