-
Notifications
You must be signed in to change notification settings - Fork 17
/
Copy pathagent.py
481 lines (403 loc) · 42.2 KB
/
agent.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
# Reference: https://python.langchain.com/en/latest/use_cases/agent_simulations
import re
from datetime import datetime
from typing import List, Optional, Tuple
from langchain import LLMChain
from langchain.base_language import BaseLanguageModel
from langchain.prompts import PromptTemplate
from langchain.retrievers import TimeWeightedVectorStoreRetriever
from langchain.schema import Document
from pydantic.v1 import BaseModel, Field
from termcolor import colored
import util
import time
class SuspicionAgent(BaseModel):
"""A character with memory and innate characteristics."""
name: str
game_name: str
age: int
observation_rule: str
"""The traits of the character you wish not to change."""
status: str
"""Current activities of the character."""
llm: BaseLanguageModel
"""The retriever to fetch related memories."""
verbose: bool = False
reflection_threshold: Optional[float] = None
"""When the total 'importance' of memories exceeds the above threshold, stop to reflect."""
current_plan: List[str] = []
belief: str = ""
pattern: str = ""
long_belief: str = ""
counter_belief: str = ""
plan: str = ""
high_plan: str = ""
"""The current plan of the agent."""
memory: List = ['']
summary: str = "" #: :meta private:
summary_refresh_seconds: int = 3600 #: :meta private:
last_refreshed: datetime = Field(default_factory=datetime.now) #: :meta private:
memory_importance: float = 0.0 #: :meta private:
max_tokens_limit: int = 1200 #: :meta private:
read_observation: str = "" #: :meta private:
rule: str = "" #: :meta private:
class Config:
"""Configuration for this pydantic object."""
arbitrary_types_allowed = True
def add_long_memory(self, memory_content: str) -> List[str]:
"""Add an observation or memory to the agent's memory."""
self.memory.append(memory_content)
return self.memory
def planning_module(self, observation: str, recipient_name:str, previous_conversation: List[str] =None, belief: str =None, valid_action_list: List[str] = None, short_memory_summary:str = "",pattern:str = "",last_plan:str = "", mode: str = "second_tom") -> str:
"""Make Plans and Evaluate Plans."""
"""Combining these two modules together to save costs"""
if mode == 'second_tom':
prompt = PromptTemplate.from_template(
"You are the objective player behind a NPC character called {initiator_name}, and you are playing the board game {game_name} with {recipient_name}.\n"
+ " The game rule is: {rule} \n"
+'{pattern}\n'
+ " Your observation about the game status now is: {observation}\n"
+'{belief}\n'
+ " Understanding all given information, can you do following things:"
+ " Make Reasonable Plans: Please plan several strategies according to actions {valid_action_list} you can play now to win the finally whole {game_name} games step by step. Note that you can say something or keep silent to confuse your opponent. "
+ " Potential {recipient_name}'s actions (if release) and Estimate Winning/Lose/Draw Rate for Each Plan: From the perspective of {recipient_name} , please infer what the action {recipient_name} with probability (normalize to number 100% in total) would do when {recipient_name} holds different cards and then calculate the winning/lose/draw rates when {recipient_name} holds different cards step by step. At last, please calculate the overall winning/lose/draw rates for each plan step by step considering {recipient_name}'s behaviour pattern. Output in a tree-structure: "
+ "Output: Plan 1: If I execute plan1. "
"The winning/lose/draw rates when {recipient_name} holds card1: Based on {recipient_name}'s behaviour pattern, In the xx round, because {recipient_name} holds card1 (probability) and the combination with current public card (if release) (based on my belief on {recipient_name}), and if he sees my action, {recipient_name} will do action1 (probability) ( I actually hold card and the public card (if reveal) is , he holds card1 and the public card (if reveal), considering Single Game Win/Draw/Lose Rule, please infer I will win/draw/lose step by step ), action2 (probability) (considering Single Game Win/Draw/Lose Rule, please infer I will win/draw/lose step by step ),.. (normalize to number 100% in total); \n Overall (winning rate for his card1) is (probability = his card probability * win action probability), (lose rate for his card2) is (probability= his card probability * lose action probability), (draw rate for his card2) is (probability = his card probability * draw action probability) "
"The winning/lose/draw rates when {recipient_name} holds card2: Based on {recipient_name}'s behaviour pattern, In the xx round, because {recipient_name} holds card2 (probability) and the combination with current public card (if release) (based on my belief on {recipient_name}) , and if he sees my action, he will do action1 (probability) (I actually hold card and the public card (if reveal) is , he holds card1 and the public card (if reveal), considering Single Game Win/Draw/Lose Rule, please infer I will win/draw/lose step by step ).. action2 (probability) (normalize to number 100% in total) (considering Single Game Win/Draw/Lose Rule, please infer I will win/draw/lose step by step ),.. ;..... continue ....\n Overall (winning rate for his card2) is (probability = his card probability * win action probability), (lose rate for his card2) is (probability= his card probability * lose action probability), (draw rate for his card2) is (probability = his card probability * draw action probability) "
"...\n"
"Plan1 overall {initiator_name}'s Winning/Lose/Draw rates : the Winning rate (probability) for plan 1 is (winning rate for his card1) + (winning rate for his card2) + .. ; Lose rate (probability) for plan 1 : (lose rate for his card1) + (lose rate for his card2) + .. ; Draw Rate (probability) for plan 1 : (draw rate for his card1) + (draw rate for his card2) + ... ; (normalize to number 100% in total) for plan1 \n"
"Plan 2: If I execute plan2, The winning/lose/draw rates when {recipient_name} holds card1: Based on {recipient_name}'s behaviour pattern, In the xx round, if {recipient_name} holds card1 (probability) and the combination with current public card (if release), .. (format is similar with before ) ... continue .."
"Plan 3: .. Coninue ... "
+ " The number of payoffs for each plan: Understanding your current observation, each new plans, please infer the number of wininng/lose payoffs for each plan step by step, Output: Plan1: After the action, All chips in the pot: If win, the winning payoff would be (Calculated by Winning Payoff Rules step by step) : After the action, All chips in the pot: If lose , the lose payoff would be: (Calculated by Lose Payoff Rules step by step). Plan2: After the action, All chips in the pot: If win, the winning chips would be (Calculated by Winning Payoff Rules step by step): After the action, All chips in the pot: If lose , the lose chips would be: (Calculated by Lose Payoff Rules step by step). If the number of my chips in pots have no change, please directly output them. \n"
+ " Estimate Expected Chips Gain for Each Plan: Understanding all the information and Estimate Winning/Lose/Draw Rate for Each Plan, please estimate the overall average Expected Chips Gain for each plan/strategy in the current game by calculating winning rate * (Winning Payoff Rule in the game rule) - lose rate * (Lose Payoff Rule in the game rule) step by step"
+ " Plan Selection: Please output the rank of estimated expected chips gains for every plan objectively step by step, and select the plan/strategy with the highest estimated expected chips gain considering both the strategy improvement. \n "
)
elif mode == 'first_tom':
prompt = PromptTemplate.from_template(
"You are the player behind a NPC character called {initiator_name}, and you are playing the board game {game_name} with {recipient_name}.\n"
+ " The game rule is: {rule} \n"
+ " {pattern} \n"
+ " Your observation about the game status now is: {observation}\n"
+ ' {belief}\n'
+ " Understanding all given information, can you do following things:"
+ " Make Reasonable Plans: Please plan several strategies according to actions {valid_action_list} you can play now to win the finally whole {game_name} games step by step. Note that you can say something or keep silent to confuse your opponent."
+ " Potential {recipient_name}'s actions and Estimate Winning/Lose/Draw Rate: From the perspective of {recipient_name}, please infer what the action {recipient_name} with probability (normalize to number 100% in total) would do when {recipient_name} holds different cards, and then calculate the winning/lose/draw rates when {recipient_name} holds different cards step by step. Output in a tree-structure: "
+ "Output: Based on {recipient_name}'s behaviour pattern and Analysis on {recipient_name}'s cards, "
"Winning/lose/draw rates when {recipient_name} holds card1 in the xx round,: if {recipient_name} holds card1 (probability) (based on my belief on {recipient_name}) with the public card (if release), {recipient_name} will do action1 (probability) (infer I will win/draw/lose step by step (considering Single Game Win/Draw/Lose Rule and my factual card analysis with public card (if release), his card analysis with public card (if release) step by step ), action2 (probability) (infer I will win/draw/lose step by step ),.. (normalize to number 100% in total); Overall (winning rate for his card1) is (probability = his card probability * win action probability), (lose rate for his card2) is (probability= his card probability * lose action probability), (draw rate for his card2) is (probability = his card probability * draw action probability) "
"The winning/lose/draw rates when {recipient_name} holds card2 in the xx round,: If {recipient_name} holds card2 (probability) (based on my belief on {recipient_name}) with the public card (if release), he will do action1 (probability) (infer I will win/draw/lose (considering Single Game Win/Draw/Lose Rule and my factual card analysis with current public card (if release), his card analysis with current public card (if release)) step by step ).. action2 (probability) (normalize to number 100% in total) (infer I will win/draw/lose step by step ),.. based on {recipient_name}'s behaviour pattern;..... continue .... Overall (winning rate for his card2) is (probability = his card probability * win action probability), (lose rate for his card2) is (probability= his card probability * lose action probability), (draw rate for his card2) is (probability = his card probability * draw action probability) "
"..."
"Overall {initiator_name}'s Winning/Lose/Draw rates : Based on the above analysis, the Winning rate (probability) is (winning rate for his card1) + (winning rate for his card2) + .. ; Lose rate (probability): (lose rate for his card1) + (lose rate for his card2) + .. ; Draw Rate (probability): (draw rate for his card1) + (draw rate for his card2) + ... ; (normalize to number 100% in total). \n"
+ " Potential believes about the number of winning and lose payoffs for each plan: Understanding the game rule, your current observation, previous actions summarization, each new plans, Winning Payoff Rule, Lose Payoff Rule, please infer your several believes about the number of chips in pots for each plan step by step, Output: Plan1: Chips in the pot: If win, the winning payoff would be (Calculated by Winning Payoff Rules in the game rule) : After the action, If lose , the lose payoff would be: . Plan2: Chips in the pot: If win, the winning chips would be (Calculated by Winning Payoff Rules in the game rule): After the action, If lose , the lose chips would be: . If the number of my chips in pots have no change, please directly output them. "
+ " Estimate Expected Chips Gain for Each Plan: Understanding the game rule, plans, and your knowledge about the {game_name}, please estimate the overall average Expected Chips Gain for each plan/strategy in the current game by calculating winning rate * (Winning Payoff Rule in the game rule) - lose rate * (Lose Payoff Rule in the game rule)., explain what is the results if you do not select the plan, and explain why is this final Expected Chips Gain reasonablely step by step? "
+ " Plan Selection: Please output the rank of estimated expected chips gains for every plan objectively step by step, and select the plan/strategy with the highest estimated expected chips gain considering both the strategy improvement. \n\n "
)
else:
prompt = PromptTemplate.from_template(
"You are the player behind a NPC character called {initiator_name}, and you are playing the board game {game_name} with {recipient_name}.\n"
+ " The game rule is: {rule} \n"
+ " {pattern} \n"
+ " Your observation about the game status now is: {observation}\n"
+ " Understanding all given information, can you do following things:"
+ " Make Reasonable Plans: Please plan several strategies according to actions {valid_action_list} you can play now to win the finally whole {game_name} games step by step. Note that you can say something or keep silent to confuse your opponent."
+ " Estimate Winning/Lose/Draw Rate for Each Plan: Understanding the given information, and your knowledge about the {game_name}, please estimate the success rate of each step of each plan step by step and the overall average winning/lose/draw rate (normalize to number 100% in total) of each plan/strategy for the current game step by step following the templete: If I do plan1, because I hold card, the public information (if release) and Single Game Win/Draw/Lose Rule, I will win or Lose or draw (probability); ... continue .... Overall win/draw/lose rate: Based on the analysis, I can do the weighted average step by step to get that the overall weighted average winning rate is (probability), average lose rate is (probability), draw rate is (probability) (normalize to number 100% in total)\n "
+ " Potential believes about the number of winning and lose payoffs for each plan: Understanding the game rule, your current observation, previous actions summarization, each new plans, Winning Payoff Rule, Lose Payoff Rule, please infer your several believes about the number of chips in pots for each plan step by step, Output: Plan1: Chips in the pot: If win, the winning payoff would be (Calculated by Winning Payoff Rules in the game rule) : After the action, Chips in the pot: If lose , the lose payoff would be: . Plan2: Chips in the pot: If win, the winning chips would be (Calculated by Winning Payoff Rules in the game rule): After the action, Chips in the pot: If lose , the lose chips would be: . If the number of my chips in pots have no change, please directly output them. "
+" Estimate Expected Chips Gain for Each Plan: Understanding the game rule, plans, and your knowledge about the {game_name}, please estimate the overall average Expected Chips Gain for each plan/strategy in the current game by calculating winning rate * (Winning Payoff Rule in the game rule) - lose rate * (Lose Payoff Rule in the game rule)., explain what is the results if you do not select the plan, and explain why is this final Expected Chips Gain reasonablely step by step? "
+ " Plan Selection: Please output the rank of estimated expected chips gains for every plan objectively step by step, and select the plan/strategy with the highest estimated expected chips gain considering both the strategy improvement. \n\n "
)
agent_summary_description = short_memory_summary
belief = self.belief if belief is None else belief
kwargs = dict(
recent_observations=agent_summary_description,
last_plan=last_plan,
belief=belief,
initiator_name=self.name,
pattern=pattern,
recipient_name=recipient_name,
observation=observation,
rule=self.rule,
game_name=self.game_name,
valid_action_list=valid_action_list
)
plan_prediction_chain = LLMChain(llm=self.llm, prompt=prompt)
self.plan = plan_prediction_chain.run(**kwargs)
self.plan = self.plan.strip()
return self.plan.strip()
def get_belief(self, observation: str, recipient_name: str,short_memory_summary:str,pattern:str = "",mode: str = "second_tom") -> str:
"""React to get a belief."""
if mode == 'second_tom':
prompt = PromptTemplate.from_template(
"You are the player behind a NPC character called {agent_name}, and you are playing the board game {game_name} with {recipient_name}. \n"
+ " The game rule is: {rule} \n"
+ " Your estimated judgement about the behaviour pattern of {recipient_name} and improved strategy is: {pattern} \n"
+ " Your observation now is: {observation}\n"
+ " Your current game progress summarization including actions and conversations with {recipient_name} is: {recent_observations}\n"
+ " Understanding the game rule, the cards you have, your observation, progress summarization in the current game, the estimated behaviour pattern of {recipient_name}, the potential guess pattern of {recipient_name} on you, and your knowledge about the {game_name}, can you do following things? "
+ " Analysis on my Cards: Understanding all given information and your knowledge about the {game_name}, please analysis what is your best combination and advantages of your cards in the current round step by step."
+ " Belief on {recipient_name}'s cards: Understanding all given information, please infer the probabilities about the cards of {recipient_name} (normalize to number 100% in total) objectively step by step."
"Output: {recipient_name} saw my history actions (or not) and then did action1 (probability) in the 1st round , ... continue..... Before this round, {recipient_name} say my history actions (or not) and did action1 (probability), because {recipient_name}'s behaviour pattern and the match with the public card (if release), {recipient_name} tends to have card1 (probability), card2 (probability) ..continue.. (normalize to number 100% in total)."
+ " Analysis on {recipient_name}'s Cards: Understanding all given information and your knowledge about the {game_name}, please analysis what is {recipient_name}'s best combination and advantages of {recipient_name}'s cards in the current round step by step."
+ " Potential {recipient_name}'s current believes about your cards: Understanding all given information and your knowledge about the {game_name}, If you were {recipient_name} (he can only observe my actions but cannot see my cards), please infer the {recipient_name}'s believes about your cards with probability (normalize to number 100% in total) step by step. Output: {agent_name} did action1 (probability) (after I did action or not) in the 1st round, , ... continue... {agent_name} did action1 (probability) (after I did action or not) in the current round,, from the perspective of {recipient_name}, {agent_name} tends to have card1 (probability), card2 (probability) ... (normalize to number 100% in total) ."
)
elif mode == 'first_tom':
prompt = PromptTemplate.from_template(
"You are the player behind a NPC character called {agent_name}, and you are playing the board game {game_name} with {recipient_name}. \n"
+ " The game rule is: {rule} \n"
+ " Your estimated judgement about the behaviour pattern of {recipient_name} and improved strategy is: {pattern} \n"
+ " Your observation now is: {observation}\n"
+ " Your current game progress summarization including actions and conversations with {recipient_name} is: {recent_observations}\n"
+ " Understanding the game rule, the cards you have, your observation, progress summarization in the current game, the estimated behaviour pattern of {recipient_name}, and your knowledge about the {game_name}, can you do following things? "
+ " Analysis on my Cards: Understanding all given information, please analysis what is your best combination and advantages of your cards in the current round step by step."
+ " Belief on {recipient_name}'s cards: Understanding all given information, please infer your the probabilities about the cards of {recipient_name} (normalize to number 100% total) step by step. Templete: In the 1st round, {recipient_name} did action1 (probability), ... continue... In the current round, {recipient_name} did action1 (probability), because {recipient_name}'s behaviour pattern and the match with the current public card (if release), he tends to have card1 (probability), card2 (probability) (normalize to number 100% in total). "
+ " Analysis on {recipient_name}'s Cards: Understanding all given information, please analysis what is {recipient_name}'s best combination and advantages of {recipient_name}'s cards in the current round step by step."
)
agent_summary_description = short_memory_summary
kwargs = dict(
agent_summary_description=agent_summary_description,
recent_observations=agent_summary_description,
agent_name=self.name,
pattern= pattern,
recipient_name=recipient_name,
observation=observation,
game_name=self.game_name,
rule=self.rule
)
print(recipient_name)
belief_prediction_chain = LLMChain(llm=self.llm, prompt=prompt)
self.belief = belief_prediction_chain.run(**kwargs)
self.belief = self.belief.strip()
return self.belief.strip()
def get_pattern(self, recipient_name: str,game_pattern: str='', last_k:int=20,short_summarization:str='',mode:str='second_tom') -> str:
"""React to get a belief."""
if mode == 'second_tom':
prompt = PromptTemplate.from_template(
"You are the objective player behind a NPC character called {agent_name}, and you are playing {game_name} with {recipient_name}. \n"
+ " The game rule is: {rule} \n"
+ " Your previous game memory including observations, actions and conversations with {recipient_name} is: {long_memory}\n"
+ " {recipient_name}'s game pattern: Understanding all given information and your understanding about the {game_name}, please infer and estimate as many as possible reasonable {recipient_name}'s game behaviour pattern/preferences for each card he holds and each round with probability (normalize to number 100\% in total for each pattern item) and please also infer advantages of his card, and analysis how the {recipient_name}'s behaviour pattern/preferences are influenced by my actions when he holds different cards step by step. Output as a tree-structure "
+ "Output: When {recipient_name} holds card1 and the combination of public card (if release): if {recipient_name} is the first to act, he would like to do action1 (probabilities), action2 (probabilities) ... continue .. If {recipient_name} sees the action1/action2/action3 of the opponent or not, he would like to do action1 (probabilities), action2 (probabilities) ... continue ... (normalize to number 100% in total), if {recipient_name} sees the action2 of the opponent or not, ... continue ..(more patterns with different actions).. in the 1st round, ; If {recipient_name} sees the action1 of the opponent or not, he would like to do action1 (probabilities), action2 (probabilities) ... continue... (normalize to number 100% in total), ... continue ..(more patterns)..In the 2nd round,;"
"When {recipient_name} holds card2 and combination of public card (if release): if {recipient_name} is the first to act, he would like to do action1 (probabilities), action2 (probabilities) ... continue .. If {recipient_name} sees the action1 of the opponent or not, he would like to do action1 (probabilities), action2 (probabilities) .. continue ... (normalize to number 100% in total)...in the 1st round,; .. continue ..(more patterns with different actions).in the 2nd round .. "
" (more patterns with different cards).. continue.."
+ " {recipient_name}'s guess on my game pattern: Understanding all given information, please infer several reasonable believes about my game pattern/preference when holding different cards from the perspective of {recipient_name} (please consider the advantages of the card, actions and the the match with the public card (if release)) for every round of the game in detail as a tree-structure output step by step"
+ "Output: In the 1st round, When name holds card1 with public card (if release), he would like to do (probabilities), action2 (probabilities) (normalize to number 100% in total) o ... continue .. and then do action ...;"
"When name holds card2 with public card (if release), ... "
" .. continue.."
+ " Strategy Improvement: Understanding the above information, think about what strategies I can adopt to exploit the game pattern of {recipient_name} and {recipient_name}'s guess on my game pattern for winning {recipient_name} in the whole game step by step. (Note that you cannot observe the cards of the opponent during the game, but you can observe his actions). Output as a tree-structure:"
"When I hold card and the public card (if release), and see the action of the opponent, I would like to do action1; ... "
)
elif mode == 'first_tom':
prompt = PromptTemplate.from_template(
"You are the player behind a NPC character called {agent_name}, and you are playing the board game {game_name} with {recipient_name}. \n"
+ " The game rule is: {rule} \n"
+ " Your previous game memory including observations, actions and conversations with {recipient_name} is: {long_memory}\n"
+ " Please understand the game rule, previous all game history and your knowledge about the {game_name}, can you do following things for future games? "
+ " {recipient_name}'s game pattern: Understanding all given information, please infer all possible reasonable {recipient_name}'s game pattern/preferences for each card he holds and each round with probability (normalize to number 100\% in total for each pattern item) for every round of the game as a tree-structure output step by step "
+ "Output: In the 1st round, when name holds card1 and the public card (if release), he would like to do action (probabilities); when name holds card2 and the public card (if release), he would like to do action (probabilities), ... continue.. In the 2nd round, when name holds card1 and the public card (if release), .(similar with before).. continue. "
+ " Number of chips reason: Think about why you can have these chips in all previous games step by step. "
+ " Reflex: Reflex which your actions are right or wrong in previous games to win or Lose conrete chips step by step (Note that you cannot observe the cards of the opponent during the game, but you can observe his actions) "
+ " Strategy Improvement: Understanding the above information, think about what strategies I can adopt to exploit the game pattern of {recipient_name} for winning {recipient_name} in the whole game step by step. (Note that you cannot observe the cards of the opponent during the game, but you can observe his actions). Output as a tree-structure:"
)
else:
prompt = PromptTemplate.from_template(
"You are the player behind a NPC character called {agent_name}, and you are playing the board game {game_name} with {recipient_name}. \n"
+ " The game rule is: {rule} \n"
+ " Your previous game memory including observations, actions and conversations with {recipient_name} is: {long_memory}\n"
+ " Please understand the game rule, previous all game history and your knowledge about the {game_name}, can you do following things for future games? "
+ " Number of chips reason: Think about why you can have these chips in all previous games step by step. "
+ " Reflex: Reflex which your actions are right or wrong in previous games to win or Lose conrete chips step by step. (Note that you cannot observe the cards of the opponent during the game, but you can observe his actions) "
+ " Strategy Improvement: Understanding the above information, think about what strategies I need to adopt to win {recipient_name} for the whole game step by step. (Note that you cannot observe the cards of the opponent during the game, but you can observe his actions). Output as a tree-structure:"
)
reflection_chain = LLMChain(llm=self.llm, prompt=prompt, verbose=self.verbose)
long_memory = self.memory[-last_k:]
long_memory_str = "\n\n".join([o for o in long_memory])
kwargs = dict(
long_memory=long_memory_str,
game_pattern=game_pattern,
agent_name=self.name,
recipient_name=recipient_name,
game_name=self.game_name,
rule=self.rule
)
# print(kwargs)
self.long_belief = reflection_chain.run(**kwargs)
self.long_belief = self.long_belief.strip()
return self.long_belief.strip()
def get_summarization(self, recipient_name: str,game_memory: str, opponent_name:str,no_highsight_obs:bool) -> str:
"""Get a long memory summarization to save costs."""
if no_highsight_obs:
prompt = PromptTemplate.from_template(
"You are the player behind a NPC character called {agent_name}, and you are playing the board game {game_name} with {recipient_name}. \n"
+ " The game rule is: {rule} \n"
+ " The observation conversion rules are: {observation_rule}\n"
+ " One game memory including observations, actions and conversations with {recipient_name} is: {long_memory}\n"
+ " Understanding the game rule, observation conversion rules and game history and your knowledge about the {game_name}, can you do following things:"
+ " History summarization: summary the game history with action, observation, and results information? using the templete, and respond shortly: In the first round of first game, name holds card1 does action .... continue ..."
+ "{opponent_name}'s card reasoning: If the card of {opponent_name} is not available, because {agent_name}'s card is xx and public card (if release) is xxx, and {opponent_name} behaviours are xx, the current game result is xx, please infer {opponent_name}'s card with probability (100% in total) with your understanding about the above all information confidently step by step. \n"
)
else:
prompt = PromptTemplate.from_template(
"You are the player behind a NPC character called {agent_name}, and you are playing the board game {game_name} with {recipient_name}. \n"
+ " The game rule is: {rule} \n"
+ " The observation conversion rules are: {observation_rule}\n"
+ " One game memory including observations, actions and conversations with {recipient_name} is: {long_memory}\n"
+ " Understanding the game rule, observation conversion rules and game history and your knowledge about the {game_name}, can you do following things:"
+ " History summarization: summary the game history with action, observation, and results information? using the templete, and respond shortly: In the first round of first game, name holds card1 does action .... continue ..."
)
reflection_chain = LLMChain(llm=self.llm, prompt=prompt, verbose=self.verbose)
kwargs = dict(
observation_rule=self.observation_rule,
long_memory=game_memory,
agent_name=self.name,
recipient_name=recipient_name,
opponent_name=opponent_name,
# observation=observation,
game_name=self.game_name,
rule=self.rule
)
# print(kwargs)
self.long_belief = reflection_chain.run(**kwargs)
self.long_belief = self.long_belief.strip()
return self.long_belief.strip()
def get_short_memory_summary(self, observation: str, recipient_name: str,short_memory_summary:str) -> str:
"""React to get a belief."""
prompt = PromptTemplate.from_template(
"You are the player behind a NPC character called {agent_name}, and you are playing the board game {game_name} with {recipient_name}. \n"
+ " The game rule is: {rule} \n"
+ " Your current observation is: {observation}\n"
+ " The current game history including previous action, observations and conversation is: {agent_summary_description}\n"
+ " Based on the game rule, your observation and your knowledge about the {game_name}, please summarize the current history. Output as a tree-structure, and respond shortly: "
+ " In the first round, name does action, and say xxx .... continue ..."
)
agent_summary_description = short_memory_summary
kwargs = dict(
agent_summary_description=agent_summary_description,
recent_observations=agent_summary_description,
agent_name=self.name,
recipient_name=recipient_name,
observation=observation,
game_name=self.game_name,
rule=self.rule
)
belief_prediction_chain = LLMChain(llm=self.llm, prompt=prompt)
self.belief = belief_prediction_chain.run(**kwargs)
self.belief = self.belief.strip()
return self.belief.strip()
def convert_obs(self, observation: str, recipient_name: str, user_index: str, valid_action_list:str) -> str:
"""React to get a belief."""
prompt = PromptTemplate.from_template(
"You are the player behind a NPC character called {agent_name} with player index {user_index}, and you are playing the board game {game_name} with {recipient_name}. \n"
+ " The game rule is: {rule} \n"
+ " Your observation now is: {observation}\n"
+ " You will receive a valid action list you can perform in this turn \n"
+ " Your valid action list is: {valid_action_list}\n"
+ " The observation conversion rules are: {observation_rule}\n"
+ " Please convert {observation} and {valid_action_list} to the readable text based on the observation conversion rules and your knowledge about the {game_name} (respond shortly).\n\n"
)
kwargs = dict(
user_index=user_index,
agent_name=self.name,
rule=self.rule,
recipient_name=recipient_name,
observation=observation,
valid_action_list=valid_action_list,
game_name=self.game_name,
observation_rule=self.observation_rule
)
obs_prediction_chain = LLMChain(llm=self.llm, prompt=prompt)
self.read_observation = obs_prediction_chain.run(**kwargs)
self.read_observation = self.read_observation.strip()
return self.read_observation
def action_decision(self, observation: str, valid_action_list: List[str], promp_head: str, act: str = None,short_memory_summary:str="") -> Tuple[str,str]:
"""React to a given observation."""
"""React to a given observation."""
prompt = PromptTemplate.from_template(
promp_head
+ "\nYour plan is: {plan}"
+ "\n Based on the plan, please select the next action from the available action list: {valid_action_list} (Just one word) and say something to the opponent player to bluff or confuse him or keep silent to finally win the whole game and reduce the risk of your action (respond sentence only). Please respond them and split them by |"
+ "\n\n"
)
agent_summary_description = short_memory_summary
kwargs = dict(
agent_summary_description= agent_summary_description,
# current_time=current_time_str,
# relevant_memories=relevant_memories_str,
agent_name= self.name,
game_name=self.game_name,
observation= observation,
agent_status= self.status,
valid_action_list = valid_action_list,
plan = self.plan,
belief = self.belief,
act = act
)
action_prediction_chain = LLMChain(llm=self.llm, prompt=prompt)
result = action_prediction_chain.run(**kwargs)
if "|" in result:
result,result_comm = result.split("|",1)
else:
result_comm = ""
return result.strip(),result_comm.strip()
def make_act(self, observation: str,opponent_name: str, player_index:int,valid_action_list: List, verbose_print:bool,game_idx:int,round:int,bot_short_memory:List, bot_long_memory:List, console,log_file_name='', mode='second_tom',no_highsight_obs=False) -> Tuple[bool, str]:
readable_text_amy_obs = self.convert_obs(observation, opponent_name, player_index, valid_action_list)
if verbose_print:
console.print('readable_text_obs: ', style="red")
print(readable_text_amy_obs)
time.sleep(0)
if len(bot_short_memory[player_index]) == 1:
short_memory_summary = f'{game_idx+1}th Game Start \n'+readable_text_amy_obs
else:
short_memory_summary = self.get_short_memory_summary(observation=readable_text_amy_obs, recipient_name=opponent_name,short_memory_summary='\n'.join(bot_short_memory[player_index]))
if verbose_print:
console.print('short_memory_summary: ', style="yellow")
print(short_memory_summary)
time.sleep(0)
if round <= 1:
self.pattern = self.get_pattern(opponent_name,'',short_summarization=short_memory_summary,mode=mode)
console.print('pattern: ', style="blue")
print(self.pattern)
time.sleep(0)
print(opponent_name)
if mode == 'second_tom' or mode == 'first_tom':
belief = self.get_belief(readable_text_amy_obs,opponent_name,short_memory_summary=short_memory_summary,pattern=self.pattern,mode=mode)
if verbose_print:
console.print(self.name + " belief: " , style="deep_pink3")
print(self.name + " belief: " + str(belief))
else:
belief = ''
time.sleep(0)
plan = self.planning_module(readable_text_amy_obs,opponent_name, belief=belief,valid_action_list=valid_action_list,short_memory_summary=short_memory_summary,pattern=self.pattern,last_plan='', mode=mode)
if verbose_print:
console.print(self.name + " plan: " , style="orchid")
print(self.name + " plan: " + str(plan))
time.sleep(0)
promp_head = ''
act, comm = self.action_decision(readable_text_amy_obs, valid_action_list, promp_head,short_memory_summary=short_memory_summary)
if log_file_name is not None:
util.get_logging(logger_name=log_file_name + '_obs',
content={str(game_idx + 1) + "_" + str(round): {"raw_obs": observation,
"readable_text_obs": readable_text_amy_obs}})
util.get_logging(logger_name=log_file_name + '_short_memory',
content={str(game_idx + 1) + "_" + str(round): {
"raw_short_memory": '\n'.join(bot_short_memory[player_index]),
"short_memory_summary": short_memory_summary}})
util.get_logging(logger_name=log_file_name + '_pattern_model',
content={str(game_idx + 1) + "_" + str(round): self.pattern})
util.get_logging(logger_name=log_file_name + '_belief',
content={str(game_idx + 1) + "_" + str(round): {
"belief": str(belief)}})
util.get_logging(logger_name=log_file_name + '_plan',
content={str(game_idx + 1) + "_" + str(round): {
"plan": str(plan)}})
util.get_logging(logger_name= log_file_name + '_act',
content={str(game_idx + 1) + "_" + str(round): {
"act": str(act), "talk_sentence": str(comm)}})
while act not in valid_action_list:
print('Action + ', str(act), ' is not a valid action in valid_action_list, please try again.\n')
promp_head += 'Action {act} is not a valid action in {valid_action_list}, please try again.\n'
act, comm = self.action_decision( readable_text_amy_obs, valid_action_list, promp_head,act)
print(self.name + " act: " + str(act))
print(comm)
bot_short_memory[player_index].append(f"{self.name} have the observation {readable_text_amy_obs}, try to take action: {act} and say {comm} to {opponent_name}")
bot_short_memory[((player_index + 1)%2)].append(f"{self.name} try to take action: {act} and say {comm} to {opponent_name}")
bot_long_memory[player_index].append(
f"{self.name} have the observation {observation}, try to take action: {act} and say {comm} to {opponent_name}")
return act,comm,bot_short_memory,bot_long_memory