forked from ysymyth/awesome-language-agents
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCoALA.bib
2528 lines (2130 loc) · 93.9 KB
/
CoALA.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
@inproceedings{shi2017world,
  author    = {Shi, Tianlin and Karpathy, Andrej and Fan, Linxi and Hernandez, Jonathan and Liang, Percy},
  title     = {{World of Bits: An Open-Domain platform for web-based agents}},
  booktitle = {International Conference on Machine Learning},
  pages     = {3135--3144},
  year      = {2017},
}
@article{markov1954theory,
  author    = {Markov, Andrei Andreevich},
  title     = {The theory of algorithms},
  journal   = {Trudy Matematicheskogo Instituta Imeni VA Steklova},
  volume    = {42},
  pages     = {3--375},
  year      = {1954},
  publisher = {Russian Academy of Sciences, Steklov Mathematical Institute},
}
@article{post1943formal,
  author  = {Post, Emil L},
  title   = {Formal reductions of the general combinatorial decision problem},
  journal = {American Journal of Mathematics},
  volume  = {65},
  number  = {2},
  pages   = {197--215},
  year    = {1943},
}
@techreport{newell1967studies,
  author      = {Newell, Allen},
  title       = {Studies in problem solving: {Subject} 3 on the crypt-arithmetic task {DONALD} + {GERALD} = {ROBERT}},
  institution = {Carnegie Mellon University},
  year        = {1967},
}
@inproceedings{mohan2014learning,
  author    = {Mohan, Shiwali and Laird, John},
  title     = {Learning goal-oriented hierarchical tasks from situated interactive instruction},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {28},
  year      = {2014},
}
@book{sutton2018reinforcement,
  author    = {Sutton, Richard S and Barto, Andrew G},
  title     = {Reinforcement learning: An introduction},
  publisher = {MIT Press},
  year      = {2018},
}
@book{laird2019soar,
  author    = {Laird, John E},
  title     = {The {Soar} cognitive architecture},
  publisher = {MIT Press},
  year      = {2019},
}
@article{nason2005soar,
  author    = {Nason, Shelley and Laird, John E},
  title     = {{Soar-RL}: Integrating reinforcement learning with {Soar}},
  journal   = {Cognitive Systems Research},
  volume    = {6},
  number    = {1},
  pages     = {51--59},
  year      = {2005},
  publisher = {Elsevier},
}
@article{laird2022,
  author  = {Laird, John E},
  title   = {Introduction to {Soar}},
  journal = {arXiv preprint arXiv:2205.03854},
  year    = {2022},
}
@inproceedings{lindes2016toward,
  author    = {Lindes, Peter and Laird, John E},
  title     = {Toward integrating cognitive linguistics and cognitive language processing},
  booktitle = {Proceedings of the 14th International Conference on Cognitive Modeling (ICCM)},
  year      = {2016},
}
@incollection{newell1989symbolic,
  author    = {Newell, Allen and Rosenbloom, Paul S and Laird, John E},
  title     = {Symbolic architectures for cognition},
  editor    = {Posner, Michael I.},
  booktitle = {Foundations of Cognitive Science},
  pages     = {93--131},
  publisher = {MIT Press},
  year      = {1989},
}
@inproceedings{nuxoll2007extending,
  author    = {Nuxoll, Andrew M and Laird, John E},
  title     = {Extending cognitive architecture with episodic memory},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  pages     = {1560--1564},
  year      = {2007},
}
@article{sumers2022talk,
  author  = {Sumers, Theodore and Hawkins, Robert and Ho, Mark K and Griffiths, Tom and Hadfield-Menell, Dylan},
  title   = {{How to talk so AI will learn: Instructions, descriptions, and autonomy}},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {35},
  pages   = {34762--34775},
  year    = {2022},
}
@article{zhang2019dialogpt,
  author  = {Zhang, Yizhe and Sun, Siqi and Galley, Michel and Chen, Yen-Chun and Brockett, Chris and Gao, Xiang and Gao, Jianfeng and Liu, Jingjing and Dolan, Bill},
  title   = {{DialoGPT: Large-scale generative pre-training for conversational response generation}},
  journal = {arXiv preprint arXiv:1911.00536},
  year    = {2019},
}
@inproceedings{nguyen2022framework,
  author    = {Nguyen, Khanh X and Bisk, Yonatan and Daum{\'e}, III, Hal},
  title     = {A framework for learning to request rich and contextually useful information from humans},
  booktitle = {International Conference on Machine Learning},
  pages     = {16553--16568},
  year      = {2022},
}
@inproceedings{nguyen2021interactive,
  author    = {Nguyen, Khanh X and Misra, Dipendra and Schapire, Robert and Dud{\'\i}k, Miroslav and Shafto, Patrick},
  title     = {Interactive learning from activity description},
  booktitle = {International Conference on Machine Learning},
  pages     = {8096--8108},
  year      = {2021},
}
@inproceedings{palo2023towards,
  author    = {Di Palo, Norman and Byravan, Arunkumar and Hasenclever, Leonard and Wulfmeier, Markus and Heess, Nicolas and Riedmiller, Martin},
  title     = {Towards A Unified Agent with Foundation Models},
  booktitle = {Workshop on Reincarnating Reinforcement Learning at ICLR 2023},
  year      = {2023},
}
@inproceedings{sumers2021learning,
  author    = {Sumers, Theodore R and Ho, Mark K and Hawkins, Robert D and Narasimhan, Karthik and Griffiths, Thomas L},
  title     = {Learning rewards from linguistic feedback},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {35},
  pages     = {6002--6010},
  year      = {2021},
}
@incollection{atkinson1968human,
  author    = {Atkinson, Richard C and Shiffrin, Richard M},
  title     = {Human memory: A proposed system and its control processes},
  booktitle = {Psychology of Learning and Motivation},
  volume    = {2},
  pages     = {89--195},
  publisher = {Elsevier},
  year      = {1968},
}
@incollection{baddeley1974working,
  author    = {Baddeley, Alan D and Hitch, Graham},
  title     = {Working memory},
  booktitle = {Psychology of Learning and Motivation},
  volume    = {8},
  pages     = {47--89},
  publisher = {Elsevier},
  year      = {1974},
}
@inproceedings{derbinsky2012multi,
  author    = {Derbinsky, Nate and Li, Justin and Laird, John},
  title     = {A multi-domain evaluation of scaling in a general episodic memory},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {26},
  pages     = {193--199},
  year      = {2012},
}
@article{mohan2012acquiring,
  author  = {Mohan, Shiwali and Mininger, Aaron H and Kirk, James R and Laird, John E},
  title   = {Acquiring grounded representations of words with situated interactive instruction},
  journal = {Advances in Cognitive Systems},
  volume  = {2},
  pages   = {113--130},
  year    = {2012},
}
@article{tambe1995intelligent,
  author  = {Tambe, Milind and Johnson, W Lewis and Jones, Randolph M and Koss, Frank and Laird, John E and Rosenbloom, Paul S and Schwamb, Karl},
  title   = {Intelligent agents for interactive simulation environments},
  journal = {AI Magazine},
  volume  = {16},
  number  = {1},
  pages   = {15--39},
  year    = {1995},
}
@inproceedings{laird2012cognitive,
  author    = {Laird, John Edwin and Kinkade, Keegan R and Mohan, Shiwali and Xu, Joseph Z},
  title     = {Cognitive robotics using the {Soar} cognitive architecture},
  booktitle = {CogRob @ AAAI},
  year      = {2012},
}
@article{kirk2014rosie,
  author  = {Kirk, James R and Laird, John E},
  title   = {Interactive task learning for simple games},
  journal = {Advances in Cognitive Systems},
  volume  = {3},
  pages   = {13--30},
  year    = {2014},
}
@article{kotseruba202040,
  author    = {Kotseruba, Iuliia and Tsotsos, John K},
  title     = {40 years of cognitive architectures: core cognitive abilities and practical applications},
  journal   = {Artificial Intelligence Review},
  volume    = {53},
  number    = {1},
  pages     = {17--94},
  publisher = {Springer},
  year      = {2020},
}
@article{liu2018reinforcement,
  author  = {Liu, Evan Zheran and Guu, Kelvin and Pasupat, Panupong and Shi, Tianlin and Liang, Percy},
  title   = {{Reinforcement Learning on Web Interfaces using Workflow-Guided Exploration}},
  journal = {arXiv preprint arXiv:1802.08802},
  year    = {2018},
}
@book{kahneman2011thinking,
  author    = {Kahneman, Daniel},
  title     = {Thinking, fast and slow},
  publisher = {Macmillan},
  year      = {2011},
}
@book{newell1972human,
  author    = {Newell, Allen and Simon, Herbert Alexander},
  title     = {Human problem solving},
  publisher = {Prentice-Hall},
  year      = {1972},
}
@article{sloman1996empirical,
  author    = {Sloman, Steven A},
  title     = {The empirical case for two systems of reasoning},
  journal   = {Psychological Bulletin},
  volume    = {119},
  number    = {1},
  pages     = {3--22},
  year      = {1996},
  publisher = {American Psychological Association},
}
@article{daw2005uncertainty,
  author  = {Daw, Nathaniel D and Niv, Yael and Dayan, Peter},
  title   = {Uncertainty-based competition between prefrontal and dorsolateral striatal systems for behavioral control},
  journal = {Nature Neuroscience},
  volume  = {8},
  number  = {12},
  pages   = {1704--1711},
  year    = {2005},
}
@book{stanovich1999rational,
  author    = {Stanovich, Keith E},
  title     = {Who is rational? Studies of individual differences in reasoning},
  publisher = {Psychology Press},
  year      = {1999},
}
@incollection{kahneman2002representativeness,
  author    = {Kahneman, Daniel and Frederick, Shane},
  title     = {Representativeness revisited: Attribute substitution in intuitive judgment},
  editor    = {Gilovich, Thomas and Griffin, Dale and Kahneman, Daniel},
  booktitle = {Heuristics and Biases: The Psychology of Intuitive Judgment},
  pages     = {49--81},
  publisher = {Cambridge University Press},
  year      = {2002},
}
@article{gur2018learning,
  author  = {Gur, Izzeddin and Rueckert, Ulrich and Faust, Aleksandra and Hakkani-Tur, Dilek},
  title   = {{Learning to Navigate the Web}},
  journal = {arXiv preprint arXiv:1812.09195},
  year    = {2018},
}
@article{jia2019dom,
  author  = {Jia, Sheng and Kiros, Jamie and Ba, Jimmy},
  title   = {{DOM-Q-NET: Grounded RL on Structured Language}},
  journal = {arXiv preprint arXiv:1902.07257},
  year    = {2019},
}
@inproceedings{merkle2019cooperative,
  author    = {Merkle, Nicole and Philipp, Patrick},
  title     = {{Cooperative Web Agents by Combining Semantic Technologies with Reinforcement Learning}},
  booktitle = {Proceedings of the 10th International Conference on Knowledge Capture},
  pages     = {205--212},
  year      = {2019},
}
@inproceedings{bender2020climbing,
  author    = {Bender, Emily M and Koller, Alexander},
  title     = {{Climbing towards NLU: On Meaning, Form, and Understanding in the Age of Data}},
  booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
  pages     = {5185--5198},
  year      = {2020},
}
@article{nakano2021webgpt,
  author  = {Nakano, Reiichiro and Hilton, Jacob and Balaji, Suchir and Wu, Jeff and Ouyang, Long and Kim, Christina and Hesse, Christopher and Jain, Shantanu and Kosaraju, Vineet and Saunders, William and others},
  title   = {{WebGPT: Browser-Assisted Question-Answering with Human Feedback}},
  journal = {arXiv preprint arXiv:2112.09332},
  year    = {2021},
}
@article{gur2021adversarial,
  author  = {Gur, Izzeddin and Jaques, Natasha and Malta, Kevin and Tiwari, Manoj and Lee, Honglak and Faust, Aleksandra},
  title   = {{Adversarial Environment Generation for Learning to Navigate the Web}},
  journal = {arXiv preprint arXiv:2103.01991},
  year    = {2021},
}
@article{hotti2021klarna,
  author  = {Hotti, Alexandra and Risuleo, Riccardo Sven and Magureanu, Stefan and Moradi, Aref and Lagergren, Jens},
  title   = {{The Klarna Product Page Dataset: A Realistic Benchmark for Web Representation Learning}},
  journal = {arXiv preprint arXiv:2111.02168},
  year    = {2021},
}
@article{mazumder2020flin,
  author  = {Mazumder, Sahisnu and Riva, Oriana},
  title   = {{FLIN: A Flexible Natural Language Interface for Web Navigation}},
  journal = {arXiv preprint arXiv:2010.12844},
  year    = {2020},
}
@article{humphreys2022data,
  author  = {Humphreys, Peter C and Raposo, David and Pohlen, Toby and Thornton, Gregory and Chhaparia, Rachita and Muldal, Alistair and Abramson, Josh and Georgiev, Petko and Goldin, Alex and Santoro, Adam and others},
  title   = {{A data-driven approach for learning to control computers}},
  journal = {arXiv preprint arXiv:2202.08137},
  year    = {2022},
}
@article{toyama2021androidenv,
  author  = {Toyama, Daniel and Hamel, Philippe and Gergely, Anita and Comanici, Gheorghe and Glaese, Amelia and Ahmed, Zafarali and Jackson, Tyler and Mourad, Shibl and Precup, Doina},
  title   = {{AndroidEnv: A Reinforcement Learning Platform for Android}},
  journal = {arXiv preprint arXiv:2105.13231},
  year    = {2021},
}
@article{burns2022interactive,
  author  = {Burns, Andrea and Arsan, Deniz and Agrawal, Sanjna and Kumar, Ranjitha and Saenko, Kate and Plummer, Bryan A},
  title   = {{Interactive Mobile App Navigation with Uncertain or Under-specified Natural Language Commands}},
  journal = {arXiv preprint arXiv:2202.02312},
  year    = {2022},
}
@article{nogueira2016end,
  author  = {Nogueira, Rodrigo and Cho, Kyunghyun},
  title   = {{End-to-End Goal-Driven Web Navigation}},
  journal = {{Advances in Neural Information Processing Systems}},
  volume  = {29},
  year    = {2016},
}
@inproceedings{pasupat2018mapping,
  author    = {Pasupat, Panupong and Jiang, Tian-Shun and Liu, Evan Zheran and Guu, Kelvin and Liang, Percy},
  title     = {Mapping Natural Language Commands to Web Elements},
  booktitle = {EMNLP},
  year      = {2018},
}
@inproceedings{su2017building,
  author    = {Su, Yu and Awadallah, Ahmed Hassan and Khabsa, Madian and Pantel, Patrick and Gamon, Michael and Encarnacion, Mark},
  title     = {{Building Natural Language Interfaces to Web APIs}},
  booktitle = {Proceedings of the 2017 ACM on Conference on Information and Knowledge Management},
  pages     = {177--186},
  year      = {2017},
}
@inproceedings{su2018natural,
  author    = {Su, Yu and Awadallah, Ahmed Hassan and Wang, Miaosen and White, Ryen W},
  title     = {{Natural Language Interfaces with Fine-Grained User Interaction: A Case Study on Web APIs}},
  booktitle = {The 41st International ACM SIGIR Conference on Research \& Development in Information Retrieval},
  pages     = {855--864},
  year      = {2018},
}
@inproceedings{williams2019automatic,
  author    = {Williams, Kyle and Hashemi, Seyyed Hadi and Zitouni, Imed},
  title     = {{Automatic Task Completion Flows from Web APIs}},
  booktitle = {Proceedings of the 42nd International ACM SIGIR Conference on Research and Development in Information Retrieval},
  pages     = {1009--1012},
  year      = {2019},
}
@inproceedings{allen2007plow,
  author    = {Allen, James and Chambers, Nathanael and Ferguson, George and Galescu, Lucian and Jung, Hyuckchul and Swift, Mary and Taysom, William},
  title     = {{PLOW: A Collaborative Task Learning Agent}},
  booktitle = {AAAI},
  pages     = {1514--1519},
  year      = {2007},
}
@inproceedings{narasimhan2016improving,
  author    = {Narasimhan, Karthik and Yala, Adam and Barzilay, Regina},
  title     = {{Improving Information Extraction by Acquiring External Evidence with Reinforcement Learning}},
  booktitle = {Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing},
  pages     = {2355--2365},
  year      = {2016},
}
@inproceedings{nogueira2017task,
  author    = {Nogueira, Rodrigo and Cho, Kyunghyun},
  title     = {{Task-Oriented Query Reformulation with Reinforcement Learning}},
  booktitle = {Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing},
  pages     = {574--583},
  year      = {2017},
}
@article{guu2020realm,
  author  = {Guu, Kelvin and Lee, Kenton and Tung, Zora and Pasupat, Panupong and Chang, Ming-Wei},
  title   = {{REALM: Retrieval-Augmented Language Model Pre-Training}},
  journal = {arXiv preprint arXiv:2002.08909},
  year    = {2020},
}
@article{adolphs2021boosting,
  author  = {Adolphs, Leonard and Boerschinger, Benjamin and Buck, Christian and Huebscher, Michelle Chen and Ciaramita, Massimiliano and Espeholt, Lasse and Hofmann, Thomas and Kilcher, Yannic},
  title   = {{Boosting Search Engines with Interactive Agents}},
  journal = {arXiv preprint arXiv:2109.00527},
  year    = {2021},
}
@article{lewis2019bart,
  author  = {Lewis, Mike and Liu, Yinhan and Goyal, Naman and Ghazvininejad, Marjan and Mohamed, Abdelrahman and Levy, Omer and Stoyanov, Ves and Zettlemoyer, Luke},
  title   = {{BART: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension}},
  journal = {arXiv preprint arXiv:1910.13461},
  year    = {2019},
}
% Preprint record of the same paper as devlin2019bert; both keys are kept because either may be cited.
@article{Devlin2019BERTPO,
  author  = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
  title   = {{BERT}: Pre-training of Deep Bidirectional Transformers for Language Understanding},
  journal = {arXiv preprint arXiv:1810.04805},
  year    = {2019},
}
@article{Russakovsky2015ImageNetLS,
  author  = {Olga Russakovsky and Jia Deng and Hao Su and Jonathan Krause and Sanjeev Satheesh and Sean Ma and Zhiheng Huang and Andrej Karpathy and Aditya Khosla and Michael S. Bernstein and Alexander C. Berg and Li Fei-Fei},
  title   = {{ImageNet Large Scale Visual Recognition Challenge}},
  journal = {International Journal of Computer Vision},
  volume  = {115},
  pages   = {211--252},
  year    = {2015},
}
@inproceedings{He2016DeepRL,
  author    = {Kaiming He and Xiangyu Zhang and Shaoqing Ren and Jian Sun},
  title     = {{Deep Residual Learning for Image Recognition}},
  booktitle = {2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  pages     = {770--778},
  year      = {2016},
}
@article{izacard2022atlas,
  author  = {Izacard, Gautier and Lewis, Patrick and Lomeli, Maria and Hosseini, Lucas and Petroni, Fabio and Schick, Timo and Dwivedi-Yu, Jane and Joulin, Armand and Riedel, Sebastian and Grave, Edouard},
  title   = {Atlas: Few-shot learning with retrieval augmented language models},
  journal = {arXiv preprint arXiv:2208.03299},
  year    = {2022},
}
@article{Browne2012ASO,
  author  = {Cameron Browne and Edward Jack Powley and Daniel Whitehouse and Simon M. M. Lucas and Peter I. Cowling and Philipp Rohlfshagen and Stephen Tavener and Diego Perez Liebana and Spyridon Samothrakis and Simon Colton},
  title   = {A Survey of Monte Carlo Tree Search Methods},
  journal = {IEEE Transactions on Computational Intelligence and AI in Games},
  volume  = {4},
  pages   = {1--43},
  year    = {2012},
}
@article{hart1968formal,
  author    = {Hart, Peter E and Nilsson, Nils J and Raphael, Bertram},
  title     = {A formal basis for the heuristic determination of minimum cost paths},
  journal   = {IEEE Transactions on Systems Science and Cybernetics},
  volume    = {4},
  number    = {2},
  pages     = {100--107},
  year      = {1968},
  publisher = {IEEE},
}
@article{Mirjalili2016TheWO,
  author  = {Seyedali Mirjalili and Andrew Lewis},
  title   = {The Whale Optimization Algorithm},
  journal = {Advances in Engineering Software},
  volume  = {95},
  pages   = {51--67},
  year    = {2016},
}
@inproceedings{Lazaridou2020MultiagentCM,
  author    = {Angeliki Lazaridou and Anna Potapenko and Olivier Tieleman},
  title     = {{Multi-agent Communication meets Natural Language: Synergies between Functional and Structural Language Learning}},
  booktitle = {ACL},
  year      = {2020},
}
@misc{flask,
  author = {Ronacher, Armin},
  title  = {{Flask API}},
  year   = {2010},
  url    = {https://flask.palletsprojects.com/en/2.1.x/},
}
@misc{scraperapi,
  author = {Ni, Daniel},
  title  = {{ScraperAPI}},
  year   = {2015},
  url    = {https://www.scraperapi.com/},
}
@article{brockman2016openai,
  author  = {Brockman, Greg and Cheung, Vicki and Pettersson, Ludwig and Schneider, Jonas and Schulman, John and Tang, Jie and Zaremba, Wojciech},
  title   = {{OpenAI Gym}},
  journal = {arXiv preprint arXiv:1606.01540},
  year    = {2016},
}
@misc{hughes_2019,
  author  = {Hughes, Matthew},
  title   = {{Study shows we're spending an insane amount of time online}},
  journal = {TNW | Tech},
  url     = {https://thenextweb.com/news/study-shows-were-spending-an-insane-amount-of-time-online},
  year    = {2019},
  month   = jan,
}
@article{lin2021pyserini,
  author  = {Lin, Jimmy and Ma, Xueguang and Lin, Sheng-Chieh and Yang, Jheng-Hong and Pradeep, Ronak and Nogueira, Rodrigo},
  title   = {{Pyserini: An Easy-to-Use Python Toolkit to Support Replicable IR Research with Sparse and Dense Representations}},
  journal = {arXiv preprint arXiv:2102.10073},
  year    = {2021},
}
@inproceedings{thomason2019improving,
  author       = {Thomason, Jesse and Padmakumar, Aishwarya and Sinapov, Jivko and Walker, Nick and Jiang, Yuqian and Yedidsion, Harel and Hart, Justin and Stone, Peter and Mooney, Raymond J},
  title        = {Improving grounded natural language understanding through human-robot dialog},
  booktitle    = {2019 International Conference on Robotics and Automation (ICRA)},
  pages        = {6934--6941},
  organization = {IEEE},
  year         = {2019},
}
@inproceedings{luketina2019survey,
  author    = {Luketina, Jelena and Nardelli, Nantas and Farquhar, Gregory and Foerster, Jakob N and Andreas, Jacob and Grefenstette, Edward and Whiteson, Shimon and Rockt{\"a}schel, Tim},
  title     = {A Survey of Reinforcement Learning Informed by Natural Language},
  booktitle = {IJCAI},
  year      = {2019},
}
@article{uc2021survey,
  author  = {Uc-Cetina, Victor and Navarro-Guerrero, Nicolas and Martin-Gonzalez, Anabel and Weber, Cornelius and Wermter, Stefan},
  title   = {Survey on reinforcement learning for language processing},
  journal = {arXiv preprint arXiv:2104.05565},
  year    = {2021},
}
@article{zhong2021silg,
  author  = {Zhong, Victor and Hanjie, Austin W and Wang, Sida and Narasimhan, Karthik and Zettlemoyer, Luke},
  title   = {{SILG: The Multi-domain Symbolic Interactive Language Grounding Benchmark}},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {34},
  pages   = {21505--21519},
  year    = {2021},
}
@article{brown2020language,
  author  = {Brown, Tom and Mann, Benjamin and Ryder, Nick and Subbiah, Melanie and Kaplan, Jared D and Dhariwal, Prafulla and Neelakantan, Arvind and Shyam, Pranav and Sastry, Girish and Askell, Amanda and others},
  title   = {Language models are few-shot learners},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {33},
  pages   = {1877--1901},
  year    = {2020},
}
@inproceedings{devlin2019bert,
  author    = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
  title     = {{BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding}},
  booktitle = {Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)},
  pages     = {4171--4186},
  year      = {2019},
}
@article{raffel2020exploring,
  author  = {Raffel, Colin and Shazeer, Noam and Roberts, Adam and Lee, Katherine and Narang, Sharan and Matena, Michael and Zhou, Yanqi and Li, Wei and Liu, Peter J},
  title   = {Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer},
  journal = {Journal of Machine Learning Research},
  volume  = {21},
  pages   = {1--67},
  year    = {2020},
}
@article{Aghajanyan2021HTLMHP,
  author  = {Armen Aghajanyan and Dmytro Okhonko and Mike Lewis and Mandar Joshi and Hu Xu and Gargi Ghosh and Luke Zettlemoyer},
  title   = {{HTLM}: Hyper-Text Pre-Training and Prompting of Language Models},
  journal = {arXiv preprint arXiv:2107.06955},
  year    = {2021},
}
@inproceedings{Yuan2020InteractiveMC,
  author    = {Xingdi Yuan and Jie Fu and Marc-Alexandre C{\^o}t{\'e} and Yi Tay and Christopher Joseph Pal and Adam Trischler},
  title     = {Interactive Machine Comprehension with Information Seeking Agents},
  booktitle = {ACL},
  year      = {2020},
}
@article{Lazaridou2022InternetaugmentedLM,
  author  = {Angeliki Lazaridou and Elena Gribovskaya and Wojciech Stokowiec and Nikolai Grigorev},
  title   = {Internet-augmented language models through few-shot prompting for open-domain question answering},
  journal = {arXiv preprint arXiv:2203.05115},
  year    = {2022},
}
@article{Shuster2022LanguageMT,
  author  = {Kurt Shuster and Mojtaba Komeili and Leonard Adolphs and Stephen Roller and Arthur D. Szlam and Jason Weston},
  title   = {Language Models that Seek for Knowledge: Modular Search \& Generation for Dialogue and Prompt Completion},
  journal = {arXiv preprint arXiv:2203.13224},
  year    = {2022},
}
@inproceedings{mnih2016asynchronous,
  author    = {Mnih, Volodymyr and Puigdom{\`e}nech Badia, Adri{\`a} and Mirza, Mehdi and Graves, Alex and Lillicrap, Timothy and Harley, Tim and Silver, David and Kavukcuoglu, Koray},
  title     = {Asynchronous methods for deep reinforcement learning},
  booktitle = {International Conference on Machine Learning},
  pages     = {1928--1937},
  year      = {2016},
}
@article{seo2016bidirectional,
  author  = {Seo, Minjoon and Kembhavi, Aniruddha and Farhadi, Ali and Hajishirzi, Hannaneh},
  title   = {Bidirectional attention flow for machine comprehension},
  journal = {arXiv preprint arXiv:1611.01603},
  year    = {2016},
}
@article{guo2020interactive,
  author  = {Guo, Xiaoxiao and Yu, Mo and Gao, Yupeng and Gan, Chuang and Campbell, Murray and Chang, Shiyu},
  title   = {Interactive fiction game playing as multi-paragraph reading comprehension with reinforcement learning},
  journal = {arXiv preprint arXiv:2010.02386},
  year    = {2020},
}
@article{Chung2014EmpiricalEO,
  author  = {Chung, Junyoung and G{\"u}l{\c{c}}ehre, {\c{C}}a{\u{g}}lar and Cho, Kyunghyun and Bengio, Yoshua},
  title   = {Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling},
  journal = {arXiv preprint arXiv:1412.3555},
  year    = {2014},
}
@inproceedings{huang2009analyzing,
  author    = {Huang, Jeff and Efthimiadis, Efthimis N},
  title     = {Analyzing and evaluating query reformulation strategies in web search logs},
  booktitle = {Proceedings of the 18th ACM Conference on Information and Knowledge Management},
  pages     = {77--86},
  year      = {2009},
}
@article{rieh2006analysis,
  author    = {Rieh, Soo Young and Xie, Hong},
  title     = {Analysis of multiple query reformulations on the web: The interactive information retrieval context},
  journal   = {Information Processing \& Management},
  volume    = {42},
  number    = {3},
  pages     = {751--768},
  year      = {2006},
  publisher = {Elsevier},
}
@article{wang2020deep,
  author  = {Wang, Xiao and Macdonald, Craig and Ounis, Iadh},
  title   = {Deep reinforced query reformulation for information retrieval},
  journal = {arXiv preprint arXiv:2007.07987},
  year    = {2020},
}
@article{zhuang2022bridging,
  author  = {Zhuang, Shengyao and Ren, Houxing and Shou, Linjun and Pei, Jian and Gong, Ming and Zuccon, Guido and Jiang, Daxin},
  title   = {Bridging the Gap Between Indexing and Retrieval for Differentiable Search Index with Query Generation},
  journal = {arXiv preprint arXiv:2206.10128},
  year    = {2022},
}
@article{komeili2021internet,
  author  = {Komeili, Mojtaba and Shuster, Kurt and Weston, Jason},
  title   = {Internet-augmented dialogue generation},
  journal = {arXiv preprint arXiv:2107.07566},
  year    = {2021},
}
@article{lampinen2021towards,
  author  = {Lampinen, Andrew and Chan, Stephanie and Banino, Andrea and Hill, Felix},
  title   = {Towards mental time travel: a hierarchical memory for reinforcement learning agents},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {34},
  pages   = {28182--28195},
  year    = {2021},
}
@article{fortunato2019generalization,
  author  = {Fortunato, Meire and Tan, Melissa and Faulkner, Ryan and Hansen, Steven and Puigdom{\`e}nech Badia, Adri{\`a} and Buttimore, Gavin and Deck, Charles and Leibo, Joel Z and Blundell, Charles},
  title   = {Generalization of reinforcement learners with working and episodic memory},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {32},
  year    = {2019},
}
@article{wayne2018unsupervised,
  author  = {Wayne, Greg and Hung, Chia-Chun and Amos, David and Mirza, Mehdi and Ahuja, Arun and Grabska-Barwinska, Agnieszka and Rae, Jack and Mirowski, Piotr and Leibo, Joel Z and Santoro, Adam and others},
  title   = {Unsupervised predictive memory in a goal-directed agent},
  journal = {arXiv preprint arXiv:1803.10760},
  year    = {2018},
}
@article{yao2021reading,
  author  = {Yao, Shunyu and Narasimhan, Karthik and Hausknecht, Matthew},
  title   = {Reading and acting while blindfolded: The need for semantics in text game agents},
  journal = {arXiv preprint arXiv:2103.13552},
  year    = {2021},
}
@inproceedings{hausknecht2020interactive,
  author    = {Hausknecht, Matthew and Ammanabrolu, Prithviraj and C{\^o}t{\'e}, Marc-Alexandre and Yuan, Xingdi},
  title     = {Interactive fiction games: A colossal adventure},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {34},
  pages     = {7903--7910},
  year      = {2020},
}
@inproceedings{shridhar2020alfred,
  author    = {Shridhar, Mohit and Thomason, Jesse and Gordon, Daniel and Bisk, Yonatan and Han, Winson and Mottaghi, Roozbeh and Zettlemoyer, Luke and Fox, Dieter},
  title     = {{ALFRED}: A benchmark for interpreting grounded instructions for everyday tasks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages     = {10740--10749},
  year      = {2020},
}
@article{andreas2020task,
  author    = {Andreas, Jacob and Bufe, John and Burkett, David and Chen, Charles and Clausman, Josh and Crawford, Jean and Crim, Kate and DeLoach, Jordan and Dorner, Leah and Eisner, Jason and others},
  title     = {Task-oriented dialogue as dataflow synthesis},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {8},
  pages     = {556--571},
  publisher = {MIT Press},
  year      = {2020},
}
@article{budzianowski2018multiwoz,
  author  = {Budzianowski, Pawe{\l} and Wen, Tsung-Hsien and Tseng, Bo-Hsiang and Casanueva, I{\~n}igo and Ultes, Stefan and Ramadan, Osman and Ga{\v{s}}i{\'c}, Milica},
  title   = {{MultiWOZ}--a large-scale multi-domain {Wizard-of-Oz} dataset for task-oriented dialogue modelling},
  journal = {arXiv preprint arXiv:1810.00278},
  year    = {2018},
}
@article{wang2021simvlm,
  author  = {Wang, Zirui and Yu, Jiahui and Yu, Adams Wei and Dai, Zihang and Tsvetkov, Yulia and Cao, Yuan},
  title   = {{SimVLM}: Simple visual language model pretraining with weak supervision},
  journal = {arXiv preprint arXiv:2108.10904},
  year    = {2021},
}
@inproceedings{li2020oscar,
  author       = {Li, Xiujun and Yin, Xi and Li, Chunyuan and Zhang, Pengchuan and Hu, Xiaowei and Zhang, Lei and Wang, Lijuan and Hu, Houdong and Dong, Li and Wei, Furu and others},
  title        = {Oscar: Object-semantics aligned pre-training for vision-language tasks},
  booktitle    = {European Conference on Computer Vision},
  pages        = {121--137},
  organization = {Springer},
  year         = {2020},
}
@inproceedings{pasupat2018elements,
  author    = {Pasupat, Panupong and Jiang, Tian-Shun and Liu, Evan Zheran and Guu, Kelvin and Liang, Percy},
  title     = {Mapping Natural Language Commands to Web Elements},
  booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
  year      = {2018}
}
@article{ecoffet2019go,
  author  = {Ecoffet, Adrien and Huizinga, Joost and Lehman, Joel and Stanley, Kenneth O. and Clune, Jeff},
  title   = {{Go-Explore}: a new approach for hard-exploration problems},
  journal = {arXiv preprint arXiv:1901.10995},
  year    = {2019}
}
@inproceedings{pathak2017curiosity,
  author    = {Pathak, Deepak and Agrawal, Pulkit and Efros, Alexei A. and Darrell, Trevor},
  title     = {Curiosity-driven exploration by self-supervised prediction},
  booktitle = {International Conference on Machine Learning},
  pages     = {2778--2787},
  year      = {2017},
  publisher = {PMLR}
}
@article{tuyls2022multi,
  author  = {Tuyls, Jens and Yao, Shunyu and Kakade, Sham and Narasimhan, Karthik},
  title   = {Multi-Stage Episodic Control for Strategic Exploration in Text Games},
  journal = {arXiv preprint arXiv:2201.01251},
  year    = {2022}
}
@inproceedings{verma2022chai,
  author    = {Verma, Siddharth and Fu, Justin and Yang, Sherry and Levine, Sergey},
  title     = {{CHAI}: A {CHatbot} {AI} for Task-Oriented Dialogue with Offline Reinforcement Learning},
  booktitle = {Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  pages     = {4471--4491},
  year      = {2022}
}
@book{daniel2017thinking,
  author = {Kahneman, Daniel},
  title  = {Thinking, Fast and Slow},
  year   = {2017}
}
@inproceedings{newell1959report,
  author    = {Newell, Allen and Shaw, John C. and Simon, Herbert A.},
  title     = {Report on a general problem solving program},
  booktitle = {{IFIP} Congress},
  volume    = {256},
  pages     = {64},
  year      = {1959},
  address   = {Pittsburgh, PA}
}
@book{russel2013artificial,
  author    = {Russell, Stuart and Norvig, Peter and others},
  title     = {Artificial Intelligence: A Modern Approach},
  year      = {2013},
  publisher = {Pearson Education Limited},
  address   = {London}
}
@article{fan2018hierarchical,
  author  = {Fan, Angela and Lewis, Mike and Dauphin, Yann},
  title   = {Hierarchical neural story generation},
  journal = {arXiv preprint arXiv:1805.04833},
  year    = {2018}
}
@article{holtzman2019curious,
  author  = {Holtzman, Ari and Buys, Jan and Du, Li and Forbes, Maxwell and Choi, Yejin},
  title   = {The curious case of neural text degeneration},
  journal = {arXiv preprint arXiv:1904.09751},
  year    = {2019}
}
@article{ouyang2022training,
  author  = {Ouyang, Long and Wu, Jeffrey and Jiang, Xu and Almeida, Diogo and Wainwright, Carroll and Mishkin, Pamela and Zhang, Chong and Agarwal, Sandhini and Slama, Katarina and Ray, Alex and others},
  title   = {Training language models to follow instructions with human feedback},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {35},
  pages   = {27730--27744},
  year    = {2022}
}
@inproceedings{biyik2019asking,
  author    = {B{\i}y{\i}k, Erdem and Palan, Malayandi and Landolfi, Nicholas C. and Losey, Dylan P. and Sadigh, Dorsa},
  title     = {Asking Easy Questions: A User-Friendly Approach to Active Reward Learning},
  booktitle = {Proceedings of the 3rd Conference on Robot Learning},
  year      = {2019}
}
@inproceedings{zhang2020dialogpt,
  author    = {Zhang, Yizhe and Sun, Siqi and Galley, Michel and Chen, Yen-Chun and Brockett, Chris and Gao, Xiang and Gao, Jianfeng and Liu, Jingjing and Dolan, William B.},
  title     = {{DIALOGPT}: Large-Scale Generative Pre-training for Conversational Response Generation},
  booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: System Demonstrations},
  pages     = {270--278},
  year      = {2020}
}
@article{irving2018ai,
  author  = {Irving, Geoffrey and Christiano, Paul and Amodei, Dario},
  title   = {{AI} safety via debate},
  journal = {arXiv preprint arXiv:1805.00899},
  year    = {2018}
}
@article{winograd1972understanding,
  author    = {Winograd, Terry},
  title     = {Understanding natural language},
  journal   = {Cognitive Psychology},
  volume    = {3},
  number    = {1},
  pages     = {1--191},
  year      = {1972},
  publisher = {Elsevier}
}
@inproceedings{tellex2011understanding,
  author    = {Tellex, Stefanie and Kollar, Thomas and Dickerson, Steven and Walter, Matthew and Banerjee, Ashis and Teller, Seth and Roy, Nicholas},
  title     = {Understanding natural language commands for robotic navigation and mobile manipulation},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {25},
  pages     = {1507--1514},
  year      = {2011}
}
@article{pataranutaporn2021ai,
  author    = {Pataranutaporn, Pat and Danry, Valdemar and Leong, Joanne and Punpongsanon, Parinya and Novy, Dan and Maes, Pattie and Sra, Misha},
  title     = {{AI}-generated characters for supporting personalized learning and well-being},
  journal   = {Nature Machine Intelligence},
  volume    = {3},
  number    = {12},
  pages     = {1013--1022},
  year      = {2021},
  publisher = {Nature Publishing Group UK},
  address   = {London}
}
@inproceedings{zhou2018emotional,
  author    = {Zhou, Hao and Huang, Minlie and Zhang, Tianyang and Zhu, Xiaoyan and Liu, Bing},
  title     = {Emotional chatting machine: Emotional conversation generation with internal and external memory},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {32},
  year      = {2018}
}
@inproceedings{sadigh2017active,
  author    = {Sadigh, Dorsa and Dragan, Anca D. and Sastry, Shankar and Seshia, Sanjit A.},
  title     = {Active Preference-Based Learning of Reward Functions},
  editor    = {Amato, Nancy M. and Srinivasa, Siddhartha S. and Ayanian, Nora and Kuindersma, Scott},
  booktitle = {Robotics: Science and Systems XIII},
  year      = {2017}
}
@misc{wang2023survey,
  author        = {Wang, Lei and Ma, Chen and Feng, Xueyang and Zhang, Zeyu and Yang, Hao and Zhang, Jingsen and Chen, Zhiyuan and Tang, Jiakai and Chen, Xu and Lin, Yankai and Zhao, Wayne Xin and Wei, Zhewei and Wen, Ji-Rong},
  title         = {A Survey on Large Language Model based Autonomous Agents},
  year          = {2023},
  eprint        = {2308.11432},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI}
}
@article{yang2023foundation,
  author  = {Yang, Sherry and Nachum, Ofir and Du, Yilun and Wei, Jason and Abbeel, Pieter and Schuurmans, Dale},
  title   = {Foundation models for decision making: Problems, methods, and opportunities},
  journal = {arXiv preprint arXiv:2303.04129},
  year    = {2023}
}
@article{vaswani2017attention,
  author  = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N. and Kaiser, {\L}ukasz and Polosukhin, Illia},
  title   = {Attention is all you need},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {30},
  year    = {2017}
}