CoALA.bib

@inproceedings{shi2017world,
  author    = {Shi, Tianlin and Karpathy, Andrej and Fan, Linxi and Hernandez, Jonathan and Liang, Percy},
  title     = {{World of Bits: An Open-Domain platform for web-based agents}},
  booktitle = {International Conference on Machine Learning},
  pages     = {3135--3144},
  year      = {2017},
}

@article{markov1954theory,
  author    = {Markov, Andrei Andreevich},
  title     = {The theory of algorithms},
  journal   = {Trudy Matematicheskogo Instituta Imeni VA Steklova},
  volume    = {42},
  pages     = {3--375},
  year      = {1954},
  publisher = {Russian Academy of Sciences, Steklov Mathematical Institute},
}

@article{post1943formal,
  author  = {Post, Emil L},
  title   = {Formal reductions of the general combinatorial decision problem},
  journal = {American Journal of Mathematics},
  volume  = {65},
  number  = {2},
  pages   = {197--215},
  year    = {1943},
}

@techreport{newell1967studies,
  author      = {Newell, Allen},
  title       = {Studies in problem solving: {S}ubject 3 on the crypt-arithmetic task {DONALD} + {GERALD} = {ROBERT}},
  institution = {Carnegie Mellon University},
  year        = {1967},
}

@inproceedings{mohan2014learning,
  author    = {Mohan, Shiwali and Laird, John},
  title     = {Learning goal-oriented hierarchical tasks from situated interactive instruction},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {28},
  year      = {2014},
}

@book{sutton2018reinforcement,
  author    = {Sutton, Richard S and Barto, Andrew G},
  title     = {Reinforcement learning: An introduction},
  publisher = {MIT Press},
  year      = {2018},
}

@book{laird2019soar,
  author    = {Laird, John E},
  title     = {The {S}oar cognitive architecture},
  publisher = {MIT Press},
  year      = {2019},
}

@article{nason2005soar,
  author    = {Nason, Shelley and Laird, John E},
  title     = {Soar-{RL}: Integrating reinforcement learning with {S}oar},
  journal   = {Cognitive Systems Research},
  volume    = {6},
  number    = {1},
  pages     = {51--59},
  year      = {2005},
  publisher = {Elsevier},
}

@article{laird2022,
  author  = {Laird, John E},
  title   = {Introduction to {S}oar},
  journal = {arXiv preprint arXiv:2205.03854},
  year    = {2022},
}

@inproceedings{lindes2016toward,
  author    = {Lindes, Peter and Laird, John E},
  title     = {Toward integrating cognitive linguistics and cognitive language processing},
  booktitle = {Proceedings of the 14th International Conference on Cognitive Modeling (ICCM)},
  year      = {2016},
}


@incollection{newell1989symbolic,
  author    = {Newell, Allen and Rosenbloom, Paul S and Laird, John E},
  title     = {Symbolic architectures for cognition},
  booktitle = {Foundations of Cognitive Science},
  editor    = {Posner, Michael I},
  pages     = {93--131},
  year      = {1989},
  publisher = {MIT Press},
}

@inproceedings{nuxoll2007extending,
  author    = {Nuxoll, Andrew M and Laird, John E},
  title     = {Extending cognitive architecture with episodic memory},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  pages     = {1560--1564},
  year      = {2007},
}

@article{sumers2022talk,
  author  = {Sumers, Theodore and Hawkins, Robert and Ho, Mark K and Griffiths, Tom and Hadfield-Menell, Dylan},
  title   = {{How to talk so AI will learn: Instructions, descriptions, and autonomy}},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {35},
  pages   = {34762--34775},
  year    = {2022},
}

@article{zhang2019dialogpt,
  author  = {Zhang, Yizhe and Sun, Siqi and Galley, Michel and Chen, Yen-Chun and Brockett, Chris and Gao, Xiang and Gao, Jianfeng and Liu, Jingjing and Dolan, Bill},
  title   = {{DialoGPT: Large-scale generative pre-training for conversational response generation}},
  journal = {arXiv preprint arXiv:1911.00536},
  year    = {2019},
}

@inproceedings{nguyen2022framework,
  author    = {Nguyen, Khanh X and Bisk, Yonatan and Daum{\'e}, III, Hal},
  title     = {A framework for learning to request rich and contextually useful information from humans},
  booktitle = {International Conference on Machine Learning},
  pages     = {16553--16568},
  year      = {2022},
}

@inproceedings{nguyen2021interactive,
  author    = {Nguyen, Khanh X and Misra, Dipendra and Schapire, Robert and Dud{\'\i}k, Miroslav and Shafto, Patrick},
  title     = {Interactive learning from activity description},
  booktitle = {International Conference on Machine Learning},
  pages     = {8096--8108},
  year      = {2021},
}

@inproceedings{palo2023towards,
  author    = {Di Palo, Norman and Byravan, Arunkumar and Hasenclever, Leonard and Wulfmeier, Markus and Heess, Nicolas and Riedmiller, Martin},
  title     = {Towards A Unified Agent with Foundation Models},
  booktitle = {Workshop on Reincarnating Reinforcement Learning at ICLR 2023},
  year      = {2023},
}

@inproceedings{sumers2021learning,
  author    = {Sumers, Theodore R and Ho, Mark K and Hawkins, Robert D and Narasimhan, Karthik and Griffiths, Thomas L},
  title     = {Learning rewards from linguistic feedback},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {35},
  pages     = {6002--6010},
  year      = {2021},
}

@incollection{atkinson1968human,
  author    = {Atkinson, Richard C and Shiffrin, Richard M},
  title     = {Human memory: A proposed system and its control processes},
  booktitle = {Psychology of Learning and Motivation},
  volume    = {2},
  pages     = {89--195},
  year      = {1968},
  publisher = {Elsevier},
}

@incollection{baddeley1974working,
  author    = {Baddeley, Alan D and Hitch, Graham},
  title     = {Working memory},
  booktitle = {Psychology of Learning and Motivation},
  volume    = {8},
  pages     = {47--89},
  year      = {1974},
  publisher = {Elsevier},
}

@inproceedings{derbinsky2012multi,
  author    = {Derbinsky, Nate and Li, Justin and Laird, John},
  title     = {A multi-domain evaluation of scaling in a general episodic memory},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {26},
  pages     = {193--199},
  year      = {2012},
}

@article{mohan2012acquiring,
  author  = {Mohan, Shiwali and Mininger, Aaron H and Kirk, James R and Laird, John E},
  title   = {Acquiring grounded representations of words with situated interactive instruction},
  journal = {Advances in Cognitive Systems},
  volume  = {2},
  pages   = {113--130},
  year    = {2012},
}

@article{tambe1995intelligent,
  author  = {Tambe, Milind and Johnson, W Lewis and Jones, Randolph M and Koss, Frank and Laird, John E and Rosenbloom, Paul S and Schwamb, Karl},
  title   = {Intelligent agents for interactive simulation environments},
  journal = {AI magazine},
  volume  = {16},
  number  = {1},
  pages   = {15--39},
  year    = {1995},
}

@inproceedings{laird2012cognitive,
  author    = {Laird, John Edwin and Kinkade, Keegan R and Mohan, Shiwali and Xu, Joseph Z},
  title     = {Cognitive robotics using the {S}oar cognitive architecture},
  booktitle = {CogRob @ AAAI},
  year      = {2012},
}


@article{kirk2014rosie,
  author  = {Kirk, James R and Laird, John E},
  title   = {Interactive task learning for simple games},
  journal = {Advances in Cognitive Systems},
  volume  = {3},
  pages   = {13--30},
  year    = {2014},
}

@article{kotseruba202040,
  author    = {Kotseruba, Iuliia and Tsotsos, John K},
  title     = {40 years of cognitive architectures: core cognitive abilities and practical applications},
  journal   = {Artificial Intelligence Review},
  volume    = {53},
  number    = {1},
  pages     = {17--94},
  year      = {2020},
  publisher = {Springer},
}

@article{liu2018reinforcement,
  author  = {Liu, Evan Zheran and Guu, Kelvin and Pasupat, Panupong and Shi, Tianlin and Liang, Percy},
  title   = {{Reinforcement Learning on Web Interfaces using Workflow-Guided Exploration}},
  journal = {arXiv preprint arXiv:1802.08802},
  year    = {2018},
}

@book{kahneman2011thinking,
  author    = {Kahneman, Daniel},
  title     = {Thinking, fast and slow},
  publisher = {Macmillan},
  year      = {2011},
}

@book{newell1972human,
  author    = {Newell, Allen and Simon, Herbert Alexander},
  title     = {Human problem solving},
  publisher = {Prentice-Hall},
  year      = {1972},
}

@article{sloman1996empirical,
  author    = {Sloman, Steven A},
  title     = {The empirical case for two systems of reasoning},
  journal   = {Psychological bulletin},
  volume    = {119},
  number    = {1},
  pages     = {3--22},
  year      = {1996},
  publisher = {American Psychological Association},
}

@article{daw2005uncertainty,
  author  = {Daw, Nathaniel D and Niv, Yael and Dayan, Peter},
  title   = {Uncertainty-based competition between prefrontal and dorsolateral striatal systems for behavioral control},
  journal = {Nature Neuroscience},
  volume  = {8},
  number  = {12},
  pages   = {1704--1711},
  year    = {2005},
}

@book{stanovich1999rational,
  author    = {Stanovich, Keith E},
  title     = {Who is rational? Studies of individual differences in reasoning},
  publisher = {Psychology Press},
  year      = {1999},
}

@incollection{kahneman2002representativeness,
  author    = {Kahneman, Daniel and Frederick, Shane},
  title     = {Representativeness revisited: Attribute substitution in intuitive judgment},
  booktitle = {Heuristics and Biases: The Psychology of Intuitive Judgment},
  editor    = {Gilovich, Thomas and Griffin, Dale and Kahneman, Daniel},
  pages     = {49--81},
  year      = {2002},
  publisher = {Cambridge University Press},
}

@article{gur2018learning,
  author  = {Gur, Izzeddin and Rueckert, Ulrich and Faust, Aleksandra and Hakkani-Tur, Dilek},
  title   = {{Learning to Navigate the Web}},
  journal = {arXiv preprint arXiv:1812.09195},
  year    = {2018},
}

@article{jia2019dom,
  author  = {Jia, Sheng and Kiros, Jamie and Ba, Jimmy},
  title   = {{DOM-Q-NET: Grounded RL on Structured Language}},
  journal = {arXiv preprint arXiv:1902.07257},
  year    = {2019},
}

@inproceedings{merkle2019cooperative,
  author    = {Merkle, Nicole and Philipp, Patrick},
  title     = {{Cooperative Web Agents by Combining Semantic Technologies with Reinforcement Learning}},
  booktitle = {Proceedings of the 10th International Conference on Knowledge Capture},
  pages     = {205--212},
  year      = {2019},
}

@inproceedings{bender2020climbing,
  author    = {Bender, Emily M and Koller, Alexander},
  title     = {{Climbing towards NLU: On Meaning, Form, and Understanding in the Age of Data}},
  booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
  pages     = {5185--5198},
  year      = {2020},
}

@article{nakano2021webgpt,
  author  = {Nakano, Reiichiro and Hilton, Jacob and Balaji, Suchir and Wu, Jeff and Ouyang, Long and Kim, Christina and Hesse, Christopher and Jain, Shantanu and Kosaraju, Vineet and Saunders, William and others},
  title   = {{WebGPT: Browser-Assisted Question-Answering with Human Feedback}},
  journal = {arXiv preprint arXiv:2112.09332},
  year    = {2021},
}


@article{gur2021adversarial,
  author  = {Gur, Izzeddin and Jaques, Natasha and Malta, Kevin and Tiwari, Manoj and Lee, Honglak and Faust, Aleksandra},
  title   = {{Adversarial Environment Generation for Learning to Navigate the Web}},
  journal = {arXiv preprint arXiv:2103.01991},
  year    = {2021},
}

@article{hotti2021klarna,
  author  = {Hotti, Alexandra and Risuleo, Riccardo Sven and Magureanu, Stefan and Moradi, Aref and Lagergren, Jens},
  title   = {{The Klarna Product Page Dataset: A Realistic Benchmark for Web Representation Learning}},
  journal = {arXiv preprint arXiv:2111.02168},
  year    = {2021},
}

@article{mazumder2020flin,
  author  = {Mazumder, Sahisnu and Riva, Oriana},
  title   = {{FLIN: A Flexible Natural Language Interface for Web Navigation}},
  journal = {arXiv preprint arXiv:2010.12844},
  year    = {2020},
}

@article{humphreys2022data,
  author  = {Humphreys, Peter C and Raposo, David and Pohlen, Toby and Thornton, Gregory and Chhaparia, Rachita and Muldal, Alistair and Abramson, Josh and Georgiev, Petko and Goldin, Alex and Santoro, Adam and others},
  title   = {{A data-driven approach for learning to control computers}},
  journal = {arXiv preprint arXiv:2202.08137},
  year    = {2022},
}


@article{toyama2021androidenv,
  author  = {Toyama, Daniel and Hamel, Philippe and Gergely, Anita and Comanici, Gheorghe and Glaese, Amelia and Ahmed, Zafarali and Jackson, Tyler and Mourad, Shibl and Precup, Doina},
  title   = {{AndroidEnv: A Reinforcement Learning Platform for Android}},
  journal = {arXiv preprint arXiv:2105.13231},
  year    = {2021},
}

@article{burns2022interactive,
  author  = {Burns, Andrea and Arsan, Deniz and Agrawal, Sanjna and Kumar, Ranjitha and Saenko, Kate and Plummer, Bryan A},
  title   = {{Interactive Mobile App Navigation with Uncertain or Under-specified Natural Language Commands}},
  journal = {arXiv preprint arXiv:2202.02312},
  year    = {2022},
}

@article{nogueira2016end,
  author  = {Nogueira, Rodrigo and Cho, Kyunghyun},
  title   = {{End-to-End Goal-Driven Web Navigation}},
  journal = {{Advances in Neural Information Processing Systems}},
  volume  = {29},
  year    = {2016},
}

@inproceedings{pasupat2018mapping,
  author    = {Pasupat, Panupong and Jiang, Tian-Shun and Liu, Evan Zheran and Guu, Kelvin and Liang, Percy},
  title     = {Mapping Natural Language Commands to Web Elements},
  booktitle = {EMNLP},
  year      = {2018},
}

@inproceedings{su2017building,
  author    = {Su, Yu and Awadallah, Ahmed Hassan and Khabsa, Madian and Pantel, Patrick and Gamon, Michael and Encarnacion, Mark},
  title     = {{Building Natural Language Interfaces to Web APIs}},
  booktitle = {Proceedings of the 2017 ACM on Conference on Information and Knowledge Management},
  pages     = {177--186},
  year      = {2017},
}

@inproceedings{su2018natural,
  author    = {Su, Yu and Awadallah, Ahmed Hassan and Wang, Miaosen and White, Ryen W},
  title     = {{Natural Language Interfaces with Fine-Grained User Interaction: A Case Study on Web APIs}},
  booktitle = {The 41st International ACM SIGIR Conference on Research \& Development in Information Retrieval},
  pages     = {855--864},
  year      = {2018},
}

@inproceedings{williams2019automatic,
  author    = {Williams, Kyle and Hashemi, Seyyed Hadi and Zitouni, Imed},
  title     = {{Automatic Task Completion Flows from Web APIs}},
  booktitle = {Proceedings of the 42nd International ACM SIGIR Conference on Research and Development in Information Retrieval},
  pages     = {1009--1012},
  year      = {2019},
}


@inproceedings{allen2007plow,
  author    = {Allen, James and Chambers, Nathanael and Ferguson, George and Galescu, Lucian and Jung, Hyuckchul and Swift, Mary and Taysom, William},
  title     = {{PLOW: A Collaborative Task Learning Agent}},
  booktitle = {AAAI},
  volume    = {7},
  pages     = {1514--1519},
  year      = {2007},
}

@inproceedings{narasimhan2016improving,
  author    = {Narasimhan, Karthik and Yala, Adam and Barzilay, Regina},
  title     = {{Improving Information Extraction by Acquiring External Evidence with Reinforcement Learning}},
  booktitle = {Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing},
  pages     = {2355--2365},
  year      = {2016},
}

@inproceedings{nogueira2017task,
  author    = {Nogueira, Rodrigo and Cho, Kyunghyun},
  title     = {{Task-Oriented Query Reformulation with Reinforcement Learning}},
  booktitle = {Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing},
  pages     = {574--583},
  year      = {2017},
}

@article{guu2020realm,
  author  = {Guu, Kelvin and Lee, Kenton and Tung, Zora and Pasupat, Panupong and Chang, Ming-Wei},
  title   = {{REALM: Retrieval-Augmented Language Model Pre-Training}},
  journal = {arXiv preprint arXiv:2002.08909},
  year    = {2020},
}

@article{adolphs2021boosting,
  author  = {Adolphs, Leonard and Boerschinger, Benjamin and Buck, Christian and Huebscher, Michelle Chen and Ciaramita, Massimiliano and Espeholt, Lasse and Hofmann, Thomas and Kilcher, Yannic},
  title   = {{Boosting Search Engines with Interactive Agents}},
  journal = {arXiv preprint arXiv:2109.00527},
  year    = {2021},
}


@article{lewis2019bart,
  author  = {Lewis, Mike and Liu, Yinhan and Goyal, Naman and Ghazvininejad, Marjan and Mohamed, Abdelrahman and Levy, Omer and Stoyanov, Ves and Zettlemoyer, Luke},
  title   = {{BART: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension}},
  journal = {arXiv preprint arXiv:1910.13461},
  year    = {2019},
}

% Same paper as devlin2019bert (the NAACL-HLT version); key kept so existing citations still resolve.
@article{Devlin2019BERTPO,
  author  = {Jacob Devlin and Ming-Wei Chang and Kenton Lee and Kristina Toutanova},
  title   = {{BERT}: Pre-training of Deep Bidirectional Transformers for Language Understanding},
  journal = {arXiv preprint arXiv:1810.04805},
  year    = {2019},
}


@article{Russakovsky2015ImageNetLS,
  author  = {Olga Russakovsky and Jia Deng and Hao Su and Jonathan Krause and Sanjeev Satheesh and Sean Ma and Zhiheng Huang and Andrej Karpathy and Aditya Khosla and Michael S. Bernstein and Alexander C. Berg and Li Fei-Fei},
  title   = {{ImageNet Large Scale Visual Recognition Challenge}},
  journal = {International Journal of Computer Vision},
  volume  = {115},
  number  = {3},
  pages   = {211--252},
  year    = {2015},
}

@inproceedings{He2016DeepRL,
  author    = {Kaiming He and Xiangyu Zhang and Shaoqing Ren and Jian Sun},
  title     = {{Deep Residual Learning for Image Recognition}},
  booktitle = {2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  pages     = {770--778},
  year      = {2016},
}
@article{izacard2022atlas,
  author  = {Izacard, Gautier and Lewis, Patrick and Lomeli, Maria and Hosseini, Lucas and Petroni, Fabio and Schick, Timo and Dwivedi-Yu, Jane and Joulin, Armand and Riedel, Sebastian and Grave, Edouard},
  title   = {Atlas: Few-shot learning with retrieval augmented language models},
  journal = {arXiv preprint arXiv:2208.03299},
  year    = {2022},
}


@article{Browne2012ASO,
  author  = {Cameron Browne and Edward Jack Powley and Daniel Whitehouse and Simon M. M. Lucas and Peter I. Cowling and Philipp Rohlfshagen and Stephen Tavener and Diego Perez Liebana and Spyridon Samothrakis and Simon Colton},
  title   = {A Survey of {Monte Carlo} Tree Search Methods},
  journal = {IEEE Transactions on Computational Intelligence and AI in Games},
  volume  = {4},
  number  = {1},
  pages   = {1--43},
  year    = {2012},
}

@article{hart1968formal,
  author    = {Hart, Peter E and Nilsson, Nils J and Raphael, Bertram},
  title     = {A formal basis for the heuristic determination of minimum cost paths},
  journal   = {IEEE transactions on Systems Science and Cybernetics},
  volume    = {4},
  number    = {2},
  pages     = {100--107},
  year      = {1968},
  publisher = {IEEE},
}


@article{Mirjalili2016TheWO,
  author  = {Seyedali Mirjalili and Andrew Lewis},
  title   = {The Whale Optimization Algorithm},
  journal = {Advances in Engineering Software},
  volume  = {95},
  pages   = {51--67},
  year    = {2016},
}


@inproceedings{Lazaridou2020MultiagentCM,
  author    = {Angeliki Lazaridou and Anna Potapenko and Olivier Tieleman},
  title     = {{Multi-agent Communication meets Natural Language: Synergies between Functional and Structural Language Learning}},
  booktitle = {ACL},
  year      = {2020},
}

@misc{flask,
  author = {Ronacher, Armin},
  title  = {{Flask API}},
  url    = {https://flask.palletsprojects.com/en/2.1.x/},
  year   = {2010},
}

@misc{scraperapi,
  author = {Ni, Daniel},
  title  = {{ScraperAPI}},
  url    = {https://www.scraperapi.com/},
  year   = {2015},
}

@article{brockman2016openai,
  author  = {Brockman, Greg and Cheung, Vicki and Pettersson, Ludwig and Schneider, Jonas and Schulman, John and Tang, Jie and Zaremba, Wojciech},
  title   = {{OpenAI Gym}},
  journal = {arXiv preprint arXiv:1606.01540},
  year    = {2016},
}

 @misc{hughes_2019,
    author  = {Hughes, Matthew},
    title   = {{Study shows we're spending an insane amount of time online}},
    journal = {TNW | Tech},
    url     = {https://thenextweb.com/news/study-shows-were-spending-an-insane-amount-of-time-online},
    year    = {2019},
    month   = jan,
}
 
 @article{lin2021pyserini,
  author  = {Lin, Jimmy and Ma, Xueguang and Lin, Sheng-Chieh and Yang, Jheng-Hong and Pradeep, Ronak and Nogueira, Rodrigo},
  title   = {{Pyserini: An Easy-to-Use Python Toolkit to Support Replicable IR Research with Sparse and Dense Representations}},
  journal = {arXiv preprint arXiv:2102.10073},
  year    = {2021},
}

@inproceedings{thomason2019improving,
  author       = {Thomason, Jesse and Padmakumar, Aishwarya and Sinapov, Jivko and Walker, Nick and Jiang, Yuqian and Yedidsion, Harel and Hart, Justin and Stone, Peter and Mooney, Raymond J},
  title        = {Improving grounded natural language understanding through human-robot dialog},
  booktitle    = {2019 International Conference on Robotics and Automation (ICRA)},
  pages        = {6934--6941},
  year         = {2019},
  organization = {IEEE},
}

@inproceedings{luketina2019survey,
  author    = {Luketina, Jelena and Nardelli, Nantas and Farquhar, Gregory and Foerster, Jakob N and Andreas, Jacob and Grefenstette, Edward and Whiteson, Shimon and Rockt{\"a}schel, Tim},
  title     = {A Survey of Reinforcement Learning Informed by Natural Language},
  booktitle = {IJCAI},
  year      = {2019},
}

@article{uc2021survey,
  author  = {Uc-Cetina, Victor and Navarro-Guerrero, Nicolas and Martin-Gonzalez, Anabel and Weber, Cornelius and Wermter, Stefan},
  title   = {Survey on reinforcement learning for language processing},
  journal = {arXiv preprint arXiv:2104.05565},
  year    = {2021},
}

@article{zhong2021silg,
  author  = {Zhong, Victor and Hanjie, Austin W and Wang, Sida and Narasimhan, Karthik and Zettlemoyer, Luke},
  title   = {{SILG: The Multi-domain Symbolic Interactive Language Grounding Benchmark}},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {34},
  pages   = {21505--21519},
  year    = {2021},
}

@article{brown2020language,
  author  = {Brown, Tom and Mann, Benjamin and Ryder, Nick and Subbiah, Melanie and Kaplan, Jared D and Dhariwal, Prafulla and Neelakantan, Arvind and Shyam, Pranav and Sastry, Girish and Askell, Amanda and others},
  title   = {Language models are few-shot learners},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {33},
  pages   = {1877--1901},
  year    = {2020},
}

@inproceedings{devlin2019bert,
  author    = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
  title     = {{BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding}},
  booktitle = {NAACL-HLT (1)},
  year      = {2019},
}

@article{raffel2020exploring,
  author  = {Raffel, Colin and Shazeer, Noam and Roberts, Adam and Lee, Katherine and Narang, Sharan and Matena, Michael and Zhou, Yanqi and Li, Wei and Liu, Peter J},
  title   = {Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer},
  journal = {Journal of Machine Learning Research},
  volume  = {21},
  pages   = {1--67},
  year    = {2020},
}


@article{Aghajanyan2021HTLMHP,
  author  = {Armen Aghajanyan and Dmytro Okhonko and Mike Lewis and Mandar Joshi and Hu Xu and Gargi Ghosh and Luke Zettlemoyer},
  title   = {{HTLM}: Hyper-Text Pre-Training and Prompting of Language Models},
  journal = {arXiv preprint arXiv:2107.06955},
  year    = {2021},
}

@inproceedings{Yuan2020InteractiveMC,
  author    = {Xingdi Yuan and Jie Fu and Marc-Alexandre C{\^o}t{\'e} and Yi Tay and Christopher Joseph Pal and Adam Trischler},
  title     = {Interactive Machine Comprehension with Information Seeking Agents},
  booktitle = {ACL},
  year      = {2020},
}

@article{Lazaridou2022InternetaugmentedLM,
  author  = {Angeliki Lazaridou and Elena Gribovskaya and Wojciech Stokowiec and Nikolai Grigorev},
  title   = {Internet-augmented language models through few-shot prompting for open-domain question answering},
  journal = {arXiv preprint arXiv:2203.05115},
  year    = {2022},
}

@article{Shuster2022LanguageMT,
  author  = {Kurt Shuster and Mojtaba Komeili and Leonard Adolphs and Stephen Roller and Arthur D. Szlam and Jason Weston},
  title   = {Language Models that Seek for Knowledge: Modular Search \& Generation for Dialogue and Prompt Completion},
  journal = {arXiv preprint arXiv:2203.13224},
  year    = {2022},
}


@inproceedings{mnih2016asynchronous,
  author    = {Mnih, Volodymyr and Badia, Adria Puigdomenech and Mirza, Mehdi and Graves, Alex and Lillicrap, Timothy and Harley, Tim and Silver, David and Kavukcuoglu, Koray},
  title     = {Asynchronous methods for deep reinforcement learning},
  booktitle = {International conference on machine learning},
  pages     = {1928--1937},
  year      = {2016},
}

@article{seo2016bidirectional,
  author  = {Seo, Minjoon and Kembhavi, Aniruddha and Farhadi, Ali and Hajishirzi, Hannaneh},
  title   = {Bidirectional attention flow for machine comprehension},
  journal = {arXiv preprint arXiv:1611.01603},
  year    = {2016},
}

@article{guo2020interactive,
  author  = {Guo, Xiaoxiao and Yu, Mo and Gao, Yupeng and Gan, Chuang and Campbell, Murray and Chang, Shiyu},
  title   = {Interactive fiction game playing as multi-paragraph reading comprehension with reinforcement learning},
  journal = {arXiv preprint arXiv:2010.02386},
  year    = {2020},
}


@article{Chung2014EmpiricalEO,
  author  = {Junyoung Chung and {\c{C}}a{\u{g}}lar G{\"u}l{\c{c}}ehre and Kyunghyun Cho and Yoshua Bengio},
  title   = {Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling},
  journal = {arXiv preprint arXiv:1412.3555},
  year    = {2014},
}

@inproceedings{huang2009analyzing,
  author    = {Huang, Jeff and Efthimiadis, Efthimis N},
  title     = {Analyzing and evaluating query reformulation strategies in web search logs},
  booktitle = {Proceedings of the 18th ACM conference on Information and knowledge management},
  pages     = {77--86},
  year      = {2009},
}


@article{rieh2006analysis,
  author    = {Rieh, Soo Young and others},
  title     = {Analysis of multiple query reformulations on the web: The interactive information retrieval context},
  journal   = {Information Processing \& Management},
  volume    = {42},
  number    = {3},
  pages     = {751--768},
  year      = {2006},
  publisher = {Elsevier},
}

@article{wang2020deep,
  author  = {Wang, Xiao and Macdonald, Craig and Ounis, Iadh},
  title   = {Deep reinforced query reformulation for information retrieval},
  journal = {arXiv preprint arXiv:2007.07987},
  year    = {2020},
}

@article{zhuang2022bridging,
  author  = {Zhuang, Shengyao and Ren, Houxing and Shou, Linjun and Pei, Jian and Gong, Ming and Zuccon, Guido and Jiang, Daxin},
  title   = {Bridging the Gap Between Indexing and Retrieval for Differentiable Search Index with Query Generation},
  journal = {arXiv preprint arXiv:2206.10128},
  year    = {2022},
}

@article{komeili2021internet,
  author  = {Komeili, Mojtaba and Shuster, Kurt and Weston, Jason},
  title   = {Internet-augmented dialogue generation},
  journal = {arXiv preprint arXiv:2107.07566},
  year    = {2021},
}

@article{lampinen2021towards,
  author  = {Lampinen, Andrew and Chan, Stephanie and Banino, Andrea and Hill, Felix},
  title   = {Towards mental time travel: a hierarchical memory for reinforcement learning agents},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {34},
  pages   = {28182--28195},
  year    = {2021},
}

@article{fortunato2019generalization,
  author  = {Fortunato, Meire and Tan, Melissa and Faulkner, Ryan and Hansen, Steven and Puigdom{\`e}nech Badia, Adri{\`a} and Buttimore, Gavin and Deck, Charles and Leibo, Joel Z and Blundell, Charles},
  title   = {Generalization of reinforcement learners with working and episodic memory},
  journal = {Advances in neural information processing systems},
  volume  = {32},
  year    = {2019},
}

@article{wayne2018unsupervised,
  author  = {Wayne, Greg and Hung, Chia-Chun and Amos, David and Mirza, Mehdi and Ahuja, Arun and Grabska-Barwinska, Agnieszka and Rae, Jack and Mirowski, Piotr and Leibo, Joel Z and Santoro, Adam and others},
  title   = {Unsupervised predictive memory in a goal-directed agent},
  journal = {arXiv preprint arXiv:1803.10760},
  year    = {2018},
}


@article{yao2021reading,
  author  = {Yao, Shunyu and Narasimhan, Karthik and Hausknecht, Matthew},
  title   = {Reading and acting while blindfolded: The need for semantics in text game agents},
  journal = {arXiv preprint arXiv:2103.13552},
  year    = {2021},
}


@inproceedings{hausknecht2020interactive,
  author    = {Hausknecht, Matthew and Ammanabrolu, Prithviraj and C{\^o}t{\'e}, Marc-Alexandre and Yuan, Xingdi},
  title     = {Interactive fiction games: A colossal adventure},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {34},
  pages     = {7903--7910},
  year      = {2020},
}

@inproceedings{shridhar2020alfred,
  author    = {Shridhar, Mohit and Thomason, Jesse and Gordon, Daniel and Bisk, Yonatan and Han, Winson and Mottaghi, Roozbeh and Zettlemoyer, Luke and Fox, Dieter},
  title     = {{ALFRED}: A benchmark for interpreting grounded instructions for everyday tasks},
  booktitle = {Proceedings of the IEEE/CVF conference on computer vision and pattern recognition},
  pages     = {10740--10749},
  year      = {2020},
}


@article{andreas2020task,
  author    = {Andreas, Jacob and Bufe, John and Burkett, David and Chen, Charles and Clausman, Josh and Crawford, Jean and Crim, Kate and DeLoach, Jordan and Dorner, Leah and Eisner, Jason and others},
  title     = {Task-oriented dialogue as dataflow synthesis},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {8},
  pages     = {556--571},
  year      = {2020},
  publisher = {MIT Press},
}


@article{budzianowski2018multiwoz,
  author  = {Budzianowski, Pawe{\l} and Wen, Tsung-Hsien and Tseng, Bo-Hsiang and Casanueva, I{\~n}igo and Ultes, Stefan and Ramadan, Osman and Ga{\v{s}}i{\'c}, Milica},
  title   = {{MultiWOZ}--a large-scale multi-domain {Wizard-of-Oz} dataset for task-oriented dialogue modelling},
  journal = {arXiv preprint arXiv:1810.00278},
  year    = {2018},
}

@article{wang2021simvlm,
  author  = {Wang, Zirui and Yu, Jiahui and Yu, Adams Wei and Dai, Zihang and Tsvetkov, Yulia and Cao, Yuan},
  title   = {{SimVLM}: Simple visual language model pretraining with weak supervision},
  journal = {arXiv preprint arXiv:2108.10904},
  year    = {2021},
}

@inproceedings{li2020oscar,
  author       = {Li, Xiujun and Yin, Xi and Li, Chunyuan and Zhang, Pengchuan and Hu, Xiaowei and Zhang, Lei and Wang, Lijuan and Hu, Houdong and Dong, Li and Wei, Furu and others},
  title        = {Oscar: Object-semantics aligned pre-training for vision-language tasks},
  booktitle    = {European Conference on Computer Vision},
  pages        = {121--137},
  year         = {2020},
  organization = {Springer},
}


@inproceedings{pasupat2018elements,
  author    = {Panupong Pasupat and Tian-Shun Jiang and Evan Zheran Liu and Kelvin Guu and Percy Liang},
  title     = {Mapping Natural Language Commands to Web Elements},
  booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
  year      = {2018},
}

@article{ecoffet2019go,
  author  = {Ecoffet, Adrien and Huizinga, Joost and Lehman, Joel and Stanley, Kenneth O and Clune, Jeff},
  title   = {{Go-Explore}: a new approach for hard-exploration problems},
  journal = {arXiv preprint arXiv:1901.10995},
  year    = {2019},
}

@inproceedings{pathak2017curiosity,
  author    = {Pathak, Deepak and Agrawal, Pulkit and Efros, Alexei A and Darrell, Trevor},
  title     = {Curiosity-driven exploration by self-supervised prediction},
  booktitle = {International conference on machine learning},
  pages     = {2778--2787},
  year      = {2017},
}

@article{tuyls2022multi,
  author  = {Tuyls, Jens and Yao, Shunyu and Kakade, Sham and Narasimhan, Karthik},
  title   = {Multi-Stage Episodic Control for Strategic Exploration in Text Games},
  journal = {arXiv preprint arXiv:2201.01251},
  year    = {2022},
}


@inproceedings{verma2022chai,
  author    = {Verma, Siddharth and Fu, Justin and Yang, Sherry and Levine, Sergey},
  title     = {{CHAI}: A {CHatbot AI} for Task-Oriented Dialogue with Offline Reinforcement Learning},
  booktitle = {Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  pages     = {4471--4491},
  year      = {2022},
}


% Same book as kahneman2011thinking; key kept so existing citations still resolve.
@book{daniel2017thinking,
  author    = {Kahneman, Daniel},
  title     = {Thinking, fast and slow},
  publisher = {Macmillan},
  year      = {2017},
}


@inproceedings{newell1959report,
  author       = {Newell, Allen and Shaw, John C and Simon, Herbert A},
  title        = {Report on a general problem solving program},
  booktitle    = {IFIP congress},
  pages        = {256--264},
  year         = {1959},
  organization = {Pittsburgh, PA},
}


@book{russel2013artificial,
  author    = {Russell, Stuart and Norvig, Peter},
  title     = {Artificial intelligence: a modern approach},
  publisher = {Pearson Education Limited},
  address   = {London},
  year      = {2013},
}

@article{fan2018hierarchical,
  author  = {Fan, Angela and Lewis, Mike and Dauphin, Yann},
  title   = {Hierarchical neural story generation},
  journal = {arXiv preprint arXiv:1805.04833},
  year    = {2018},
}

@article{holtzman2019curious,
  author  = {Holtzman, Ari and Buys, Jan and Du, Li and Forbes, Maxwell and Choi, Yejin},
  title   = {The curious case of neural text degeneration},
  journal = {arXiv preprint arXiv:1904.09751},
  year    = {2019},
}


@article{ouyang2022training,
  author  = {Ouyang, Long and Wu, Jeffrey and Jiang, Xu and Almeida, Diogo and Wainwright, Carroll and Mishkin, Pamela and Zhang, Chong and Agarwal, Sandhini and Slama, Katarina and Ray, Alex and others},
  title   = {Training language models to follow instructions with human feedback},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {35},
  pages   = {27730--27744},
  year    = {2022},
}

@inproceedings{biyik2019asking,
  author    = {Biyik, Erdem and Palan, Malayandi and Landolfi, Nicholas C and Losey, Dylan P and Sadigh, Dorsa},
  title     = {Asking Easy Questions: A User-Friendly Approach to Active Reward Learning},
  booktitle = {Proceedings of the 3rd Conference on Robot Learning},
  year      = {2019},
}

@inproceedings{zhang2020dialogpt,
  author    = {Zhang, Yizhe and Sun, Siqi and Galley, Michel and Chen, Yen-Chun and Brockett, Chris and Gao, Xiang and Gao, Jianfeng and Liu, Jingjing and Dolan, William B},
  title     = {{DIALOGPT}: Large-Scale Generative Pre-training for Conversational Response Generation},
  booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: System Demonstrations},
  pages     = {270--278},
  year      = {2020},
}

@article{irving2018ai,
  author  = {Irving, Geoffrey and Christiano, Paul and Amodei, Dario},
  title   = {{AI} safety via debate},
  journal = {arXiv preprint arXiv:1805.00899},
  year    = {2018},
}

@article{winograd1972understanding,
  author    = {Winograd, Terry},
  title     = {Understanding natural language},
  journal   = {Cognitive Psychology},
  volume    = {3},
  number    = {1},
  pages     = {1--191},
  year      = {1972},
  publisher = {Elsevier},
}

@inproceedings{tellex2011understanding,
  author    = {Tellex, Stefanie and Kollar, Thomas and Dickerson, Steven and Walter, Matthew and Banerjee, Ashis and Teller, Seth and Roy, Nicholas},
  title     = {Understanding natural language commands for robotic navigation and mobile manipulation},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {25},
  pages     = {1507--1514},
  year      = {2011},
}

@article{pataranutaporn2021ai,
  author    = {Pataranutaporn, Pat and Danry, Valdemar and Leong, Joanne and Punpongsanon, Parinya and Novy, Dan and Maes, Pattie and Sra, Misha},
  title     = {{AI}-generated characters for supporting personalized learning and well-being},
  journal   = {Nature Machine Intelligence},
  volume    = {3},
  number    = {12},
  pages     = {1013--1022},
  year      = {2021},
  publisher = {Nature Publishing Group},
  address   = {London, UK},
}

@inproceedings{zhou2018emotional,
  author    = {Zhou, Hao and Huang, Minlie and Zhang, Tianyang and Zhu, Xiaoyan and Liu, Bing},
  title     = {Emotional chatting machine: Emotional conversation generation with internal and external memory},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {32},
  year      = {2018},
}

@inproceedings{sadigh2017active,
  author    = {Dorsa Sadigh and Anca D. Dragan and Shankar Sastry and Sanjit A. Seshia},
  title     = {Active Preference-Based Learning of Reward Functions},
  booktitle = {Robotics: Science and Systems XIII},
  editor    = {Nancy M. Amato and Siddhartha S. Srinivasa and Nora Ayanian and Scott Kuindersma},
  year      = {2017},
}

@misc{wang2023survey,
  author        = {Lei Wang and Chen Ma and Xueyang Feng and Zeyu Zhang and Hao Yang and Jingsen Zhang and Zhiyuan Chen and Jiakai Tang and Xu Chen and Yankai Lin and Wayne Xin Zhao and Zhewei Wei and Ji-Rong Wen},
  title         = {A Survey on Large Language Model based Autonomous Agents},
  year          = {2023},
  eprint        = {2308.11432},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI},
}

@article{yang2023foundation,
  author  = {Yang, Sherry and Nachum, Ofir and Du, Yilun and Wei, Jason and Abbeel, Pieter and Schuurmans, Dale},
  title   = {Foundation models for decision making: Problems, methods, and opportunities},
  journal = {arXiv preprint arXiv:2303.04129},
  year    = {2023},
}

@article{vaswani2017attention,
  author  = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, {\L}ukasz and Polosukhin, Illia},
  title   = {Attention is all you need},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {30},
  year    = {2017},
}


@article{bai2022training,
  author  = {Bai, Yuntao and Jones, Andy and Ndousse, Kamal and Askell, Amanda and Chen, Anna and DasSarma, Nova and Drain, Dawn and Fort, Stanislav and Ganguli, Deep and Henighan, Tom and others},
  title   = {Training a helpful and harmless assistant with reinforcement learning from human feedback},
  journal = {arXiv preprint arXiv:2204.05862},
  year    = {2022},
}


@article{wang2022self,
  author  = {Wang, Xuezhi and Wei, Jason and Schuurmans, Dale and Le, Quoc and Chi, Ed and Zhou, Denny},
  title   = {Self-consistency improves chain of thought reasoning in language models},
  journal = {arXiv preprint arXiv:2203.11171},
  year    = {2022},
}

@article{OpenAI2023GPT4TR,
  author  = {{OpenAI}},
  title   = {{GPT-4} Technical Report},
  journal = {arXiv preprint arXiv:2303.08774},
  year    = {2023},
}


@inproceedings{chen2023open,
  author    = {Chen, Boyuan and Xia, Fei and Ichter, Brian and Rao, Kanishka and Gopalakrishnan, Keerthana and Ryoo, Michael S and Stone, Austin and Kappler, Daniel},
  title     = {Open-vocabulary queryable scene representations for real world planning},
  booktitle = {2023 IEEE International Conference on Robotics and Automation (ICRA)},
  pages     = {11509--11522},
  year      = {2023},
}


@article{chen2023teaching,
  author  = {Xinyun Chen and Maxwell Lin and Nathanael Sch{\"a}rli and Denny Zhou},
  title   = {Teaching Large Language Models to Self-Debug},
  journal = {arXiv preprint arXiv:2304.05128},
  year    = {2023},
}

@article{madaan2023selfrefine,
  author  = {Madaan, Aman and Tandon, Niket and Gupta, Prakhar and Hallinan, Skyler and Gao, Luyu and Wiegreffe, Sarah and Alon, Uri and Dziri, Nouha and Prabhumoye, Shrimai and Yang, Yiming and others},
  title   = {Self-refine: Iterative refinement with self-feedback},
  journal = {arXiv preprint arXiv:2303.17651},
  year    = {2023},
}

@article{saunders2022selfcritique,
  author  = {Saunders, William and Yeh, Catherine and Wu, Jeff and Bills, Steven and Ouyang, Long and Ward, Jonathan and Leike, Jan},
  title   = {Self-critiquing models for assisting human evaluators},
  journal = {arXiv preprint arXiv:2206.05802},
  year    = {2022},
}

@article{xu2023expertprompting,
  author  = {Xu, Benfeng and Yang, An and Lin, Junyang and Wang, Quan and Zhou, Chang and Zhang, Yongdong and Mao, Zhendong},
  title   = {{ExpertPrompting}: Instructing Large Language Models to be Distinguished Experts},
  journal = {arXiv preprint arXiv:2305.14688},
  year    = {2023},
}

@article{kojima2022large,
  author  = {Kojima, Takeshi and Gu, Shixiang Shane and Reid, Machel and Matsuo, Yutaka and Iwasawa, Yusuke},
  title   = {Large language models are zero-shot reasoners},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {35},
  pages   = {22199--22213},
  year    = {2022},
}

@misc{liu2023chain,
  author        = {Hao Liu and Carmelo Sferrazza and Pieter Abbeel},
  title         = {Chain of Hindsight Aligns Language Models with Feedback},
  year          = {2023},
  eprint        = {2302.02676},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
}


@article{campbell2002deep,
  author    = {Campbell, Murray and Hoane Jr, A Joseph and Hsu, Feng-hsiung},
  title     = {{Deep Blue}},
  journal   = {Artificial Intelligence},
  volume    = {134},
  number    = {1--2},
  pages     = {57--83},
  year      = {2002},
  publisher = {Elsevier},
}


@article{silver2017mastering,
  author    = {Silver, David and Schrittwieser, Julian and Simonyan, Karen and Antonoglou, Ioannis and Huang, Aja and Guez, Arthur and Hubert, Thomas and Baker, Lucas and Lai, Matthew and Bolton, Adrian and others},
  title     = {Mastering the game of {Go} without human knowledge},
  journal   = {Nature},
  volume    = {550},
  number    = {7676},
  pages     = {354--359},
  year      = {2017},
  publisher = {Nature Publishing Group},
}


@misc{wang2023planandsolve,
  author        = {Lei Wang and Wanyu Xu and Yihuai Lan and Zhiqiang Hu and Yunshi Lan and Roy Ka-Wei Lee and Ee-Peng Lim},
  title         = {Plan-and-Solve Prompting: Improving Zero-Shot Chain-of-Thought Reasoning by Large Language Models},
  year          = {2023},
  eprint        = {2305.04091},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}


@article{kim2023language,
  author  = {Geunwoo Kim and Pierre Baldi and Stephen McAleer},
  title   = {Language Models can Solve Computer Tasks},
  journal = {arXiv preprint arXiv:2303.17491},
  year    = {2023},
}

@article{wei2022chain,
  author  = {Wei, Jason and Wang, Xuezhi and Schuurmans, Dale and Bosma, Maarten and Chi, Ed and Le, Quoc and Zhou, Denny},
  title   = {Chain of thought prompting elicits reasoning in large language models},
  journal = {arXiv preprint arXiv:2201.11903},
  year    = {2022},
}

@inproceedings{Lu2021NeuroLogicAD,
  author    = {Ximing Lu and Sean Welleck and Peter West and Liwei Jiang and Jungo Kasai and Daniel Khashabi and Ronan Le Bras and Lianhui Qin and Youngjae Yu and Rowan Zellers and Noah A. Smith and Yejin Choi},
  title     = {{NeuroLogic} {A*esque} Decoding: Constrained Text Generation with Lookahead Heuristics},
  booktitle = {North American Chapter of the Association for Computational Linguistics},
  year      = {2021},
}

@misc{xie2023decomposition,
  author        = {Yuxi Xie and Kenji Kawaguchi and Yiran Zhao and Xu Zhao and Min-Yen Kan and Junxian He and Qizhe Xie},
  title         = {Decomposition Enhances Reasoning via Self-Evaluation Guided Decoding},
  year          = {2023},
  eprint        = {2305.00633},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}

@misc{schlag2023large,
  author        = {Imanol Schlag and Sainbayar Sukhbaatar and Asli Celikyilmaz and Wen-tau Yih and Jason Weston and J{\"u}rgen Schmidhuber and Xian Li},
  title         = {Large Language Model Programs},
  year          = {2023},
  eprint        = {2305.05364},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
}


@article{a-star,
  author  = {Hart, Peter E. and Nilsson, Nils J. and Raphael, Bertram},
  title   = {A Formal Basis for the Heuristic Determination of Minimum Cost Paths},
  journal = {IEEE Transactions on Systems Science and Cybernetics},
  volume  = {4},
  number  = {2},
  pages   = {100--107},
  year    = {1968},
  doi     = {10.1109/TSSC.1968.300136},
}

@misc{paul2023refiner,
  author        = {Debjit Paul and Mete Ismayilzada and Maxime Peyrard and Beatriz Borges and Antoine Bosselut and Robert West and Boi Faltings},
  title         = {{REFINER}: Reasoning Feedback on Intermediate Representations},
  year          = {2023},
  eprint        = {2304.01904},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}

@comment{gao2023pal (arXiv misc version, eprint 2211.10435, primaryClass cs.CL)
  disabled: the same citation key is defined again later in this file as the
  ICML 2023 inproceedings entry, and a repeated key makes BibTeX report a
  "Repeated entry" error. The key now resolves to the published version.}

@article{liu2023pretrainpromptpredict,
  author     = {Liu, Pengfei and Yuan, Weizhe and Fu, Jinlan and Jiang, Zhengbao and Hayashi, Hiroaki and Neubig, Graham},
  title      = {Pre-Train, Prompt, and Predict: A Systematic Survey of Prompting Methods in Natural Language Processing},
  journal    = {ACM Computing Surveys},
  volume     = {55},
  number     = {9},
  articleno  = {195},
  year       = {2023},
  issue_date = {September 2023},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  issn       = {0360-0300},
}

@article{laird1986chunking,
  author    = {Laird, John E and Rosenbloom, Paul S and Newell, Allen},
  title     = {Chunking in {Soar}: The anatomy of a general learning mechanism},
  journal   = {Machine Learning},
  volume    = {1},
  pages     = {11--46},
  year      = {1986},
  publisher = {Springer},
}

@article{gershman2015computational,
  author    = {Gershman, Samuel J and Horvitz, Eric J and Tenenbaum, Joshua B},
  title     = {Computational rationality: A converging paradigm for intelligence in brains, minds, and machines},
  journal   = {Science},
  volume    = {349},
  number    = {6245},
  pages     = {273--278},
  year      = {2015},
  publisher = {American Association for the Advancement of Science},
}

@article{langley2009cognitive,
  author    = {Langley, Pat and Laird, John E and Rogers, Seth},
  title     = {Cognitive architectures: Research issues and challenges},
  journal   = {Cognitive Systems Research},
  volume    = {10},
  number    = {2},
  pages     = {141--160},
  year      = {2009},
  publisher = {Elsevier},
}

@article{koedinger1997intelligent,
  author  = {Koedinger, Kenneth R and Anderson, John R and Hadley, William H and Mark, Mary A},
  title   = {Intelligent tutoring goes to school in the big city},
  journal = {International Journal of Artificial Intelligence in Education},
  volume  = {8},
  number  = {1},
  pages   = {30--43},
  year    = {1997},
}

@article{jones1999automated,
  author  = {Jones, Randolph M and Laird, John E and Nielsen, Paul E and Coulter, Karen J and Kenny, Patrick and Koss, Frank V},
  title   = {Automated intelligent pilots for combat flight simulation},
  journal = {AI Magazine},
  volume  = {20},
  number  = {1},
  pages   = {27--41},
  year    = {1999},
}

@article{lieder2020resource,
  author    = {Lieder, Falk and Griffiths, Thomas L},
  title     = {Resource-rational analysis: Understanding human cognition as the optimal use of limited computational resources},
  journal   = {Behavioral and Brain Sciences},
  volume    = {43},
  pages     = {e1},
  year      = {2020},
  publisher = {Cambridge University Press},
}

@article{callaway2022rational,
  author    = {Callaway, Frederick and van Opheusden, Bas and Gul, Sayan and Das, Priyam and Krueger, Paul M and Griffiths, Thomas L and Lieder, Falk},
  title     = {Rational use of cognitive resources in human planning},
  journal   = {Nature Human Behaviour},
  volume    = {6},
  number    = {8},
  pages     = {1112--1125},
  year      = {2022},
  publisher = {Nature Publishing Group},
  address   = {London, UK},
}

@article{russek2022time,
  author  = {Russek, Evan and Acosta-Kane, Daniel and van Opheusden, Bas and Mattar, Marcelo G and Griffiths, Tom},
  title   = {Time spent thinking in online chess reflects the value of computation},
  journal = {PsyArXiv},
  year    = {2022},
}

@article{wray2021language,
  author  = {Wray, Robert E and Kirk, James R and Laird, John E},
  title   = {Language Models as a Knowledge Source for Cognitive Agents},
  journal = {arXiv preprint arXiv:2109.08270},
  year    = {2021},
}

@article{kirk2023improving,
  author  = {Kirk, James R and Wray, Robert E and Lindes, Peter and Laird, John E.},
  title   = {Improving Knowledge Extraction from {LLMs} for Robotic Task Learning through Agent Analysis},
  journal = {arXiv preprint arXiv:2306.06770},
  year    = {2023},
}

@article{kirk2022evaluating,
  author  = {Kirk, James R and Wray, Robert E and Lindes, Peter and Laird, John E},
  title   = {Evaluating diverse knowledge sources for online one-shot learning of novel tasks},
  journal = {arXiv preprint arXiv:2208.09554},
  year    = {2022},
}

@article{mattar2018prioritized,
  author    = {Mattar, Marcelo G and Daw, Nathaniel D},
  title     = {Prioritized memory access explains planning and hippocampal replay},
  journal   = {Nature Neuroscience},
  volume    = {21},
  number    = {11},
  pages     = {1609--1617},
  year      = {2018},
  publisher = {Nature Publishing Group},
  address   = {New York, NY, USA},
}


@inproceedings{huang22language,
  author    = {Huang, Wenlong and Abbeel, Pieter and Pathak, Deepak and Mordatch, Igor},
  title     = {Language Models as Zero-Shot Planners: Extracting Actionable Knowledge for Embodied Agents},
  booktitle = {Proceedings of the 39th International Conference on Machine Learning},
  editor    = {Chaudhuri, Kamalika and Jegelka, Stefanie and Song, Le and Szepesvari, Csaba and Niu, Gang and Sabato, Sivan},
  series    = {Proceedings of Machine Learning Research},
  volume    = {162},
  pages     = {9118--9147},
  month     = jul,
  year      = {2022},
}


@article{anderson2003newell,
  author    = {Anderson, John R and Lebiere, Christian},
  title     = {The {N}ewell test for a theory of cognition},
  journal   = {Behavioral and Brain Sciences},
  volume    = {26},
  number    = {5},
  pages     = {587--601},
  year      = {2003},
  publisher = {Cambridge University Press},
}

@article{newell1992precis,
  author    = {Newell, Allen},
  title     = {Pr{\'e}cis of unified theories of cognition},
  journal   = {Behavioral and Brain Sciences},
  volume    = {15},
  number    = {3},
  pages     = {425--437},
  year      = {1992},
  publisher = {Cambridge University Press},
}

@article{simon1971human,
  author    = {Simon, Herbert A and Newell, Allen},
  title     = {Human problem solving: The state of the theory in 1970.},
  journal   = {American Psychologist},
  volume    = {26},
  number    = {2},
  pages     = {145--159},
  year      = {1971},
  publisher = {American Psychological Association},
}


@article{laird1987soar,
  author    = {Laird, John E and Newell, Allen and Rosenbloom, Paul S},
  title     = {Soar: An architecture for general intelligence},
  journal   = {Artificial Intelligence},
  volume    = {33},
  number    = {1},
  pages     = {1--64},
  year      = {1987},
  publisher = {Elsevier},
}

@article{newell1980physical,
  author    = {Newell, Allen},
  title     = {Physical symbol systems},
  journal   = {Cognitive Science},
  volume    = {4},
  number    = {2},
  pages     = {135--183},
  year      = {1980},
  publisher = {Elsevier},
}

@article{sun2004desiderata,
  author    = {Sun, Ron},
  title     = {Desiderata for cognitive architectures},
  journal   = {Philosophical Psychology},
  volume    = {17},
  number    = {3},
  pages     = {341--373},
  year      = {2004},
  publisher = {Taylor \& Francis},
}

@article{adams2012mapping,
  author  = {Adams, Sam and Arel, Itamar and Bach, Joscha and Coop, Robert and Furlan, Rod and Goertzel, Ben and Hall, J Storrs and Samsonovich, Alexei and Scheutz, Matthias and Schlesinger, Matthew and others},
  title   = {Mapping the landscape of human-level artificial general intelligence},
  journal = {AI Magazine},
  volume  = {33},
  number  = {1},
  pages   = {25--42},
  year    = {2012},
}

@inproceedings{dasgupta2022collaborating,
  author    = {Ishita Dasgupta and Christine Kaeser-Chen and Kenneth Marino and Arun Ahuja and Sheila Babayan and Felix Hill and Rob Fergus},
  title     = {Collaborating with language models for embodied reasoning},
  booktitle = {Second Workshop on Language and Reinforcement Learning},
  year      = {2022},
}

@article{wang2023voyager,
  author  = {Wang, Guanzhi and Xie, Yuqi and Jiang, Yunfan and Mandlekar, Ajay and Xiao, Chaowei and Zhu, Yuke and Fan, Linxi and Anandkumar, Anima},
  title   = {Voyager: An open-ended embodied agent with large language models},
  journal = {arXiv preprint arXiv:2305.16291},
  year    = {2023},
}


@inproceedings{sumers2023distilling,
  author    = {Sumers, Theodore and Marino, Kenneth and Ahuja, Arun and Fergus, Rob and Dasgupta, Ishita},
  title     = {Distilling Internet-Scale Vision-Language Models into Embodied Agents},
  booktitle = {Proceedings of the 40th International Conference on Machine Learning},
  pages     = {32797--32818},
  year      = {2023},
}


@inproceedings{wu2022aichains,
  author    = {Wu, Tongshuang and Terry, Michael and Cai, Carrie Jun},
  title     = {A{I} chains: Transparent and controllable human-{AI} interaction by chaining large language model prompts},
  booktitle = {Proceedings of the 2022 CHI Conference on Human Factors in Computing Systems},
  pages     = {1--22},
  year      = {2022},
}

@article{romero2023synergistic,
  author  = {Romero, Oscar J and Zimmerman, John and Steinfeld, Aaron and Tomasic, Anthony},
  title   = {Synergistic Integration of Large Language Models and Cognitive Architectures for Robust {AI}: An Exploratory Analysis},
  journal = {arXiv preprint arXiv:2308.09830},
  year    = {2023},
}

@article{dohan2022languagemodelcascades,
  author  = {Dohan, David and Xu, Winnie and Lewkowycz, Aitor and Austin, Jacob and Bieber, David and Lopes, Raphael Gontijo and Wu, Yuhuai and Michalewski, Henryk and Saurous, Rif A and Sohl-Dickstein, Jascha and others},
  title   = {Language model cascades},
  journal = {arXiv preprint arXiv:2207.10342},
  year    = {2022},
}

@inproceedings{wu2022promptchainer,
  author    = {Wu, Tongshuang and Jiang, Ellen and Donsbach, Aaron and Gray, Jeff and Molina, Alejandra and Terry, Michael and Cai, Carrie J},
  title     = {{PromptChainer}: Chaining large language model prompts through visual programming},
  booktitle = {CHI Conference on Human Factors in Computing Systems Extended Abstracts},
  pages     = {1--10},
  year      = {2022},
}

@inproceedings{creswell2023selectioninference,
  author    = {Antonia Creswell and Murray Shanahan and Irina Higgins},
  title     = {Selection-Inference: Exploiting Large Language Models for Interpretable Logical Reasoning},
  booktitle = {The Eleventh International Conference on Learning Representations},
  year      = {2023},
}

@inproceedings{tafjord2021proofwriter,
  author    = {Tafjord, Oyvind and Dalvi, Bhavana and Clark, Peter},
  title     = {{ProofWriter}: Generating Implications, Proofs, and Abductive Statements over Natural Language},
  booktitle = {Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021},
  pages     = {3621--3634},
  year      = {2021},
}

@inproceedings{jin2022when,
  author    = {Zhijing Jin and Sydney Levine and Fernando Gonzalez Adauto and Ojasv Kamal and Maarten Sap and Mrinmaya Sachan and Rada Mihalcea and Joshua B. Tenenbaum and Bernhard Sch{\"o}lkopf},
  title     = {When to Make Exceptions: Exploring Language Models as Accounts of Human Moral Judgment},
  booktitle = {Advances in Neural Information Processing Systems},
  editor    = {Alice H. Oh and Alekh Agarwal and Danielle Belgrave and Kyunghyun Cho},
  year      = {2022},
}


@article{ganguli2023capacitymoralcorrection,
  author  = {Ganguli, Deep and Askell, Amanda and Schiefer, Nicholas and Liao, Thomas and Luko{\v{s}}i{\=u}t{\.e}, Kamil{\.e} and Chen, Anna and Goldie, Anna and Mirhoseini, Azalia and Olsson, Catherine and Hernandez, Danny and others},
  title   = {The capacity for moral self-correction in large language models},
  journal = {arXiv preprint arXiv:2302.07459},
  year    = {2023},
}

@article{bai2022constitutional,
  author  = {Bai, Yuntao and Kadavath, Saurav and Kundu, Sandipan and Askell, Amanda and Kernion, Jackson and Jones, Andy and Chen, Anna and Goldie, Anna and Mirhoseini, Azalia and McKinnon, Cameron and others},
  title   = {Constitutional {AI}: Harmlessness from {AI} feedback},
  journal = {arXiv preprint arXiv:2212.08073},
  year    = {2022},
}


@inproceedings{zhang2023planning,
  author    = {Shun Zhang and Zhenfang Chen and Yikang Shen and Mingyu Ding and Joshua B. Tenenbaum and Chuang Gan},
  title     = {Planning with Large Language Models for Code Generation},
  booktitle = {The Eleventh International Conference on Learning Representations},
  year      = {2023},
}

@misc{wang2023describe,
  author        = {Zihao Wang and Shaofei Cai and Anji Liu and Xiaojian Ma and Yitao Liang},
  title         = {Describe, Explain, Plan and Select: Interactive Planning with Large Language Models Enables Open-World Multi-Task Agents},
  year          = {2023},
  eprint        = {2302.01560},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI},
}


@article{inner2022huang,
  author  = {Huang, Wenlong and Xia, Fei and Xiao, Ted and Chan, Harris and Liang, Jacky and Florence, Pete and Zeng, Andy and Tompson, Jonathan and Mordatch, Igor and Chebotar, Yevgen and others},
  title   = {Inner monologue: Embodied reasoning through planning with language models},
  journal = {arXiv preprint arXiv:2207.05608},
  year    = {2022},
}


@article{Radford2018ImprovingLU,
  author    = {Radford, Alec and Narasimhan, Karthik and Salimans, Tim and Sutskever, Ilya and others},
  title     = {Improving language understanding by generative pre-training},
  journal   = {OpenAI blog},
  year      = {2018},
  publisher = {OpenAI},
}

@article{Radford2019LanguageMA,
  author  = {Radford, Alec and Wu, Jeffrey and Child, Rewon and Luan, David and Amodei, Dario and Sutskever, Ilya and others},
  title   = {Language models are unsupervised multitask learners},
  journal = {OpenAI blog},
  volume  = {1},
  number  = {8},
  pages   = {9},
  year    = {2019},
}

@inproceedings{NEURIPS2020_1457c0d6,
  author    = {Brown, Tom and Mann, Benjamin and Ryder, Nick and Subbiah, Melanie and Kaplan, Jared D and Dhariwal, Prafulla and Neelakantan, Arvind and Shyam, Pranav and Sastry, Girish and Askell, Amanda and Agarwal, Sandhini and Herbert-Voss, Ariel and Krueger, Gretchen and Henighan, Tom and Child, Rewon and Ramesh, Aditya and Ziegler, Daniel and Wu, Jeffrey and Winter, Clemens and Hesse, Chris and Chen, Mark and Sigler, Eric and Litwin, Mateusz and Gray, Scott and Chess, Benjamin and Clark, Jack and Berner, Christopher and McCandlish, Sam and Radford, Alec and Sutskever, Ilya and Amodei, Dario},
  title     = {Language Models are Few-Shot Learners},
  booktitle = {Advances in Neural Information Processing Systems},
  editor    = {H. Larochelle and M. Ranzato and R. Hadsell and M.F. Balcan and H. Lin},
  volume    = {33},
  pages     = {1877--1901},
  year      = {2020},
  publisher = {Curran Associates, Inc.},
  url       = {https://proceedings.neurips.cc/paper_files/paper/2020/file/1457c0d6bfcb4967418bfb8ac142f64a-Paper.pdf},
}


@article{wallace2022automated,
  author  = {Wallace, Eric and Tomlin, Nicholas and Xu, Albert and Yang, Kevin and Pathak, Eshaan and Ginsberg, Matthew and Klein, Dan},
  title   = {Automated Crossword Solving},
  journal = {arXiv preprint arXiv:2205.09665},
  year    = {2022},
}

@article{liu2021makes,
  author  = {Liu, Jiachang and Shen, Dinghan and Zhang, Yizhe and Dolan, Bill and Carin, Lawrence and Chen, Weizhu},
  title   = {What Makes Good In-Context Examples for {GPT-3}?},
  journal = {arXiv preprint arXiv:2101.06804},
  year    = {2021},
}

@article{touvron2023llama,
  author  = {Touvron, Hugo and Lavril, Thibaut and Izacard, Gautier and Martinet, Xavier and Lachaux, Marie-Anne and Lacroix, Timoth{\'e}e and Rozi{\`e}re, Baptiste and Goyal, Naman and Hambro, Eric and Azhar, Faisal and others},
  title   = {{LLaMA}: Open and efficient foundation language models},
  journal = {arXiv preprint arXiv:2302.13971},
  year    = {2023},
}


@article{chowdhery2022palm,
  author  = {Chowdhery, Aakanksha and Narang, Sharan and Devlin, Jacob and Bosma, Maarten and Mishra, Gaurav and Roberts, Adam and Barham, Paul and Chung, Hyung Won and Sutton, Charles and Gehrmann, Sebastian and others},
  title   = {{PaLM}: Scaling language modeling with {Pathways}},
  journal = {arXiv preprint arXiv:2204.02311},
  year    = {2022},
}


@article{khot2022decomposed,
  author  = {Khot, Tushar and Trivedi, Harsh and Finlayson, Matthew and Fu, Yao and Richardson, Kyle and Clark, Peter and Sabharwal, Ashish},
  title   = {Decomposed prompting: A modular approach for solving complex tasks},
  journal = {arXiv preprint arXiv:2210.02406},
  year    = {2022},
}

@article{zhou2022least,
  author  = {Zhou, Denny and Sch{\"a}rli, Nathanael and Hou, Le and Wei, Jason and Scales, Nathan and Wang, Xuezhi and Schuurmans, Dale and Cui, Claire and Bousquet, Olivier and Le, Quoc and others},
  title   = {Least-to-most prompting enables complex reasoning in large language models},
  journal = {arXiv preprint arXiv:2205.10625},
  year    = {2022},
}

@inproceedings{guu2020retrieval,
  author    = {Guu, Kelvin and Lee, Kenton and Tung, Zora and Pasupat, Panupong and Chang, Mingwei},
  title     = {Retrieval augmented language model pre-training},
  booktitle = {International Conference on Machine Learning},
  pages     = {3929--3938},
  year      = {2020},
}

@article{mialon2023augmented,
  author  = {Mialon, Gr{\'e}goire and Dess{\`\i}, Roberto and Lomeli, Maria and Nalmpantis, Christoforos and Pasunuru, Ram and Raileanu, Roberta and Rozi{\`e}re, Baptiste and Schick, Timo and Dwivedi-Yu, Jane and Celikyilmaz, Asli and others},
  title   = {Augmented language models: a survey},
  journal = {arXiv preprint arXiv:2302.07842},
  year    = {2023},
}

@article{lewis2020retrieval,
  author  = {Lewis, Patrick and Perez, Ethan and Piktus, Aleksandra and Petroni, Fabio and Karpukhin, Vladimir and Goyal, Naman and K{\"u}ttler, Heinrich and Lewis, Mike and Yih, Wen-tau and Rockt{\"a}schel, Tim and others},
  title   = {Retrieval-augmented generation for knowledge-intensive {NLP} tasks},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {33},
  pages   = {9459--9474},
  year    = {2020},
}

@article{yao2022react,
  author  = {Yao, Shunyu and Zhao, Jeffrey and Yu, Dian and Du, Nan and Shafran, Izhak and Narasimhan, Karthik and Cao, Yuan},
  title   = {{ReAct}: Synergizing reasoning and acting in language models},
  journal = {arXiv preprint arXiv:2210.03629},
  year    = {2022},
}

@article{schick2023toolformer,
  author  = {Schick, Timo and Dwivedi-Yu, Jane and Dess{\`\i}, Roberto and Raileanu, Roberta and Lomeli, Maria and Zettlemoyer, Luke and Cancedda, Nicola and Scialom, Thomas},
  title   = {Toolformer: Language models can teach themselves to use tools},
  journal = {arXiv preprint arXiv:2302.04761},
  year    = {2023},
}

@inproceedings{borgeaud2022improving,
  author    = {Borgeaud, Sebastian and Mensch, Arthur and Hoffmann, Jordan and Cai, Trevor and Rutherford, Eliza and Millican, Katie and Van Den Driessche, George Bm and Lespiau, Jean-Baptiste and Damoc, Bogdan and Clark, Aidan and others},
  title     = {Improving language models by retrieving from trillions of tokens},
  booktitle = {International Conference on Machine Learning},
  pages     = {2206--2240},
  year      = {2022},
}

@article{tang2023toolalpaca,
  author  = {Tang, Qiaoyu and Deng, Ziliang and Lin, Hongyu and Han, Xianpei and Liang, Qiao and Sun, Le},
  title   = {{ToolAlpaca}: Generalized Tool Learning for Language Models with 3000 Simulated Cases},
  journal = {arXiv preprint arXiv:2306.05301},
  year    = {2023},
}

@article{song2023restgpt,
  author  = {Song, Yifan and Xiong, Weimin and Zhu, Dawei and Li, Cheng and Wang, Ke and Tian, Ye and Li, Sujian},
  title   = {{RestGPT}: Connecting Large Language Models with Real-World Applications via {RESTful} {APIs}},
  journal = {arXiv preprint arXiv:2306.06624},
  year    = {2023},
}

@article{yao2023tree,
  author  = {Yao, Shunyu and Yu, Dian and Zhao, Jeffrey and Shafran, Izhak and Griffiths, Thomas L and Cao, Yuan and Narasimhan, Karthik},
  title   = {Tree of thoughts: Deliberate problem solving with large language models},
  journal = {arXiv preprint arXiv:2305.10601},
  year    = {2023},
}

@article{hao2023reasoning,
  author  = {Hao, Shibo and Gu, Yi and Ma, Haodi and Hong, Joshua Jiahua and Wang, Zhen and Wang, Daisy Zhe and Hu, Zhiting},
  title   = {Reasoning with language model is planning with world model},
  journal = {arXiv preprint arXiv:2305.14992},
  year    = {2023},
}

@article{shinn2023reflexion,
  author  = {Shinn, Noah and Cassano, Federico and Labash, Beck and Gopinath, Ashwin and Narasimhan, Karthik and Yao, Shunyu},
  title   = {Reflexion: Language Agents with Verbal Reinforcement Learning},
  journal = {arXiv preprint arXiv:2303.11366},
  year    = {2023},
}

@article{branavan2012learning,
  author  = {Branavan, S. R. K. and Silver, David and Barzilay, Regina},
  title   = {Learning to win by reading manuals in a {M}onte-{C}arlo framework},
  journal = {Journal of Artificial Intelligence Research},
  volume  = {43},
  pages   = {661--704},
  year    = {2012},
}

@article{park2023generative,
  author  = {Park, Joon Sung and O'Brien, Joseph C and Cai, Carrie J and Morris, Meredith Ringel and Liang, Percy and Bernstein, Michael S},
  title   = {Generative agents: Interactive simulacra of human behavior},
  journal = {arXiv preprint arXiv:2304.03442},
  year    = {2023},
}

@article{madaan2023self,
  author  = {Madaan, Aman and Tandon, Niket and Gupta, Prakhar and Hallinan, Skyler and Gao, Luyu and Wiegreffe, Sarah and Alon, Uri and Dziri, Nouha and Prabhumoye, Shrimai and Yang, Yiming and others},
  title   = {Self-refine: Iterative refinement with self-feedback},
  journal = {arXiv preprint arXiv:2303.17651},
  year    = {2023},
}

@article{griffiths2020understanding,
  author    = {Griffiths, Thomas L},
  title     = {Understanding human intelligence through human limitations},
  journal   = {Trends in Cognitive Sciences},
  volume    = {24},
  number    = {11},
  pages     = {873--883},
  year      = {2020},
  publisher = {Elsevier},
}

@article{zhou2022large,
  author  = {Zhou, Yongchao and Muresanu, Andrei Ioan and Han, Ziwen and Paster, Keiran and Pitis, Silviu and Chan, Harris and Ba, Jimmy},
  title   = {Large language models are human-level prompt engineers},
  journal = {arXiv preprint arXiv:2211.01910},
  year    = {2022},
}

@article{ahn2022can,
  author  = {Ahn, Michael and Brohan, Anthony and Brown, Noah and Chebotar, Yevgen and Cortes, Omar and David, Byron and Finn, Chelsea and Fu, Chuyuan and Gopalakrishnan, Keerthana and Hausman, Karol and others},
  title   = {Do as {I} can, not as {I} say: Grounding language in robotic affordances},
  journal = {arXiv preprint arXiv:2204.01691},
  year    = {2022},
}

@article{shen2023hugginggpt,
  author  = {Shen, Yongliang and Song, Kaitao and Tan, Xu and Li, Dongsheng and Lu, Weiming and Zhuang, Yueting},
  title   = {{HuggingGPT}: Solving {AI} tasks with {ChatGPT} and its friends in {HuggingFace}},
  journal = {arXiv preprint arXiv:2303.17580},
  year    = {2023},
}

@article{zeng2022socratic,
  author  = {Zeng, Andy and Attarian, Maria and Ichter, Brian and Choromanski, Krzysztof and Wong, Adrian and Welker, Stefan and Tombari, Federico and Purohit, Aveek and Ryoo, Michael and Sindhwani, Vikas and others},
  title   = {Socratic models: Composing zero-shot multimodal reasoning with language},
  journal = {arXiv preprint arXiv:2204.00598},
  year    = {2022},
}

@inproceedings{huang2022language,
  author    = {Huang, Wenlong and Abbeel, Pieter and Pathak, Deepak and Mordatch, Igor},
  title     = {Language models as zero-shot planners: Extracting actionable knowledge for embodied agents},
  booktitle = {International Conference on Machine Learning},
  pages     = {9118--9147},
  year      = {2022},
}


@article{liu2022mind,
  author  = {Liu, Ruibo and Wei, Jason and Gu, Shixiang Shane and Wu, Te-Yen and Vosoughi, Soroush and Cui, Claire and Zhou, Denny and Dai, Andrew M},
  title   = {Mind's Eye: Grounded Language Model Reasoning through Simulation},
  journal = {arXiv preprint arXiv:2210.05359},
  year    = {2022},
}

@inproceedings{gao2023pal,
  author    = {Gao, Luyu and Madaan, Aman and Zhou, Shuyan and Alon, Uri and Liu, Pengfei and Yang, Yiming and Callan, Jamie and Neubig, Graham},
  title     = {{PAL}: Program-aided language models},
  booktitle = {International Conference on Machine Learning},
  pages     = {10764--10799},
  year      = {2023},
}

@article{yao2020keep,
  author  = {Yao, Shunyu and Rao, Rohan and Hausknecht, Matthew and Narasimhan, Karthik},
  title   = {Keep {CALM} and explore: Language models for action generation in text-based games},
  journal = {arXiv preprint arXiv:2010.02903},
  year    = {2020},
}

@article{yao2021reading,
  author  = {Yao, Shunyu and Narasimhan, Karthik and Hausknecht, Matthew},
  title   = {Reading and acting while blindfolded: The need for semantics in text game agents},
  journal = {arXiv preprint arXiv:2103.13552},
  year    = {2021},
}

@article{weston2014memory,
  author  = {Weston, Jason and Chopra, Sumit and Bordes, Antoine},
  title   = {Memory networks},
  journal = {arXiv preprint arXiv:1410.3916},
  year    = {2014},
}

@article{rubin2021learning,
  author  = {Rubin, Ohad and Herzig, Jonathan and Berant, Jonathan},
  title   = {Learning to retrieve prompts for in-context learning},
  journal = {arXiv preprint arXiv:2112.08633},
  year    = {2021},
}


@article{chen2017reading,
  author  = {Chen, Danqi and Fisch, Adam and Weston, Jason and Bordes, Antoine},
  title   = {Reading {Wikipedia} to answer open-domain questions},
  journal = {arXiv preprint arXiv:1704.00051},
  year    = {2017},
}


@article{narasimhan2018deep,
  bibtex_show = {true},
  author      = {Narasimhan, Karthik and Barzilay, Regina and Jaakkola, Tommi},
  title       = {Deep Transfer in Reinforcement Learning by Language Grounding},
  journal     = {Journal of Artificial Intelligence Research},
  year        = {2018},
  code        = {https://github.com/karthikncode/Grounded-RL-Transfer},
  html        = {https://arxiv.org/pdf/1708.00133.pdf},
  tag         = {NLP},
  tagg        = {RL},
}


@inproceedings{wang2021grounding,
  bibtex_show = {true},
  author      = {Hanjie, Austin W. and Zhong, Victor and Narasimhan, Karthik},
  title       = {Grounding Language to Entities and Dynamics for Generalization in Reinforcement Learning},
  booktitle   = {International Conference on Machine Learning (ICML)},
  year        = {2021},
  html        = {https://arxiv.org/abs/2101.07393},
  tag         = {NLP},
  tagg        = {RL},
}


@inproceedings{cote2019textworld,
  author    = {C{\^o}t{\'e}, Marc-Alexandre and K{\'a}d{\'a}r, Akos and Yuan, Xingdi and Kybartas, Ben and Barnes, Tavian and Fine, Emery and Moore, James and Hausknecht, Matthew and El Asri, Layla and Adada, Mahmoud and others},
  title     = {{TextWorld}: A learning environment for text-based games},
  booktitle = {Computer Games: 7th Workshop, CGW 2018},
  pages     = {41--75},
  year      = {2019},
  publisher = {Springer},
}

@article{shridhar2020alfworld,
  author  = {Shridhar, Mohit and Yuan, Xingdi and C{\^o}t{\'e}, Marc-Alexandre and Bisk, Yonatan and Trischler, Adam and Hausknecht, Matthew},
  title   = {{ALFWorld}: Aligning text and embodied environments for interactive learning},
  journal = {arXiv preprint arXiv:2010.03768},
  year    = {2020},
}

@article{wang2022scienceworld,
  author  = {Wang, Ruoyao and Jansen, Peter and C{\^o}t{\'e}, Marc-Alexandre and Ammanabrolu, Prithviraj},
  title   = {{ScienceWorld}: Is your Agent Smarter than a 5th Grader?},
  journal = {arXiv preprint arXiv:2203.07540},
  year    = {2022},
}

@article{parisi2022talm,
  author  = {Parisi, Aaron and Zhao, Yao and Fiedel, Noah},
  title   = {{TALM}: Tool augmented language models},
  journal = {arXiv preprint arXiv:2205.12255},
  year    = {2022},
}


@article{yao2022webshop,
  author  = {Yao, Shunyu and Chen, Howard and Yang, John and Narasimhan, Karthik},
  title   = {{WebShop}: Towards scalable real-world web interaction with grounded language agents},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {35},
  pages   = {20744--20757},
  year    = {2022},
}

@article{chomsky1956three,
  author    = {Chomsky, Noam},
  title     = {Three models for the description of language},
  journal   = {IRE Transactions on Information Theory},
  volume    = {2},
  number    = {3},
  pages     = {113--124},
  year      = {1956},
  publisher = {IEEE},
}

@article{alayrac2022flamingo,
  author  = {Alayrac, Jean-Baptiste and Donahue, Jeff and Luc, Pauline and Miech, Antoine and Barr, Iain and Hasson, Yana and Lenc, Karel and Mensch, Arthur and Millican, Katherine and Reynolds, Malcolm and others},
  title   = {Flamingo: a visual language model for few-shot learning},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {35},
  pages   = {23716--23736},
  year    = {2022},
}


@article{zhou2023webarena,
  author  = {Zhou, Shuyan and Xu, Frank F and Zhu, Hao and Zhou, Xuhui and Lo, Robert and Sridhar, Abishek and Cheng, Xianyi and Bisk, Yonatan and Fried, Daniel and Alon, Uri and others},
  title   = {{WebArena}: A Realistic Web Environment for Building Autonomous Agents},
  journal = {arXiv preprint arXiv:2307.13854},
  year    = {2023},
}


@article{gur2023real,
  author  = {Gur, Izzeddin and Furuta, Hiroki and Huang, Austin and Safdari, Mustafa and Matsuo, Yutaka and Eck, Douglas and Faust, Aleksandra},
  title   = {A real-world {WebAgent} with planning, long context understanding, and program synthesis},
  journal = {arXiv preprint arXiv:2307.12856},
  year    = {2023},
}

@article{deng2023mind2web,
  author  = {Deng, Xiang and Gu, Yu and Zheng, Boyuan and Chen, Shijie and Stevens, Samuel and Wang, Boshi and Sun, Huan and Su, Yu},
  title   = {{Mind2Web}: Towards a Generalist Agent for the Web},
  journal = {arXiv preprint arXiv:2306.06070},
  year    = {2023},
}

@article{qin2023toolllm,
  author  = {Qin, Yujia and Liang, Shihao and Ye, Yining and Zhu, Kunlun and Yan, Lan and Lu, Yaxi and Lin, Yankai and Cong, Xin and Tang, Xiangru and Qian, Bill and others},
  title   = {{ToolLLM}: Facilitating Large Language Models to Master 16000+ Real-world {APIs}},
  journal = {arXiv preprint arXiv:2307.16789},
  year    = {2023},
}

@article{le2022coderl,
  author  = {Le, Hung and Wang, Yue and Gotmare, Akhilesh Deepak and Savarese, Silvio and Hoi, Steven Chu Hong},
  title   = {{CodeRL}: Mastering code generation through pretrained models and deep reinforcement learning},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {35},
  pages   = {21314--21328},
  year    = {2022},
}

@inproceedings{ni2023lever,
  author    = {Ni, Ansong and Iyer, Srini and Radev, Dragomir and Stoyanov, Veselin and Yih, Wen-tau and Wang, Sida and Lin, Xi Victoria},
  title     = {{LEVER}: Learning to verify language-to-code generation with execution},
  booktitle = {International Conference on Machine Learning},
  pages     = {26106--26128},
  year      = {2023},
}

@article{yang2023intercode,
  author  = {Yang, John and Prabhakar, Akshara and Narasimhan, Karthik and Yao, Shunyu},
  title   = {{InterCode}: Standardizing and Benchmarking Interactive Coding with Execution Feedback},
  journal = {arXiv preprint arXiv:2306.14898},
  year    = {2023},
}

@inproceedings{liang2023code,
  author    = {Liang, Jacky and Huang, Wenlong and Xia, Fei and Xu, Peng and Hausman, Karol and Ichter, Brian and Florence, Pete and Zeng, Andy},
  title     = {Code as policies: Language model programs for embodied control},
  booktitle = {2023 IEEE International Conference on Robotics and Automation (ICRA)},
  pages     = {9493--9500},
  year      = {2023},
}

@inproceedings{singh2023progprompt,
  author    = {Singh, Ishika and Blukis, Valts and Mousavian, Arsalan and Goyal, Ankit and Xu, Danfei and Tremblay, Jonathan and Fox, Dieter and Thomason, Jesse and Garg, Animesh},
  title     = {{ProgPrompt}: Generating situated robot task plans using large language models},
  booktitle = {2023 IEEE International Conference on Robotics and Automation (ICRA)},
  pages     = {11523--11530},
  year      = {2023},
}


@article{driess2023palm,
  author  = {Driess, Danny and Xia, Fei and Sajjadi, Mehdi SM and Lynch, Corey and Chowdhery, Aakanksha and Ichter, Brian and Wahid, Ayzaan and Tompson, Jonathan and Vuong, Quan and Yu, Tianhe and others},
  title   = {{PaLM-E}: An embodied multimodal language model},
  journal = {arXiv preprint arXiv:2303.03378},
  year    = {2023},
}

@article{ren2023robots,
  author  = {Ren, Allen Z and Dixit, Anushri and Bodrova, Alexandra and Singh, Sumeet and Tu, Stephen and Brown, Noah and Xu, Peng and Takayama, Leila and Xia, Fei and Varley, Jake and others},
  title   = {Robots that ask for help: Uncertainty alignment for large language model planners},
  journal = {arXiv preprint arXiv:2307.01928},
  year    = {2023},
}


@article{gao2020making,
  author  = {Gao, Tianyu and Fisch, Adam and Chen, Danqi},
  title   = {Making pre-trained language models better few-shot learners},
  journal = {arXiv preprint arXiv:2012.15723},
  year    = {2020},
}

@inproceedings{ellis2021dreamcoder,
  author    = {Ellis, Kevin and Wong, Catherine and Nye, Maxwell and Sabl{\'e}-Meyer, Mathias and Morales, Lucas and Hewitt, Luke and Cary, Luc and Solar-Lezama, Armando and Tenenbaum, Joshua B},
  title     = {{DreamCoder}: Bootstrapping inductive program synthesis with wake-sleep library learning},
  booktitle = {Proceedings of the 42nd ACM SIGPLAN International Conference on Programming Language Design and Implementation},
  pages     = {835--850},
  year      = {2021},
}

@article{zelikman2022star,
  author  = {Zelikman, Eric and Wu, Yuhuai and Mu, Jesse and Goodman, Noah},
  title   = {{STaR}: Bootstrapping reasoning with reasoning},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {35},
  pages   = {15476--15488},
  year    = {2022},
}

@article{huang2022large,
  author  = {Huang, Jiaxin and Gu, Shixiang Shane and Hou, Le and Wu, Yuexin and Wang, Xuezhi and Yu, Hongkun and Han, Jiawei},
  title   = {Large language models can self-improve},
  journal = {arXiv preprint arXiv:2210.11610},
  year    = {2022},
}

@inproceedings{zhou2022docprompting,
  author    = {Zhou, Shuyan and Alon, Uri and Xu, Frank F and Jiang, Zhengbao and Neubig, Graham},
  title     = {{DocPrompting}: Generating code by retrieving the docs},
  booktitle = {The Eleventh International Conference on Learning Representations},
  year      = {2022},
}


@article{brohan2023rt,
  author  = {Brohan, Anthony and Brown, Noah and Carbajal, Justice and Chebotar, Yevgen and Chen, Xi and Choromanski, Krzysztof and Ding, Tianli and Driess, Danny and Dubey, Avinava and Finn, Chelsea and others},
  title   = {{RT-2}: Vision-Language-Action Models Transfer Web Knowledge to Robotic Control},
  journal = {arXiv preprint arXiv:2307.15818},
  year    = {2023},
}

@article{huang2023instruct2act,
  author  = {Huang, Siyuan and Jiang, Zhengkai and Dong, Hao and Qiao, Yu and Gao, Peng and Li, Hongsheng},
  title   = {{Instruct2Act}: Mapping Multi-modality Instructions to Robotic Actions with Large Language Model},
  journal = {arXiv preprint arXiv:2305.11176},
  year    = {2023},
}

@article{brohan2022rt,
  author  = {Brohan, Anthony and Brown, Noah and Carbajal, Justice and Chebotar, Yevgen and Dabis, Joseph and Finn, Chelsea and Gopalakrishnan, Keerthana and Hausman, Karol and Herzog, Alex and Hsu, Jasmine and others},
  title   = {{RT-1}: Robotics transformer for real-world control at scale},
  journal = {arXiv preprint arXiv:2212.06817},
  year    = {2022},
}

@article{weng2023prompt,
  author  = {Weng, Lilian},
  title   = {{LLM}-powered Autonomous Agents},
  journal = {lilianweng.github.io},
  year    = {2023},
  month   = jun,
  url     = {https://lilianweng.github.io/posts/2023-06-23-agent/},
}


@techreport{Nilsson1984ShakeyTR,
  author      = {Nilsson, Nils J.},
  title       = {Shakey the Robot},
  institution = {SRI International},
  type        = {Technical Note},
  number      = {323},
  year        = {1984},
}

@article{chen2021evaluating,
  author  = {Chen, Mark and Tworek, Jerry and Jun, Heewoo and Yuan, Qiming and Pinto, Henrique Ponde de Oliveira and Kaplan, Jared and Edwards, Harri and Burda, Yuri and Joseph, Nicholas and Brockman, Greg and others},
  title   = {Evaluating large language models trained on code},
  journal = {arXiv preprint arXiv:2107.03374},
  year    = {2021},
}


@article{radford2019language,
  author  = {Radford, Alec and Wu, Jeffrey and Child, Rewon and Luan, David and Amodei, Dario and Sutskever, Ilya and others},
  title   = {Language models are unsupervised multitask learners},
  journal = {OpenAI blog},
  volume  = {1},
  number  = {8},
  pages   = {9},
  year    = {2019},
}

@article{wei2022emergent,
  author  = {Jason Wei and Yi Tay and Rishi Bommasani and Colin Raffel and Barret Zoph and Sebastian Borgeaud and Dani Yogatama and Maarten Bosma and Denny Zhou and Donald Metzler and Ed H. Chi and Tatsunori Hashimoto and Oriol Vinyals and Percy Liang and Jeff Dean and William Fedus},
  title   = {Emergent Abilities of Large Language Models},
  journal = {Transactions on Machine Learning Research},
  issn    = {2835-8856},
  year    = {2022},
  note    = {Survey Certification},
}


@misc{tang2023referral,
  author        = {Michael Tang and Shunyu Yao and John Yang and Karthik Narasimhan},
  title         = {Referral Augmentation for Zero-Shot Information Retrieval},
  year          = {2023},
  eprint        = {2305.15098},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}

@misc{nogueira2019document,
  author        = {Rodrigo Nogueira and Wei Yang and Jimmy Lin and Kyunghyun Cho},
  title         = {Document Expansion by Query Prediction},
  year          = {2019},
  eprint        = {1904.08375},
  archivePrefix = {arXiv},
  primaryClass  = {cs.IR},
}

@article{wang2023query2doc,
  author  = {Liang Wang and Nan Yang and Furu Wei},
  title   = {Query2doc: Query Expansion with Large Language Models},
  journal = {arXiv preprint arXiv:2303.07678},
  year    = {2023},
}


@article{yao2023impact,
  author  = {Yao, Shunyu and Narasimhan, Karthik},
  title   = {Language Agents in the Digital World: Opportunities and Risks},
  journal = {princeton-nlp.github.io},
  year    = {2023},
  month   = jul,
  url     = {https://princeton-nlp.github.io/language-agent-impact/},
}

@book{jurafsky2000speech,
  author    = {Jurafsky, Dan and Martin, James H.},
  title     = {Speech \& language processing},
  year      = {2000},
  publisher = {Pearson Education India},
}

@article{Li2023StarCoderMT,
  author  = {Raymond Li and Loubna Ben Allal and Yangtian Zi and Niklas Muennighoff and Denis Kocetkov and Chenghao Mou and Marc Marone and Christopher Akiki and Jia Li and Jenny Chim and Qian Liu and Evgenii Zheltonozhskii and Terry Yue Zhuo and Thomas Wang and Olivier Dehaene and Mishig Davaadorj and Joel Lamy-Poirier and Jo{\~a}o Monteiro and Oleh Shliazhko and Nicolas Gontier and Nicholas Meade and Armel Zebaze and Ming-Ho Yee and Logesh Kumar Umapathi and Jian Zhu and Benjamin Lipkin and Muhtasham Oblokulov and Zhiruo Wang and Rudra Murthy and Jason Stillerman and Siva Sankalp Patel and Dmitry Abulkhanov and Marco Zocca and Manan Dey and Zhihan Zhang and Nourhan Fahmy and Urvashi Bhattacharyya and W. Yu and Swayam Singh and Sasha Luccioni and Paulo Villegas and Maxim Kunakov and Fedor Zhdanov and Manuel Romero and Tony Lee and Nadav Timor and Jennifer Ding and Claire Schlesinger and Hailey Schoelkopf and Jana Ebert and Tri Dao and Mayank Mishra and Alexander Gu and Jennifer Robinson and Carolyn Jane Anderson and Brendan Dolan-Gavitt and Danish Contractor and Siva Reddy and Daniel Fried and Dzmitry Bahdanau and Yacine Jernite and Carlos Mu{\~n}oz Ferrandis and Sean M. Hughes and Thomas Wolf and Arjun Guha and Leandro von Werra and Harm de Vries},
  title   = {{StarCoder}: may the source be with you!},
  journal = {arXiv preprint arXiv:2305.06161},
  year    = {2023},
}

@article{Rozire2023CodeLO,
  author  = {Baptiste Rozi{\`e}re and Jonas Gehring and Fabian Gloeckle and Sten Sootla and Itai Gat and Xiaoqing Tan and Yossi Adi and Jingyu Liu and Tal Remez and J{\'e}r{\'e}my Rapin and Artyom Kozhevnikov and I. Evtimov and Joanna Bitton and Manish P Bhatt and Cristian Cant{\'o}n Ferrer and Aaron Grattafiori and Wenhan Xiong and Alexandre D{\'e}fossez and Jade Copet and Faisal Azhar and Hugo Touvron and Louis Martin and Nicolas Usunier and Thomas Scialom and Gabriel Synnaeve},
  title   = {{Code Llama}: Open Foundation Models for Code},
  journal = {arXiv preprint arXiv:2308.12950},
  year    = {2023},
}

@article{Li2022CompetitionlevelCG,
  author  = {Yujia Li and David H. Choi and Junyoung Chung and Nate Kushman and Julian Schrittwieser and R{\'e}mi Leblond and Tom Eccles and James Keeling and Felix Gimeno and Dal Lago, Agustin and Thomas Hubert and Peter Choy and de Masson d'Autume, Cyprien and Igor Babuschkin and Xinyun Chen and Po-Sen Huang and Johannes Welbl and Sven Gowal and Alexey Cherepanov and James Molloy and Daniel Jaymin Mankowitz and Esme Sutherland Robson and Pushmeet Kohli and Nando de Freitas and Koray Kavukcuoglu and Oriol Vinyals},
  title   = {Competition-level code generation with {AlphaCode}},
  journal = {Science},
  volume  = {378},
  pages   = {1092--1097},
  year    = {2022},
}

@article{Meier2021LanguageME,
  author  = {Joshua Meier and Roshan Rao and Robert Verkuil and Jason Liu and Tom Sercu and Alexander Rives},
  title   = {Language models enable zero-shot prediction of the effects of mutations on protein function},
  journal = {bioRxiv},
  year    = {2021},
}

@article{saycan2022arxiv,
    author  = {Michael Ahn and Anthony Brohan and Noah Brown and Yevgen Chebotar and Omar Cortes and Byron David and Chelsea Finn and Chuyuan Fu and Keerthana Gopalakrishnan and Karol Hausman and Alex Herzog and Daniel Ho and Jasmine Hsu and Julian Ibarz and Brian Ichter and Alex Irpan and Eric Jang and Rosario Jauregui Ruano and Kyle Jeffrey and Sally Jesmonth and Nikhil Joshi and Ryan Julian and Dmitry Kalashnikov and Yuheng Kuang and Kuang-Huei Lee and Sergey Levine and Yao Lu and Linda Luu and Carolina Parada and Peter Pastor and Jornell Quiambao and Kanishka Rao and Jarek Rettinghouse and Diego Reyes and Pierre Sermanet and Nicolas Sievers and Clayton Tan and Alexander Toshev and Vincent Vanhoucke and Fei Xia and Ted Xiao and Peng Xu and Sichun Xu and Mengyuan Yan and Andy Zeng},
    title   = {Do As {I} Can and Not As {I} Say: Grounding Language in Robotic Affordances},
    journal = {arXiv preprint arXiv:2204.01691},
    year    = {2022},
}
@article{Deng2023Mind2WebTA,
  author  = {Xiang Deng and Yu Gu and Boyuan Zheng and Shijie Chen and Samuel Stevens and Boshi Wang and Huan Sun and Yu Su},
  title   = {{Mind2Web}: Towards a Generalist Agent for the Web},
  journal = {arXiv preprint arXiv:2306.06070},
  year    = {2023},
}


@article{qian2023communicative,
  author  = {Qian, Chen and Cong, Xin and Yang, Cheng and Chen, Weize and Su, Yusheng and Xu, Juyuan and Liu, Zhiyuan and Sun, Maosong},
  title   = {Communicative agents for software development},
  journal = {arXiv preprint arXiv:2307.07924},
  year    = {2023},
}

@article{chan2023chateval,
  author  = {Chan, Chi-Min and Chen, Weize and Su, Yusheng and Yu, Jianxuan and Xue, Wei and Zhang, Shanghang and Fu, Jie and Liu, Zhiyuan},
  title   = {{ChatEval}: Towards Better {LLM}-based Evaluators through Multi-Agent Debate},
  journal = {arXiv preprint arXiv:2308.07201},
  year    = {2023},
}

@article{hong2023metagpt,
  author  = {Hong, Sirui and Zheng, Xiawu and Chen, Jonathan and Cheng, Yuheng and Zhang, Ceyao and Wang, Zili and Yau, Steven Ka Shing and Lin, Zijuan and Zhou, Liyang and Ran, Chenyu and others},
  title   = {{MetaGPT}: Meta programming for multi-agent collaborative framework},
  journal = {arXiv preprint arXiv:2308.00352},
  year    = {2023},
}

@article{liang2023encouraging,
  author  = {Liang, Tian and He, Zhiwei and Jiao, Wenxiang and Wang, Xing and Wang, Yan and Wang, Rui and Yang, Yujiu and Tu, Zhaopeng and Shi, Shuming},
  title   = {Encouraging Divergent Thinking in Large Language Models through Multi-Agent Debate},
  journal = {arXiv preprint arXiv:2305.19118},
  year    = {2023},
}

@article{du2023improving,
  author  = {Du, Yilun and Li, Shuang and Torralba, Antonio and Tenenbaum, Joshua B and Mordatch, Igor},
  title   = {Improving Factuality and Reasoning in Language Models through Multiagent Debate},
  journal = {arXiv preprint arXiv:2305.14325},
  year    = {2023},
}


@article{dagan2023dynamic,
  author  = {Dagan, Gautier and Keller, Frank and Lascarides, Alex},
  title   = {Dynamic Planning with a {LLM}},
  journal = {arXiv preprint arXiv:2308.06391},
  year    = {2023},
}

@article{yao2023retroformer,
  author  = {Yao, Weiran and Heinecke, Shelby and Niebles, Juan Carlos and Liu, Zhiwei and Feng, Yihao and Xue, Le and Murthy, Rithesh and Chen, Zeyuan and Zhang, Jianguo and Arpit, Devansh and others},
  title   = {Retroformer: Retrospective Large Language Agents with Policy Gradient Optimization},
  journal = {arXiv preprint arXiv:2308.02151},
  year    = {2023},
}

@article{xu2023rewoo,
  author  = {Xu, Binfeng and Peng, Zhiyuan and Lei, Bowen and Mukherjee, Subhabrata and Liu, Yuchen and Xu, Dongkuan},
  title   = {{ReWOO}: Decoupling Reasoning from Observations for Efficient Augmented Language Models},
  journal = {arXiv preprint arXiv:2305.18323},
  year    = {2023},
}

@article{lin2023swiftsage,
  author  = {Lin, Bill Yuchen and Fu, Yicheng and Yang, Karina and Ammanabrolu, Prithviraj and Brahman, Faeze and Huang, Shiyu and Bhagavatula, Chandra and Choi, Yejin and Ren, Xiang},
  title   = {{SwiftSage}: A Generative Agent with Fast and Slow Thinking for Complex Interactive Tasks},
  journal = {arXiv preprint arXiv:2305.17390},
  year    = {2023},
}

@article{chen2023chatcot,
  author  = {Chen, Zhipeng and Zhou, Kun and Zhang, Beichen and Gong, Zheng and Zhao, Wayne Xin and Wen, Ji-Rong},
  title   = {{ChatCoT}: Tool-Augmented Chain-of-Thought Reasoning on Chat-based Large Language Models},
  journal = {arXiv preprint arXiv:2305.14323},
  year    = {2023},
}

@article{liu2023llm+,
  author  = {Liu, Bo and Jiang, Yuqian and Zhang, Xiaohan and Liu, Qiang and Zhang, Shiqi and Biswas, Joydeep and Stone, Peter},
  title   = {{LLM+P}: Empowering large language models with optimal planning proficiency},
  journal = {arXiv preprint arXiv:2304.11477},
  year    = {2023},
}

@article{zhang2023proagent,
  author  = {Zhang, Ceyao and Yang, Kaijie and Hu, Siyi and Wang, Zihao and Li, Guanghe and Sun, Yihang and Zhang, Cheng and Zhang, Zhaowei and Liu, Anji and Zhu, Song-Chun and others},
  title   = {{ProAgent}: Building Proactive Cooperative {AI} with Large Language Models},
  journal = {arXiv preprint arXiv:2308.11339},
  year    = {2023},
}

@article{chen2023agentverse,
  author  = {Chen, Weize and Su, Yusheng and Zuo, Jingwei and Yang, Cheng and Yuan, Chenfei and Qian, Chen and Chan, Chi-Min and Qin, Yujia and Lu, Yaxi and Xie, Ruobing and others},
  title   = {{AgentVerse}: Facilitating Multi-Agent Collaboration and Exploring Emergent Behaviors in Agents},
  journal = {arXiv preprint arXiv:2308.10848},
  year    = {2023},
}

@article{nascimento2023gpt,
  author  = {Nascimento, Nathalia and Alencar, Paulo and Cowan, Donald},
  title   = {{GPT}-in-the-Loop: Adaptive Decision-Making for Multiagent Systems},
  journal = {arXiv preprint arXiv:2308.10435},
  year    = {2023},
}

@article{wu2023autogen,
  author  = {Wu, Qingyun and Bansal, Gagan and Zhang, Jieyu and Wu, Yiran and Zhang, Shaokun and Zhu, Erkang and Li, Beibin and Jiang, Li and Zhang, Xiaoyun and Wang, Chi},
  title   = {{AutoGen}: Enabling Next-Gen {LLM} Applications via Multi-Agent Conversation Framework},
  journal = {arXiv preprint arXiv:2308.08155},
  year    = {2023},
}

@article{xu2023gentopia,
  author  = {Xu, Binfeng and Liu, Xukun and Shen, Hua and Han, Zeyu and Li, Yuhan and Yue, Murong and Peng, Zhiyuan and Liu, Yuchen and Yao, Ziyu and Xu, Dongkuan},
  title   = {Gentopia: A Collaborative Platform for Tool-Augmented {LLMs}},
  journal = {arXiv preprint arXiv:2308.04030},
  year    = {2023},
}

@article{li2023prd,
  author  = {Li, Ruosen and Patel, Teerth and Du, Xinya},
  title   = {{PRD}: Peer Rank and Discussion Improve Large Language Model based Evaluations},
  journal = {arXiv preprint arXiv:2307.02762},
  year    = {2023},
}

@article{wang2023unleashing,
  author  = {Wang, Zhenhailong and Mao, Shaoguang and Wu, Wenshan and Ge, Tao and Wei, Furu and Ji, Heng},
  title   = {Unleashing Cognitive Synergy in Large Language Models: A Task-Solving Agent through Multi-Persona Self-Collaboration},
  journal = {arXiv preprint arXiv:2307.05300},
  year    = {2023},
}

@article{mandi2023roco,
  author  = {Mandi, Zhao and Jain, Shreeya and Song, Shuran},
  title   = {{RoCo}: Dialectic Multi-Robot Collaboration with Large Language Models},
  journal = {arXiv preprint arXiv:2307.04738},
  year    = {2023},
}

@article{talebirad2023multi,
  author  = {Talebirad, Yashar and Nadiri, Amirhossein},
  title   = {Multi-Agent Collaboration: Harnessing the Power of Intelligent {LLM} Agents},
  journal = {arXiv preprint arXiv:2306.03314},
  year    = {2023},
}

@article{colas2023augmenting,
  author  = {Colas, C{\'e}dric and Teodorescu, Laetitia and Oudeyer, Pierre-Yves and Yuan, Xingdi and C{\^o}t{\'e}, Marc-Alexandre},
  title   = {Augmenting Autotelic Agents with Large Language Models},
  journal = {arXiv preprint arXiv:2305.12487},
  year    = {2023},
}

@article{liu2023agentbench,
  author  = {Liu, Xiao and Yu, Hao and Zhang, Hanchen and Xu, Yifan and Lei, Xuanyu and Lai, Hanyu and Gu, Yu and Ding, Hangliang and Men, Kaiwen and Yang, Kejuan and others},
  title   = {{AgentBench}: Evaluating {LLMs} as Agents},
  journal = {arXiv preprint arXiv:2308.03688},
  year    = {2023},
}

@article{boiko2023emergent,
  author  = {Boiko, Daniil A and MacKnight, Robert and Gomes, Gabe},
  title   = {Emergent autonomous scientific research capabilities of large language models},
  journal = {arXiv preprint arXiv:2304.05332},
  year    = {2023},
}

@article{li2023quantifying,
  author  = {Li, Chao and Su, Xing and Fan, Chao and Han, Haoying and Xue, Cong and Zheng, Chunmo},
  title   = {Quantifying the Impact of Large Language Models on Collective Opinion Dynamics},
  journal = {arXiv preprint arXiv:2308.03313},
  year    = {2023},
}

@article{hasan2023sapien,
  author  = {Hasan, Masum and Ozel, Cengiz and Potter, Sammy and Hoque, Ehsan},
  title   = {{SAPIEN}: Affective Virtual Agents Powered by Large Language Models},
  journal = {arXiv preprint arXiv:2308.03022},
  year    = {2023},
}

@article{ma2023understanding,
  author  = {Ma, Zilin and Mei, Yiyang and Su, Zhaoyuan},
  title   = {Understanding the Benefits and Challenges of Using Large Language Model-based Conversational Agents for Mental Well-being Support},
  journal = {arXiv preprint arXiv:2307.15810},
  year    = {2023},
}


@article{blundell2016model,
  author  = {Blundell, Charles and Uria, Benigno and Pritzel, Alexander and Li, Yazhe and Ruderman, Avraham and Leibo, Joel Z and Rae, Jack and Wierstra, Daan and Hassabis, Demis},
  title   = {Model-free episodic control},
  journal = {arXiv preprint arXiv:1606.04460},
  year    = {2016},
}

@inproceedings{pritzel2017neural,
  author    = {Pritzel, Alexander and Uria, Benigno and Srinivasan, Sriram and Badia, Adria Puigdomenech and Vinyals, Oriol and Hassabis, Demis and Wierstra, Daan and Blundell, Charles},
  title     = {Neural episodic control},
  booktitle = {International Conference on Machine Learning},
  pages     = {2827--2836},
  year      = {2017},
}

@article{hussein2017imitation,
  author    = {Hussein, Ahmed and Gaber, Mohamed Medhat and Elyan, Eyad and Jayne, Chrisina},
  title     = {Imitation learning: A survey of learning methods},
  journal   = {ACM Computing Surveys (CSUR)},
  volume    = {50},
  number    = {2},
  pages     = {1--35},
  year      = {2017},
  publisher = {ACM New York, NY, USA},
}

@article{christiano2017deep,
  author  = {Christiano, Paul F and Leike, Jan and Brown, Tom and Martic, Miljan and Legg, Shane and Amodei, Dario},
  title   = {Deep reinforcement learning from human preferences},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {30},
  year    = {2017},
}


@article{izacard2021unsupervised,
  author  = {Izacard, Gautier and Caron, Mathilde and Hosseini, Lucas and Riedel, Sebastian and Bojanowski, Piotr and Joulin, Armand and Grave, Edouard},
  title   = {Unsupervised dense information retrieval with contrastive learning},
  journal = {arXiv preprint arXiv:2112.09118},
  year    = {2021},
}


@article{browne2012survey,
  author    = {Browne, Cameron B and Powley, Edward and Whitehouse, Daniel and Lucas, Simon M and Cowling, Peter I and Rohlfshagen, Philipp and Tavener, Stephen and Perez, Diego and Samothrakis, Spyridon and Colton, Simon},
  title     = {A survey of {Monte Carlo} tree search methods},
  journal   = {IEEE Transactions on Computational Intelligence and AI in Games},
  volume    = {4},
  number    = {1},
  pages     = {1--43},
  year      = {2012},
  publisher = {IEEE},
}


@article{nguyen2022survey,
  author  = {Nguyen, Thanh Tam and Huynh, Thanh Trung and Nguyen, Phi Le and Liew, Alan Wee-Chung and Yin, Hongzhi and Nguyen, Quoc Viet Hung},
  title   = {A survey of machine unlearning},
  journal = {arXiv preprint arXiv:2209.02299},
  year    = {2022},
}


@misc{zhang2023cumulative,
  author        = {Yifan Zhang and Jingqin Yang and Yang Yuan and Andrew Chi-Chih Yao},
  title         = {Cumulative Reasoning with Large Language Models},
  year          = {2023},
  eprint        = {2308.04371},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI},
}


@article{jiang2021can,
  author    = {Jiang, Zhengbao and Araki, Jun and Ding, Haibo and Neubig, Graham},
  title     = {How can we know when language models know? On the calibration of language models for question answering},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {9},
  pages     = {962--977},
  year      = {2021},
  publisher = {MIT Press},
}

@inproceedings{braverman2020calibration,
  author    = {Braverman, Mark and Chen, Xinyi and Kakade, Sham and Narasimhan, Karthik and Zhang, Cyril and Zhang, Yi},
  title     = {Calibration, entropy rates, and memory in language models},
  booktitle = {International Conference on Machine Learning},
  pages     = {1089--1099},
  year      = {2020},
}

@article{chen2022close,
  author  = {Chen, Yangyi and Yuan, Lifan and Cui, Ganqu and Liu, Zhiyuan and Ji, Heng},
  title   = {A Close Look into the Calibration of Pre-trained Language Models},
  journal = {arXiv preprint arXiv:2211.00151},
  year    = {2022},
}


@inproceedings{liang2021towards,
  author    = {Liang, Paul Pu and Wu, Chiyu and Morency, Louis-Philippe and Salakhutdinov, Ruslan},
  title     = {Towards understanding and mitigating social biases in language models},
  booktitle = {International Conference on Machine Learning},
  pages     = {6565--6576},
  year      = {2021},
}

@article{feng2023pretraining,
  author  = {Feng, Shangbin and Park, Chan Young and Liu, Yuhan and Tsvetkov, Yulia},
  title   = {From Pretraining Data to Language Models to Downstream Tasks: Tracking the Trails of Political Biases Leading to Unfair {NLP} Models},
  journal = {arXiv preprint arXiv:2305.08283},
  year    = {2023},
}


@inproceedings{nguyen2022hari,
  author    = {Nguyen, Khanh and Bisk, Yonatan and Daum{\'e} III, Hal},
  title     = {A Framework for Learning to Request Rich and Contextually Useful Information from Humans},
  booktitle = {International Conference on Machine Learning},
  month     = jul,
  year      = {2022},
}

@article{xie2023olagpt,
  author  = {Xie, Yuanzhen and Xie, Tao and Lin, Mingxiong and Wei, WenTao and Li, Chenglin and Kong, Beibei and Chen, Lei and Zhuo, Chengxiang and Hu, Bo and Li, Zang},
  title   = {{OlaGPT}: Empowering {LLMs} With Human-like Problem-Solving Abilities},
  journal = {arXiv preprint arXiv:2305.16334},
  year    = {2023},
}


@article{jinxin2023cgmi,
  author  = {Shi, Jinxin and Zhao, Jiabao and Wang, Yilei and Wu, Xingjiao and Li, Jiawen and He, Liang},
  title   = {{CGMI}: Configurable General Multi-Agent Interaction Framework},
  journal = {arXiv preprint arXiv:2308.12503},
  year    = {2023},
}


@article{gao2023s,
  author  = {Gao, Chen and Lan, Xiaochong and Lu, Zhihong and Mao, Jinzhu and Piao, Jinghua and Wang, Huandong and Jin, Depeng and Li, Yong},
  title   = {S3: Social-network Simulation System with Large Language Model-Empowered Agents},
  journal = {arXiv preprint arXiv:2307.14984},
  year    = {2023},
}


@book{haslum2019introduction,
  author    = {Haslum, Patrik and Lipovetzky, Nir and Magazzeni, Daniele and Muise, Christian},
  title     = {An introduction to the planning domain definition language},
  series    = {Synthesis Lectures on Artificial Intelligence and Machine Learning},
  volume    = {13},
  year      = {2019},
  publisher = {Springer},
}