-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathreferences.bib
673 lines (586 loc) · 24 KB
/
references.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
@article{abadi2016tensorflow,
  author  = {Abadi, Mart{\'\i}n and Agarwal, Ashish and Barham, Paul and Brevdo, Eugene and Chen, Zhifeng and Citro, Craig and Corrado, Greg S and Davis, Andy and Dean, Jeffrey and Devin, Matthieu and others},
  title   = {{TensorFlow}: Large-scale machine learning on heterogeneous distributed systems},
  journal = {arXiv preprint arXiv:1603.04467},
  year    = {2016},
}
@article{badrinarayanan2017segnet,
  author    = {Badrinarayanan, Vijay and Kendall, Alex and Cipolla, Roberto},
  title     = {{SegNet}: A deep convolutional encoder-decoder architecture for image segmentation},
  journal   = {{IEEE} Transactions on Pattern Analysis and Machine Intelligence},
  volume    = {39},
  number    = {12},
  pages     = {2481--2495},
  year      = {2017},
  publisher = {IEEE},
}
% Blog post dated 2018-12-14; the key says 2019 and is kept so existing citations still resolve.
% The author is an organisation, hence the extra braces (one indivisible name).
@online{bair2019soft,
  author  = {{Berkeley Artificial Intelligence Research}},
  title   = {Soft Actor Critic---Deep Reinforcement Learning with Real-World Robots},
  year    = {2018},
  note    = {\url{https://bair.berkeley.edu/blog/2018/12/14/sac/}},
  urldate = {2018-12-14},
}
@article{bellemare2013arcade,
  author  = {Bellemare, Marc G and Naddaf, Yavar and Veness, Joel and Bowling, Michael},
  title   = {The {Arcade Learning Environment}: An evaluation platform for general agents},
  journal = {Journal of Artificial Intelligence Research},
  volume  = {47},
  pages   = {253--279},
  year    = {2013},
}
% NOTE(review): volume 2050 looks like a series number from an automated export; confirm and add a series field if so.
@book{bellman2015applied,
  author    = {Bellman, Richard E and Dreyfus, Stuart E},
  title     = {Applied Dynamic Programming},
  volume    = {2050},
  year      = {2015},
  publisher = {Princeton University Press},
}
@inproceedings{bewley2019learning,
  author       = {Bewley, Alex and Rigley, Jessica and Liu, Yuxuan and Hawke, Jeffrey and Shen, Richard and Lam, Vinh-Dieu and Kendall, Alex},
  title        = {Learning to drive from simulation without real world labels},
  booktitle    = {2019 International Conference on Robotics and Automation (ICRA)},
  organization = {IEEE},
  pages        = {4818--4824},
  year         = {2019},
}
@book{bishop2006pattern,
  author    = {Bishop, Christopher M},
  title     = {Pattern Recognition and Machine Learning},
  publisher = {Springer},
  year      = {2006},
}
@article{brockman2016openai,
  author  = {Brockman, Greg and Cheung, Vicki and Pettersson, Ludwig and Schneider, Jonas and Schulman, John and Tang, Jie and Zaremba, Wojciech},
  title   = {{OpenAI} {Gym}},
  journal = {arXiv preprint arXiv:1606.01540},
  year    = {2016},
}
@online{builtin2019pytorch,
  author  = {Vihar Kurama},
  title   = {{PyTorch vs. TensorFlow: Which Framework Is Best for Your Deep Learning Project?}},
  month   = feb,
  year    = {2019},
  note    = {\url{https://builtin.com/data-science/pytorch-vs-tensorflow}},
  urldate = {2019-08-28},
}
@online{chara2018wild,
  title   = {{Reinforcement Learning in the Wild and Lessons Learned}},
  author  = {Mohamad Charafeddine},
  note    = {\url{https://link.medium.com/SRUZ24Itx4}},
  urldate = {2018-10-26},
  year    = {2018},
}
@online{cozmo2019SDK,
  author = {Anki},
  title  = {{GitHub} repository of {Cozmo SDK} written in {Python}},
  year   = {2019},
  note   = {\url{https://github.com/anki/cozmo-python-sdk}},
}
@inproceedings{deisenroth2011pilco,
  author    = {Deisenroth, Marc and Rasmussen, Carl E},
  title     = {{PILCO}: A model-based and data-efficient approach to policy search},
  booktitle = {Proceedings of the 28th International Conference on Machine Learning ({ICML}-11)},
  pages     = {465--472},
  year      = {2011},
}
@inproceedings{deng2009imagenet,
  author       = {Deng, Jia and Dong, Wei and Socher, Richard and Li, Li-Jia and Li, Kai and Fei-Fei, Li},
  title        = {{ImageNet}: A large-scale hierarchical image database},
  booktitle    = {2009 {IEEE} Conference on Computer Vision and Pattern Recognition},
  pages        = {248--255},
  year         = {2009},
  organization = {IEEE},
}
@inproceedings{duan2016benchmarking,
  author    = {Duan, Yan and Chen, Xi and Houthooft, Rein and Schulman, John and Abbeel, Pieter},
  title     = {Benchmarking deep reinforcement learning for continuous control},
  booktitle = {International Conference on Machine Learning},
  year      = {2016},
  pages     = {1329--1338},
}
% "Jul" is the publication month, not an issue number, so it is stored in month.
@article{duchi2011adaptive,
  author  = {Duchi, John and Hazan, Elad and Singer, Yoram},
  title   = {Adaptive subgradient methods for online learning and stochastic optimization},
  journal = {Journal of Machine Learning Research},
  volume  = {12},
  pages   = {2121--2159},
  month   = jul,
  year    = {2011},
}
% University of Montreal technical report; 1341 is the report number.
@techreport{erhan2009visualizing,
  author      = {Erhan, Dumitru and Bengio, Yoshua and Courville, Aaron and Vincent, Pascal},
  title       = {Visualizing higher-layer features of a deep network},
  institution = {University of Montreal},
  number      = {1341},
  year        = {2009},
}
@article{fox2015taming,
  author  = {Fox, Roy and Pakman, Ari and Tishby, Naftali},
  title   = {Taming the noise in reinforcement learning via soft updates},
  year    = {2015},
  journal = {arXiv preprint arXiv:1512.08562},
}
@article{franccois2018introduction,
  author    = {Fran{\c{c}}ois-Lavet, Vincent and Henderson, Peter and Islam, Riashat and Bellemare, Marc G and Pineau, Joelle and others},
  title     = {An introduction to deep reinforcement learning},
  journal   = {Foundations and Trends{\textregistered} in Machine Learning},
  volume    = {11},
  number    = {3--4},
  pages     = {219--354},
  year      = {2018},
  publisher = {Now Publishers, Inc.},
}
@article{fujimoto2018addressing,
  author  = {Fujimoto, Scott and van Hoof, Herke and Meger, David},
  title   = {Addressing function approximation error in actor-critic methods},
  year    = {2018},
  journal = {arXiv preprint arXiv:1802.09477},
}
@inproceedings{glorot2010understanding,
  author    = {Glorot, Xavier and Bengio, Yoshua},
  title     = {Understanding the difficulty of training deep feedforward neural networks},
  booktitle = {Proceedings of the Thirteenth International Conference on Artificial Intelligence and Statistics},
  pages     = {249--256},
  year      = {2010},
}
@inproceedings{gu2017deep,
  author       = {Gu, Shixiang and Holly, Ethan and Lillicrap, Timothy and Levine, Sergey},
  title        = {Deep reinforcement learning for robotic manipulation with asynchronous off-policy updates},
  booktitle    = {2017 {IEEE} International Conference on Robotics and Automation ({ICRA})},
  pages        = {3389--3396},
  year         = {2017},
  organization = {IEEE},
}
% The proceedings volume number is stored in volume rather than fused into booktitle.
@inproceedings{haarnoja2017reinforcement,
  author       = {Haarnoja, Tuomas and Tang, Haoran and Abbeel, Pieter and Levine, Sergey},
  title        = {Reinforcement learning with deep energy-based policies},
  booktitle    = {Proceedings of the 34th International Conference on Machine Learning},
  volume       = {70},
  pages        = {1352--1361},
  year         = {2017},
  organization = {JMLR.org},
}
@article{haarnoja2018alg,
  author  = {Haarnoja, Tuomas and Zhou, Aurick and Hartikainen, Kristian and Tucker, George and Ha, Sehoon and Tan, Jie and Kumar, Vikash and Zhu, Henry and Gupta, Abhishek and Abbeel, Pieter and others},
  title   = {Soft actor-critic algorithms and applications},
  year    = {2018},
  journal = {arXiv preprint arXiv:1812.05905},
}
@article{haarnoja2018soft,
  author  = {Haarnoja, Tuomas and Zhou, Aurick and Abbeel, Pieter and Levine, Sergey},
  title   = {Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor},
  year    = {2018},
  journal = {arXiv preprint arXiv:1801.01290},
}
% The author's surname carries the particle "van"; comma form keeps BibTeX from treating it as a middle initial.
@incollection{hasselt2010double,
  author    = {van Hasselt, Hado},
  title     = {Double {Q-learning}},
  booktitle = {Advances in Neural Information Processing Systems 23},
  editor    = {J. D. Lafferty and C. K. I. Williams and J. Shawe-Taylor and R. S. Zemel and A. Culotta},
  pages     = {2613--2621},
  year      = {2010},
  publisher = {Curran Associates, Inc.},
  url       = {http://papers.nips.cc/paper/3964-double-q-learning.pdf},
}
@article{hawke2019urban,
  author  = {Hawke, Jeffrey and Shen, Richard and Gurau, Corina and Sharma, Siddharth and Reda, Daniele and Nikolov, Nikolay and Mazur, Przemyslaw and Micklethwaite, Sean and Griffiths, Nicolas and Shah, Amar and others},
  title   = {Urban Driving with Conditional Imitation Learning},
  year    = {2019},
  journal = {arXiv preprint arXiv:1912.00177},
}
@inproceedings{henderson2018deep,
  author    = {Henderson, Peter and Islam, Riashat and Bachman, Philip and Pineau, Joelle and Precup, Doina and Meger, David},
  title     = {Deep reinforcement learning that matters},
  year      = {2018},
  booktitle = {Thirty-Second AAAI Conference on Artificial Intelligence},
}
@inproceedings{hessel2018rainbow,
  author    = {Hessel, Matteo and Modayil, Joseph and van Hasselt, Hado and Schaul, Tom and Ostrovski, Georg and Dabney, Will and Horgan, Dan and Piot, Bilal and Azar, Mohammad and Silver, David},
  title     = {Rainbow: Combining improvements in deep reinforcement learning},
  booktitle = {Thirty-Second {AAAI} Conference on Artificial Intelligence},
  year      = {2018},
}
@article{huval2015empirical,
  author  = {Huval, Brody and Wang, Tao and Tandon, Sameep and Kiske, Jeff and Song, Will and Pazhayampallil, Joel and Andriluka, Mykhaylo and Rajpurkar, Pranav and Migimatsu, Toki and Cheng-Yue, Royce and others},
  title   = {An empirical evaluation of deep learning on highway driving},
  year    = {2015},
  journal = {arXiv preprint arXiv:1504.01716},
}
% url holds the bare address (styles add the \url wrapper themselves); note keeps the
% \url form so styles that ignore the url field still print the link.
% editor names a site rather than a person, so it is braced as one indivisible name.
@online{ingredientsRoboticsResearch,
  author = {Plappert, Matthias and Andrychowicz, Marcin and Ray, Alex and McGrew, Bob and Baker, Bowen and Powell, Glenn and Schneider, Jonas and Tobin, Josh and Chociej, Maciek and Welinder, Peter and Kumar, Vikash and Zaremba, Wojciech},
  title  = {Ingredients for Robotics Research},
  editor = {{OpenAI blog}},
  month  = feb,
  year   = {2018},
  url    = {https://openai.com/blog/ingredients-for-robotics-research/},
  note   = {\url{https://openai.com/blog/ingredients-for-robotics-research/}},
}
@article{ioffe2015batch,
  author  = {Ioffe, Sergey and Szegedy, Christian},
  title   = {Batch normalization: Accelerating deep network training by reducing internal covariate shift},
  year    = {2015},
  journal = {arXiv preprint arXiv:1502.03167},
}
% arXiv preprint; an entry with the same title and authors for the ICRA 2019 proceedings is kendall2019learning.
@article{kendall2018learning,
  author  = {Kendall, Alex and Hawke, Jeffrey and Janz, David and Mazur, Przemyslaw and Reda, Daniele and Allen, John-Mark and Lam, Vinh-Dieu and Bewley, Alex and Shah, Amar},
  title   = {Learning to Drive in a Day},
  year    = {2018},
  journal = {arXiv preprint arXiv:1807.00412},
}
% ICRA 2019 proceedings entry; an arXiv entry with the same title and authors is kendall2018learning.
@inproceedings{kendall2019learning,
  author       = {Kendall, Alex and Hawke, Jeffrey and Janz, David and Mazur, Przemyslaw and Reda, Daniele and Allen, John-Mark and Lam, Vinh-Dieu and Bewley, Alex and Shah, Amar},
  title        = {Learning to drive in a day},
  booktitle    = {2019 International Conference on Robotics and Automation (ICRA)},
  organization = {IEEE},
  pages        = {8248--8254},
  year         = {2019},
}
@online{kendall2019nowisthetime,
  title  = {{Now is the Time for Reinforcement Learning on Real Robots}},
  author = {Alex Kendall},
  note   = {\url{https://alexgkendall.com/reinforcement\_learning/now\_is\_the\_time\_for\_reinforcement\_learning\_on\_real\_robots/}},
  year   = {2019},
}
@article{kingma2013auto,
  author  = {Kingma, Diederik P and Welling, Max},
  title   = {Auto-encoding variational {Bayes}},
  journal = {arXiv preprint arXiv:1312.6114},
  year    = {2013},
}
@article{kingma2014adam,
  author  = {Kingma, Diederik P and Ba, Jimmy},
  title   = {Adam: A method for stochastic optimization},
  year    = {2014},
  journal = {arXiv preprint arXiv:1412.6980},
}
@inproceedings{konda2000actor,
  author    = {Konda, Vijay R and Tsitsiklis, John N},
  title     = {Actor-critic algorithms},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {1008--1014},
  year      = {2000},
}
@article{kullback1951information,
  author    = {Kullback, Solomon and Leibler, Richard A},
  title     = {On information and sufficiency},
  journal   = {The Annals of Mathematical Statistics},
  volume    = {22},
  number    = {1},
  pages     = {79--86},
  year      = {1951},
  publisher = {JSTOR},
}
% Monograph, so it is typed as a book with a publisher.
@book{kullback1959information,
  author    = {Kullback, Solomon},
  title     = {Information Theory and Statistics},
  publisher = {John Wiley \& Sons},
  year      = {1959},
}
@book{lapan2018deep,
  author    = {Lapan, Maxim},
  title     = {Deep Reinforcement Learning Hands-On: Apply modern RL methods, with deep Q-networks, value iteration, policy gradients, TRPO, AlphaGo Zero and more},
  publisher = {Packt Publishing Ltd},
  year      = {2018},
}
% Chapter in an edited handbook, so it is typed as incollection with the handbook as booktitle.
% NOTE(review): page range not recorded here; add it after checking the printed volume.
@incollection{lecun1995convolutional,
  author    = {LeCun, Yann and Bengio, Yoshua},
  title     = {Convolutional networks for images, speech, and time series},
  booktitle = {The Handbook of Brain Theory and Neural Networks},
  editor    = {Arbib, Michael A.},
  publisher = {MIT Press},
  year      = {1995},
}
@article{lecun1998gradient,
  author    = {LeCun, Yann and Bottou, L{\'e}on and Bengio, Yoshua and Haffner, Patrick},
  title     = {Gradient-based learning applied to document recognition},
  journal   = {Proceedings of the {IEEE}},
  volume    = {86},
  number    = {11},
  pages     = {2278--2324},
  year      = {1998},
  publisher = {IEEE},
}
@article{lecun2015deep,
  author    = {LeCun, Yann and Bengio, Yoshua and Hinton, Geoffrey},
  title     = {Deep learning},
  journal   = {Nature},
  volume    = {521},
  number    = {7553},
  pages     = {436--444},
  year      = {2015},
  publisher = {Nature Publishing Group},
}
@article{lillicrap2015continuous,
  author  = {Lillicrap, Timothy P and Hunt, Jonathan J and Pritzel, Alexander and Heess, Nicolas and Erez, Tom and Tassa, Yuval and Silver, David and Wierstra, Daan},
  title   = {Continuous control with deep reinforcement learning},
  year    = {2015},
  journal = {arXiv preprint arXiv:1509.02971},
}
@article{lin1992self,
  author    = {Lin, Long-Ji},
  title     = {Self-improving reactive agents based on reinforcement learning, planning and teaching},
  journal   = {Machine Learning},
  volume    = {8},
  number    = {3--4},
  pages     = {293--321},
  year      = {1992},
  publisher = {Springer},
}
@online{mellon2017cognitive,
  title   = {{15-494/694}: Cognitive Robotics},
  author  = {{Carnegie Mellon University}},
  note    = {\url{http://www.cs.cmu.edu/afs/cs/academic/class/15494-s17/}},
  urldate = {2019-07-29},
  year    = {2019},
}
@article{mnih2013playing,
  author  = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Graves, Alex and Antonoglou, Ioannis and Wierstra, Daan and Riedmiller, Martin},
  title   = {Playing {Atari} with deep reinforcement learning},
  journal = {arXiv preprint arXiv:1312.5602},
  year    = {2013},
}
@article{mnih2015human,
  author    = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Rusu, Andrei A and Veness, Joel and Bellemare, Marc G and Graves, Alex and Riedmiller, Martin and Fidjeland, Andreas K and Ostrovski, Georg and others},
  title     = {Human-level control through deep reinforcement learning},
  journal   = {Nature},
  volume    = {518},
  number    = {7540},
  pages     = {529--533},
  year      = {2015},
  publisher = {Nature Publishing Group},
}
@inproceedings{mnih2016asynchronous,
  author    = {Mnih, Volodymyr and Badia, Adria Puigdomenech and Mirza, Mehdi and Graves, Alex and Lillicrap, Timothy and Harley, Tim and Silver, David and Kavukcuoglu, Koray},
  title     = {Asynchronous methods for deep reinforcement learning},
  booktitle = {International Conference on Machine Learning},
  pages     = {1928--1937},
  year      = {2016},
}
@online{openai2018dota,
  title   = {{OpenAI Five}},
  author  = {OpenAI},
  note    = {\url{https://openai.com/blog/openai-five/}},
  urldate = {2018-06-25},
  year    = {2018},
}
@online{openai2018spinningup,
  title   = {{OpenAI} {Spinning Up}},
  author  = {OpenAI},
  note    = {\url{https://spinningup.openai.com/en/latest/}},
  urldate = {2019-07-29},
  year    = {2019},
}
@online{openai2019dota,
  title   = {How to Train Your {OpenAI Five}},
  author  = {OpenAI},
  note    = {\url{https://openai.com/blog/how-to-train-your-openai-five/}},
  urldate = {2019-04-15},
  year    = {2019},
}
@online{openaigymdocs,
  title   = {{OpenAI} {Gym} Documentation},
  author  = {OpenAI},
  note    = {\url{https://gym.openai.com/docs/}},
  urldate = {2019-07-29},
  year    = {2016},
}
@online{openaigymgithub,
  author  = {OpenAI},
  title   = {{OpenAI} {Gym}},
  year    = {2016},
  note    = {\url{https://github.com/openai/gym}},
  urldate = {2019-07-29},
}
@inproceedings{ort2018autonomous,
  author       = {Ort, Teddy and Paull, Liam and Rus, Daniela},
  title        = {Autonomous vehicle navigation in rural environments without detailed prior maps},
  booktitle    = {2018 IEEE International Conference on Robotics and Automation (ICRA)},
  organization = {IEEE},
  pages        = {2040--2047},
  year         = {2018},
}
% Workshop paper: typed as inproceedings with the workshop as booktitle.
@inproceedings{paszke2017automatic,
  author    = {Paszke, Adam and Gross, Sam and Chintala, Soumith and Chanan, Gregory and Yang, Edward and DeVito, Zachary and Lin, Zeming and Desmaison, Alban and Antiga, Luca and Lerer, Adam},
  title     = {Automatic differentiation in {PyTorch}},
  booktitle = {{NIPS} 2017 Workshop on Autodiff},
  year      = {2017},
}
% url holds the bare address (styles add the \url wrapper themselves); note keeps the
% \url form so styles that ignore the url field still print the link.
@online{raffin2019learning,
  author  = {Raffin, Antonin},
  title   = {{Learning to Drive Smoothly in Minutes}},
  editor  = {{Towards Data Science}},
  month   = jan,
  year    = {2019},
  url     = {https://towardsdatascience.com/learning-to-drive-smoothly-in-minutes-450a7cdb35f4},
  note    = {\url{https://towardsdatascience.com/learning-to-drive-smoothly-in-minutes-450a7cdb35f4}},
  urldate = {2019-01-27},
}
@inproceedings{rawlik2013stochastic,
  author    = {Rawlik, Konrad and Toussaint, Marc and Vijayakumar, Sethu},
  title     = {On stochastic optimal control and reinforcement learning by approximate inference},
  year      = {2013},
  booktitle = {Twenty-Third International Joint Conference on Artificial Intelligence},
}
@article{rezende2014stochastic,
  author  = {Rezende, Danilo Jimenez and Mohamed, Shakir and Wierstra, Daan},
  title   = {Stochastic backpropagation and approximate inference in deep generative models},
  year    = {2014},
  journal = {arXiv preprint arXiv:1401.4082},
}
% NOTE(review): volume/number/pages look like an automated export of a reprint;
% confirm against the edition actually consulted.
@article{rumelhart1988learning,
  author  = {Rumelhart, David E and Hinton, Geoffrey E and Williams, Ronald J},
  title   = {Learning representations by back-propagating errors},
  journal = {Cognitive Modeling},
  volume  = {5},
  number  = {3},
  pages   = {1},
  year    = {1988},
}
@article{schaul2015prioritized,
  author  = {Schaul, Tom and Quan, John and Antonoglou, Ioannis and Silver, David},
  title   = {Prioritized experience replay},
  year    = {2015},
  journal = {arXiv preprint arXiv:1511.05952},
}
@article{schulman2017proximal,
  author  = {Schulman, John and Wolski, Filip and Dhariwal, Prafulla and Radford, Alec and Klimov, Oleg},
  title   = {Proximal policy optimization algorithms},
  year    = {2017},
  journal = {arXiv preprint arXiv:1707.06347},
}
@book{shalev2014understanding,
  author    = {Shalev-Shwartz, Shai and Ben-David, Shai},
  title     = {Understanding Machine Learning: From Theory to Algorithms},
  publisher = {Cambridge University Press},
  year      = {2014},
}
% booktitle is a required field for inproceedings.
% NOTE(review): page range not recorded here; add it after checking the proceedings.
@inproceedings{silver2014deterministic,
  author    = {Silver, David and Lever, Guy and Heess, Nicolas and Degris, Thomas and Wierstra, Daan and Riedmiller, Martin},
  title     = {Deterministic policy gradient algorithms},
  booktitle = {International Conference on Machine Learning},
  year      = {2014},
}
@unpublished{silver2015lectures,
  author      = {David Silver},
  title       = {{University College London Course on Reinforcement Learning}},
  institution = {University College London},
  note        = {\url{http://www0.cs.ucl.ac.uk/staff/d.silver/web/Teaching.html}},
  year        = {2015},
}
@article{silver2016mastering,
  author    = {Silver, David and Huang, Aja and Maddison, Chris J and Guez, Arthur and Sifre, Laurent and van den Driessche, George and Schrittwieser, Julian and Antonoglou, Ioannis and Panneershelvam, Veda and Lanctot, Marc and others},
  title     = {Mastering the game of {Go} with deep neural networks and tree search},
  journal   = {Nature},
  volume    = {529},
  number    = {7587},
  pages     = {484--489},
  year      = {2016},
  publisher = {Nature Publishing Group},
}
@article{silver2017mastering,
  author  = {Silver, David and Hubert, Thomas and Schrittwieser, Julian and Antonoglou, Ioannis and Lai, Matthew and Guez, Arthur and Lanctot, Marc and Sifre, Laurent and Kumaran, Dharshan and Graepel, Thore and others},
  title   = {Mastering chess and shogi by self-play with a general reinforcement learning algorithm},
  year    = {2017},
  journal = {arXiv preprint arXiv:1712.01815},
}
@article{springenberg2014striving,
  author  = {Springenberg, Jost Tobias and Dosovitskiy, Alexey and Brox, Thomas and Riedmiller, Martin},
  title   = {Striving for simplicity: The all convolutional net},
  year    = {2014},
  journal = {arXiv preprint arXiv:1412.6806},
}
@online{stanford2019cs231n,
  title   = {{CS231n}: Convolutional Neural Networks for Visual Recognition},
  author  = {{Stanford University}},
  note    = {\url{http://cs231n.github.io/}},
  urldate = {2019-07-29},
  year    = {2019},
}
@book{sutton2018reinforcement,
  author    = {Sutton, Richard S and Barto, Andrew G},
  title     = {Reinforcement Learning: An Introduction},
  publisher = {MIT Press},
  year      = {2018},
}
@article{tieleman2012lecture,
  author  = {Tieleman, Tijmen and Hinton, Geoffrey},
  title   = {Lecture 6.5-rmsprop: Divide the gradient by a running average of its recent magnitude},
  journal = {COURSERA: Neural networks for machine learning},
  year    = {2012},
  volume  = {4},
  number  = {2},
  pages   = {26--31},
}
@inproceedings{todorov2012mujoco,
  author       = {Todorov, Emanuel and Erez, Tom and Tassa, Yuval},
  title        = {{MuJoCo}: A physics engine for model-based control},
  booktitle    = {2012 {IEEE/RSJ} International Conference on Intelligent Robots and Systems},
  pages        = {5026--5033},
  year         = {2012},
  organization = {IEEE},
}
@online{touretzky2018cozmopedia,
  title   = {{Cozmopedia Wiki}},
  author  = {Touretzky, Dave},
  note    = {\url{https://github.com/touretzkyds/cozmopedia/wiki}},
  urldate = {2019-07-29},
  year    = {2018},
}
@inproceedings{toussaint2009robot,
  author       = {Toussaint, Marc},
  title        = {Robot trajectory optimization using approximate inference},
  booktitle    = {Proceedings of the 26th Annual International Conference on Machine Learning},
  pages        = {1049--1056},
  year         = {2009},
  organization = {ACM},
}
@article{uhlenbeck1930theory,
  author    = {Uhlenbeck, George E and Ornstein, Leonard S},
  title     = {On the theory of the {Brownian} motion},
  journal   = {Physical Review},
  volume    = {36},
  number    = {5},
  pages     = {823--841},
  year      = {1930},
  publisher = {APS},
}
@inproceedings{van2016deep,
  author    = {van Hasselt, Hado and Guez, Arthur and Silver, David},
  title     = {Deep reinforcement learning with double {Q-learning}},
  booktitle = {Thirtieth {AAAI} Conference on Artificial Intelligence},
  year      = {2016},
}
@article{wang2015dueling,
  author  = {Wang, Ziyu and Schaul, Tom and Hessel, Matteo and van Hasselt, Hado and Lanctot, Marc and de Freitas, Nando},
  title   = {Dueling network architectures for deep reinforcement learning},
  journal = {arXiv preprint arXiv:1511.06581},
  year    = {2015},
}
% Doctoral thesis: the awarding institution goes in school rather than publisher.
@phdthesis{watkins1989learning,
  author = {Watkins, Christopher John Cornish Hellaby},
  title  = {Learning from delayed rewards},
  school = {King's College, Cambridge},
  year   = {1989},
}
@online{wayve2019human,
  author  = {WAYVE},
  title   = {Learning to Drive like a Human},
  month   = apr,
  year    = {2019},
  note    = {\url{https://wayve.ai/blog/driving-like-human}},
  urldate = {2019-04-03},
}
@online{wayve2019learned,
  author  = {WAYVE},
  title   = {Learned Urban Driving},
  month   = dec,
  year    = {2019},
  note    = {\url{https://wayve.ai/blog/learned-urban-driving}},
  urldate = {2019-12-02},
}
% Conference paper: typed as inproceedings with the proceedings as booktitle.
@inproceedings{ziebart2008maximum,
  author    = {Ziebart, Brian D and Maas, Andrew and Bagnell, J Andrew and Dey, Anind K},
  title     = {Maximum entropy inverse reinforcement learning},
  booktitle = {Proceedings of the Twenty-Third {AAAI} Conference on Artificial Intelligence},
  pages     = {1433--1438},
  year      = {2008},
}