-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path02_Day2_ggplot_charts.html
954 lines (870 loc) · 55.8 KB
/
02_Day2_ggplot_charts.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta charset="utf-8" />
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="pandoc" />
<title>R Camp | day 2</title>
<script src="02_Day2_ggplot_charts_files/jquery-1.11.3/jquery.min.js"></script>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link href="02_Day2_ggplot_charts_files/bootstrap-3.3.5/css/readable.min.css" rel="stylesheet" />
<script src="02_Day2_ggplot_charts_files/bootstrap-3.3.5/js/bootstrap.min.js"></script>
<script src="02_Day2_ggplot_charts_files/bootstrap-3.3.5/shim/html5shiv.min.js"></script>
<script src="02_Day2_ggplot_charts_files/bootstrap-3.3.5/shim/respond.min.js"></script>
<script src="02_Day2_ggplot_charts_files/jqueryui-1.11.4/jquery-ui.min.js"></script>
<link href="02_Day2_ggplot_charts_files/tocify-1.9.1/jquery.tocify.css" rel="stylesheet" />
<script src="02_Day2_ggplot_charts_files/tocify-1.9.1/jquery.tocify.js"></script>
<script src="02_Day2_ggplot_charts_files/navigation-1.1/tabsets.js"></script>
<link href="02_Day2_ggplot_charts_files/font-awesome-5.0.13/css/fa-svg-with-js.css" rel="stylesheet" />
<script src="02_Day2_ggplot_charts_files/font-awesome-5.0.13/js/fontawesome-all.min.js"></script>
<script src="02_Day2_ggplot_charts_files/font-awesome-5.0.13/js/fa-v4-shims.min.js"></script>
<style type="text/css">code{white-space: pre;}</style>
<style type="text/css">
/* Syntax-highlighting theme for elements carrying the sourceCode class.
   The "code > span.XX" rules colour individual token spans; the trailing
   comment on each rule names the token category it styles. */
/* Let wide code blocks scroll horizontally instead of overflowing. */
div.sourceCode { overflow-x: auto; }
/* Reset spacing and borders on the table-based (line-numbered) code layout. */
table.sourceCode, tr.sourceCode, td.lineNumbers, td.sourceCode {
margin: 0; padding: 0; vertical-align: baseline; border: none; }
table.sourceCode { width: 100%; line-height: 100%; background-color: #f8f8f8; }
/* Line-number gutter: right-aligned grey digits with a separator rule. */
td.lineNumbers { text-align: right; padding-right: 4px; padding-left: 4px; color: #aaaaaa; border-right: 1px solid #aaaaaa; }
td.sourceCode { padding-left: 5px; }
/* Shared light-grey background for all pre and code elements. */
pre, code { background-color: #f8f8f8; }
code > span.kw { color: #204a87; font-weight: bold; } /* Keyword */
code > span.dt { color: #204a87; } /* DataType */
code > span.dv { color: #0000cf; } /* DecVal */
code > span.bn { color: #0000cf; } /* BaseN */
code > span.fl { color: #0000cf; } /* Float */
code > span.ch { color: #4e9a06; } /* Char */
code > span.st { color: #4e9a06; } /* String */
code > span.co { color: #8f5902; font-style: italic; } /* Comment */
code > span.ot { color: #8f5902; } /* Other */
code > span.al { color: #ef2929; } /* Alert */
code > span.fu { color: #000000; } /* Function */
code > span.er { color: #a40000; font-weight: bold; } /* Error */
code > span.wa { color: #8f5902; font-weight: bold; font-style: italic; } /* Warning */
code > span.cn { color: #000000; } /* Constant */
code > span.sc { color: #000000; } /* SpecialChar */
code > span.vs { color: #4e9a06; } /* VerbatimString */
code > span.ss { color: #4e9a06; } /* SpecialString */
code > span.im { } /* Import */
code > span.va { color: #000000; } /* Variable */
code > span.cf { color: #204a87; font-weight: bold; } /* ControlFlow */
code > span.op { color: #ce5c00; font-weight: bold; } /* Operator */
code > span.pp { color: #8f5902; font-style: italic; } /* Preprocessor */
code > span.ex { } /* Extension */
code > span.at { color: #c4a000; } /* Attribute */
code > span.do { color: #8f5902; font-weight: bold; font-style: italic; } /* Documentation */
code > span.an { color: #8f5902; font-weight: bold; font-style: italic; } /* Annotation */
code > span.cv { color: #8f5902; font-weight: bold; font-style: italic; } /* CommentVar */
code > span.in { color: #8f5902; font-weight: bold; font-style: italic; } /* Information */
</style>
<style type="text/css">
/* pre elements that have no class attribute get a white background;
   this selector is more specific than the plain "pre" rule, so it wins
   over any background set on bare pre elements. */
pre:not([class]) {
background-color: white;
}
</style>
<style type="text/css">
/* Fixed pixel font sizes for the heading scale, largest (h1) to smallest (h6). */
h1 {
font-size: 34px;
}
/* The document title (h1 with class "title") is larger than ordinary h1 headings. */
h1.title {
font-size: 38px;
}
h2 {
font-size: 30px;
}
h3 {
font-size: 24px;
}
h4 {
font-size: 18px;
}
h5 {
font-size: 16px;
}
h6 {
font-size: 12px;
}
/* Left-align table header cells unless they carry an explicit align attribute. */
.table th:not([align]) {
text-align: left;
}
</style>
<link rel="stylesheet" href="css/camp_style.css" type="text/css" />
</head>
<body>
<style type = "text/css">
/* Page-level layout overrides. */
/* Centre the main container. Within this file the later
   "div.main-container" rule (more specific selector) raises
   max-width to 1200px, so the 940px value here is overridden. */
.main-container {
max-width: 940px;
margin-left: auto;
margin-right: auto;
}
/* Inline code inherits the surrounding text colour on a faint tint. */
code {
color: inherit;
background-color: rgba(0, 0, 0, 0.04);
}
/* Images shrink to fit their container while keeping their aspect ratio. */
img {
max-width:100%;
height: auto;
}
.tabbed-pane {
padding-top: 12px;
}
.html-widget {
margin-bottom: 20px;
}
/* NOTE(review): this removes the focus outline on code-folding buttons,
   which hides the keyboard focus indicator; consider providing a
   :focus-visible replacement. */
button.code-folding-btn:focus {
outline: none;
}
</style>
<div class="container-fluid main-container">
<!-- tabsets -->
<script>
// Once the DOM is ready, ask the tabsets helper to build tab sets,
// passing it the id of the table-of-contents element.
$(function () {
  window.buildTabsets("TOC");
});
</script>
<!-- floating table of contents (tocify) -->
<script>
// Builds the table of contents inside #TOC once the DOM is ready.
$(document).ready(function () {
  // Sections flagged "toc-ignore" carry the class on the wrapping div,
  // but the ignoreSelector below is matched against headings: move the
  // class from the section div onto its direct heading children.
  $('div.section.toc-ignore')
    .removeClass('toc-ignore')
    .children('h1,h2,h3,h4,h5').addClass('toc-ignore');

  // All tocify options in one literal.
  var options = {
    selectors: "h1,h2",            // only h1 and h2 headings become TOC entries
    theme: "bootstrap3",
    context: '.toc-content',       // look for headings inside the content column only
    // Anchor text derived from the heading text: strip the listed
    // punctuation, turn each whitespace character into "_", lowercase.
    hashGenerator: function (text) {
      return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_').toLowerCase();
    },
    ignoreSelector: ".toc-ignore",
    scrollTo: 0,
    showAndHide: true,
    smoothScroll: true
  };

  // Initialise the plugin; the plugin instance is not needed afterwards,
  // so it is not stored.
  $("#TOC").tocify(options);
});
</script>
<style type="text/css">
/* Styles for the table of contents (#TOC / .tocify) and the content column. */
#TOC {
margin: 25px 0px 20px 0px;
}
/* At viewport widths up to 768px the TOC is positioned relatively at full width. */
@media (max-width: 768px) {
#TOC {
position: relative;
width: 100%;
}
}
/* Horizontal padding for the main content column. */
.toc-content {
padding-left: 30px;
padding-right: 40px;
}
/* More specific than the ".main-container" rule elsewhere in this file,
   so this 1200px max-width takes precedence over that rule's 940px. */
div.main-container {
max-width: 1200px;
}
/* Default TOC box size: a fifth of the width, capped at 260px. */
div.tocify {
width: 20%;
max-width: 260px;
max-height: 85%;
}
/* Between 768px and 991px the TOC takes a quarter of the width. */
@media (min-width: 768px) and (max-width: 991px) {
div.tocify {
width: 25%;
}
}
/* Up to 767px the TOC spans the full width with no cap. */
@media (max-width: 767px) {
div.tocify {
width: 100%;
max-width: none;
}
}
.tocify ul, .tocify li {
line-height: 20px;
}
/* Sub-entries are slightly smaller and indented. */
.tocify-subheader .tocify-item {
font-size: 0.90em;
padding-left: 25px;
text-indent: 0;
}
/* Square corners on TOC list items. */
.tocify .list-group-item {
border-radius: 0px;
}
</style>
<!-- setup 3col/9col grid for toc_float and main content -->
<div class="row-fluid">
<div class="col-xs-12 col-sm-4 col-md-3">
<div id="TOC" class="tocify">
</div>
</div>
<div class="toc-content col-xs-12 col-sm-8 col-md-9">
<div class="fluid-row" id="header">
<h1 class="title toc-ignore">R Camp | day 2</h1>
</div>
<div id="good-morning-detectives" class="section level1">
<h1><span class="header-section-number">1</span> Good morning, Detectives!</h1>
<p><br></p>
<p><strong>Please open your RStudio project from Day 1</strong></p>
<ol style="list-style-type: decimal">
<li><strong>Open</strong> your project folder from Day 1.</li>
<li>Double click the <strong>.Rproj</strong> file to open RStudio.</li>
<li>Relax.</li>
</ol>
<p style="display:block; text-align:center;">
<img src="https://pbs.twimg.com/media/DNhps0gUQAAcS55.jpg" width="520">
</p>
<p><br></p>
<div id="camp-schedule" class="section level2">
<h2><span class="header-section-number">1.1</span> CAMP schedule</h2>
<p><strong>Day 1</strong></p>
<ol style="list-style-type: decimal">
<li>Become a pet detective.</li>
<li>Open new RStudio project and R script.</li>
<li>Install new R packages.</li>
<li>Read data into R.</li>
<li>Arrange and filter your data.</li>
<li>Solve a cat mystery.</li>
</ol>
<p><strong>Day 2</strong></p>
<ol style="list-style-type: decimal">
<li>Data transformations
<ul>
<li>Add new columns.</li>
<li>Summarize your data.</li>
<li>Split groups and categories in your data.</li>
<li>Save data.</li>
</ul></li>
<li>Make <strong>plots</strong>.
<ul>
<li>Scatter plots and transparency.</li>
<li>Add a smoothed trend line to the plot.</li>
<li>Add titles, colors, and axis labels.</li>
<li>Bar charts.</li>
<li>Histograms.</li>
<li>Box plots.</li>
<li>Log transform your chart axis.</li>
</ul></li>
</ol>
<p><br></p>
</div>
<div id="day-1-review" class="section level2">
<h2><span class="header-section-number">1.2</span> Day 1 review</h2>
<div id="load-your-packages" class="section level3">
<h3><span class="header-section-number">1.2.1</span> 1. Load your packages</h3>
<p><em>Hint: Put the packages you need at the top of every script.</em></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">library</span>(<span class="st">"readr"</span>)
<span class="kw">library</span>(<span class="st">"dplyr"</span>)
<span class="co"># Your code starts here.</span></code></pre></div>
<p><br></p>
</div>
<div id="load-data-with-read_csv" class="section level3">
<h3><span class="header-section-number">1.2.2</span> 2. Load data with <code>read_csv()</code></h3>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">all_cats <-<span class="st"> </span><span class="kw">read_csv</span>(<span class="st">"data/missing_cat_list.csv"</span>)</code></pre></div>
<p><br></p>
</div>
<div id="sort-and-arrange-your-data-with-arrange" class="section level3">
<h3><span class="header-section-number">1.2.3</span> 3. Sort and arrange your data with <code>arrange()</code></h3>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Sort from low to high</span>
<span class="kw">arrange</span>(all_cats, age)
<span class="co"># Sort from high to low</span>
<span class="kw">arrange</span>(all_cats, <span class="kw">desc</span>(age))</code></pre></div>
<p><br></p>
</div>
<div id="filter-your-data-with-filter" class="section level3">
<h3><span class="header-section-number">1.2.4</span> 4. Filter your data with <code>filter()</code></h3>
<p><em>Hint: Numbers don’t need quotes.</em></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Filter to cats 5 years old</span>
cats_5yo <-<span class="st"> </span><span class="kw">filter</span>(all_cats, age <span class="op">==</span><span class="st"> </span><span class="dv">5</span>)
<span class="co"># Filter to cats from UK or Australia</span>
cricket_cats <-<span class="st"> </span><span class="kw">filter</span>(all_cats, country <span class="op">%in%</span><span class="st"> </span><span class="kw">c</span>(<span class="st">"UK"</span>, <span class="st">"Australia"</span>))</code></pre></div>
<p><br></p>
</div>
<div id="questions-from-day-1" class="section level3">
<h3><span class="header-section-number">1.2.5</span> 5. Questions from Day 1</h3>
<ul>
<li>Find new packages and functions
<ul>
<li>Use <strong>Cheatsheets</strong> to find common functions:
<ul>
<li>Go to <em>Help</em> > <em>Cheatsheets</em>.
<ul>
<li><em>Data Transformation</em> is what we’re learning now.</li>
<li><em>Data Visualization</em> is also good.</li>
</ul></li>
</ul></li>
<li>To search functions
<ul>
<li><a href="https://www.google.com">google.com</a></li>
<li><a href="https://stackoverflow.com/questions/12675147/how-can-we-make-xkcd-style-graphs">stackoverflow.com</a> + use the <code>[r]</code> tag</li>
</ul></li>
<li>To search packages: <a href="https://github.com/RhoInc/CRANsearcher">CRANsearcher</a></li>
</ul></li>
<li>Meet your <em>History</em>
<ul>
<li>Push up in the console to scroll through your recent command history.</li>
</ul></li>
<li>Get function help: <code>?arrange()</code></li>
<li>Learn more R?
<ul>
<li>R basics: <a href="https://tutorials.shinyapps.io/04-Programming-Basics/#section-welcome" class="uri">https://tutorials.shinyapps.io/04-Programming-Basics/#section-welcome</a></li>
<li>More <code>filter()</code>: <a href="https://jjallaire.shinyapps.io/learnr-tutorial-03a-data-manip-filter/" class="uri">https://jjallaire.shinyapps.io/learnr-tutorial-03a-data-manip-filter/</a></li>
<li><a href="http://r4ds.had.co.nz/"><strong>R for Data Science</strong></a> - is a great book that is online and teaches everything.</li>
</ul></li>
</ul>
<p><br></p>
</div>
</div>
</div>
<div id="guess-the-cat" class="section level1 unnumbered">
<h1><em>Guess the Cat</em></h1>
<div class="figure">
<img src="https://i.upworthy.com/nugget/530baf6f46d669b92a00001f/guess-who-game-9e782ebefa57a5fea47e29386f99b356.jpg" align="right" style="margin-top: -20px; margin-left: 20px; margin-right: 36px;" width="250" />
</div>
<blockquote>
<p>“Are you the best cat detective out there?”</p>
</blockquote>
<p>Let’s play a game to find out.</p>
<p><br></p>
<p><strong>The rules</strong></p>
<ol style="list-style-type: decimal">
<li>Scroll through the missing cat list and find a cat you like.</li>
<li><strong>Don’t let your neighbor see the cat you chose.</strong></li>
<li>Take turns asking each other questions about their cat.</li>
<li>Use the answers you receive to build a big <code>filter()</code> function that you can use to narrow down the potential cats.</li>
<li><em>Elusive</em> answers are encouraged, depending on how nasty you’re feeling.
<ul>
<li>For example, if someone asks you <em>How grumpy is your cat?</em></li>
<li>You can respond <em>My cat’s grumpiness is equal to two less than its age.</em></li>
<li>Or if you’re feeling generous you can give a more straightforward answer, such as <em>My cat’s grumpiness is definitely not 2.</em></li>
</ul></li>
<li>The winner is the first to guess their neighbor’s cat.</li>
</ol>
<p>If you have time play another round. Best of 3?</p>
<p><br></p>
</div>
<div id="data-transformation" class="section level1 unnumbered">
<h1>Data transformation</h1>
</div>
<div id="you-have-voicemail-1" class="section level1">
<h1><span class="header-section-number">2</span> | You have Voicemail (1)</h1>
<hr>
<p>Put your detective hat back on. Your neighbor just called and he shared some bad news. His cat has gone missing. He was so worked-up the only thing you could make out in the message was a description of how great his cat’s personality was. Here are the few details you were able to jot down.</p>
<p><em>My neighbor’s great cat:</em></p>
<blockquote>
<p>The cat’s clumsiness and its greediness sum to more than 12.</p>
<p>You could barely make this out, but you think the cat’s grumpiness and playfulness only sum to 6.</p>
<p>And it’s a female cat.</p>
<p>Oh and the cat is the same age as your neighbor <em>(in human years)</em>, who is probably around 85, maybe younger.</p>
</blockquote>
<p>It sounds like we’re going to need some math for this puzzle. Let’s calculate some new columns with <code>mutate()</code> to help find your neighbor’s cat.</p>
</div>
<div id="mutate" class="section level1">
<h1><span class="header-section-number">3</span> | <code>mutate()</code></h1>
<hr>
<p>It’s often useful to edit existing columns in a data frame or add new columns that are functions of existing columns. That’s the job of <code>mutate()</code>.</p>
<div id="get-to-know-your-data-frame" class="section level2">
<h2><span class="header-section-number">3.1</span> Get to know your data frame</h2>
<p>Before we go changing things in the data frame we’ll need to get to know the column names and the table’s dimensions a bit better. These quick functions are all great ways to describe your data frame.</p>
<div id="data-frame-details" class="section level3 unnumbered">
<h3>Data frame details</h3>
<ul>
<li><p><code>names(all_cats)</code> show the column names</p></li>
<li><p><code>nrow(all_cats)</code> number of rows</p></li>
<li><p><code>ncol(all_cats)</code> number of columns</p></li>
<li><p><code>summary(all_cats)</code> summary of all columns</p></li>
<li><p><code>glimpse(all_cats)</code> column names, plus a glimpse of first few values (requires loading <em>dplyr</em> package)</p></li>
</ul>
</div>
</div>
<div id="human-years" class="section level2">
<h2><span class="header-section-number">3.2</span> Human years</h2>
<p>In our work we often use <code>mutate</code> to calculate new units for measurements. In this case, let’s estimate the cat ages in human years. We’ll use the equation below to convert cat years to human years.</p>
<blockquote>
<p><strong>Human years</strong> = Cat years * 4 + 20</p>
</blockquote>
<div id="add-human-years" class="section level3 unnumbered">
<h3>Add human years</h3>
<p>Use <code>mutate()</code> to add a column called <code>human_age</code> to the table <em>all_cats</em>.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">all_cats <-<span class="st"> </span><span class="kw">mutate</span>(all_cats,
<span class="dt">human_age =</span> age <span class="op">*</span><span class="st"> </span><span class="dv">4</span> <span class="op">+</span><span class="st"> </span><span class="dv">20</span>)</code></pre></div>
<p><br></p>
</div>
<div id="update-a-column" class="section level3 unnumbered">
<h3>Update a column</h3>
<p>You can also add a <code>found_by</code> column. That way people will know where to send the reward money$$.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">my_cat <-<span class="st"> </span><span class="kw">mutate</span>(my_cat,
<span class="dt">found_by =</span> <span class="st">"Pet Detective Cooper"</span>)</code></pre></div>
<p><br></p>
<p>You can also use <code>mutate()</code> to update the value of a column that is already in your data frame.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">my_cat <-<span class="st"> </span><span class="kw">mutate</span>(my_cat,
<span class="dt">found_by =</span> <span class="st">"Nevermind, it's a secret"</span>)</code></pre></div>
</div>
<div id="pro-tip" class="section level3 unnumbered">
<h3><i class="fa fa-user-secret" aria-hidden="true" style="color:#040707;"></i> Pro-tip!</h3>
<blockquote>
<p>If you use <code>mutate()</code> and provide a single value for a column such as <code>found_by = "Pet Detective"</code>, then every row in the column you created will have that value. If you provide a vector of values, such as <code>human_age = age * 4 + 20</code>, then the column you created will have a list of values that each correspond to the values in the vector you provided. If you provide a vector that has a different length than the number of rows in your data frame, you’ll get an error telling you that the number of values you provide must be equal to the number of rows in your data frame or be a single value.</p>
</blockquote>
</div>
</div>
<div id="find-your-neighbors-cat" class="section level2">
<h2><span class="header-section-number">3.3</span> Find your neighbor’s cat</h2>
<p>Now let’s see if you can use <code>mutate()</code> to help match the description of the cat’s personality.</p>
<p>Here’s a reminder of how your neighbor described his cat.</p>
<blockquote>
<p>The cat’s clumsiness and its greediness scores sum to more than 12. You could barely make this out, but you think the cat’s grumpiness and playfulness only sum to 6. It’s a female cat.</p>
<p>And the cat is the same age as him <em>(in human years)</em>, which is probably around 85, maybe less.</p>
</blockquote>
<p>Use <code>mutate()</code> to add columns to the missing cat list, and then <code>filter()</code> the new columns to narrow the list down to your neighbor’s cat.</p>
<p>Here’s a snippet to get you started.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">library</span>(<span class="st">"dplyr"</span>)
<span class="co"># Add columns to all_cats</span>
all_cats <-<span class="st"> </span><span class="kw">mutate</span>(all_cats,
<span class="dt">clumsy_and_greedy =</span> clumsy <span class="op">+</span><span class="st"> </span>greedy,
<span class="dt">grumpy_and_playful =</span> grumpy <span class="op">+</span><span class="st"> </span>...,
<span class="dt">human_age =</span> age <span class="op">*</span><span class="st"> </span><span class="dv">4</span> <span class="op">+</span><span class="st"> </span>...
)
<span class="co"># Filter for cats matching neighbor's description</span>
<span class="kw">filter</span>(all_cats,
clumsy_and_greedy <span class="op">></span><span class="st"> </span>...,
grumpy_and_playful <span class="op">==</span><span class="st"> </span>...,
human_age <span class="op">></span><span class="st"> </span>...
)</code></pre></div>
<p><br></p>
<p>You can chain these two functions together and do everything in one go. For that you can use the <code>%>%</code> (pipe).</p>
<div class="figure">
<img src="https://d21ii91i3y6o6h.cloudfront.net/gallery_images/from_proof/9302/medium/1447173978/rstudio-hex-pipe-dot-psd.png" align="left" style="margin-right:20px;" width="115" />
</div>
<p><br></p>
<p>In a script the <code>%>%</code> is read as “and then”. In the code below we are telling R to <code>mutate</code> the data <strong>and then</strong> to <code>filter</code> it.</p>
<p><br></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">nei_cat <-<span class="st"> </span><span class="kw">mutate</span>(all_cats,
<span class="dt">clumsy_and_greedy =</span> clumsy <span class="op">+</span><span class="st"> </span>greedy,
<span class="dt">grumpy_and_playful =</span> grumpy <span class="op">+</span><span class="st"> </span>playful,
<span class="dt">human_age =</span> age <span class="op">*</span><span class="st"> </span><span class="dv">4</span> <span class="op">+</span><span class="st"> </span><span class="dv">20</span>) <span class="op">%>%</span>
<span class="st"> </span><span class="kw">filter</span>(clumsy_and_greedy <span class="op">></span><span class="st"> </span><span class="dv">12</span>,
grumpy_and_playful <span class="op">==</span><span class="st"> </span><span class="dv">6</span>,
human_age <span class="op">></span><span class="st"> </span><span class="dv">80</span>)</code></pre></div>
<div class="quiz">
<h3 id="pop-quiz-hotshot" class="unnumbered">Pop Quiz, hotshot!</h3>
<p><em>What is the name of your friend’s cat?</em></p>
<p><input type="radio"> <em>Damon</em> <br> <input type="radio"> <em>Fluffy George</em> <br> <input type="radio"> <em>Stinkerbell</em> <br> <input type="radio"> <em>Precious Abe</em> <br></p>
<p><br></p>
<details> <summary class = "btn_code"><em>Show solution</em></summary>
<p><i class="fa fa-check" aria-hidden="true" style="color: green;"></i> <code>Fluffy George</code></p>
<p><em>Breaking Meows! You rock!</em></p>
</details>
</div>
</div>
</div>
<div id="the-case-of-the-baby-tigers" class="section level1">
<h1><span class="header-section-number">4</span> | The case of the baby tigers 🐯 🐯</h1>
<hr>
<div class="figure">
<img src="https://rforcats.net/assets/img/pipe_gif.gif" width="360" />
</div>
<p><br></p>
<p>Sitting down for your morning coffee you notice the front page of the paper has a story on <strong>cats!</strong><br />
According to Phyllis Cattleby there’s been a whole string of striped kittens gone missing. <em>This sounds like a case for a pet detective.</em></p>
<p>Phyllis is pointing her finger at a circus that just came to town. A circus that seems to be drawing big crowds with their adorable and surprisingly tame “tiger babies”.</p>
<blockquote>
<p>What evidence could we find in the cat database to support or refute the claim that the circus is stealing young striped cats?</p>
</blockquote>
<p>For this level of sleuthing we’re going to need to <strong>summarize</strong> our data.</p>
</div>
<div id="summarize-this" class="section level1">
<h1><span class="header-section-number">5</span> | <code>summarize()</code> this</h1>
<hr>
<div class="figure">
<img src="images/summarize_diagram.png" width="490" />
</div>
<p><code>summarize</code> allows you to apply a summary function like <code>median()</code> to a column and collapse your data down to a single row. To really dig into <code>summarize</code> you’ll want to know some common summary functions, such as <code>sum()</code>, <code>mean()</code>, <code>median()</code>, <code>min()</code>, and <code>max()</code>.</p>
<div id="sum" class="section level2 unnumbered">
<h2><code>sum()</code></h2>
<p>Use <code>summarize()</code> and <code>sum()</code> to find the total of all <code>greedy</code> scores.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">summarize</span>(all_cats, <span class="dt">total_greedy =</span> <span class="kw">sum</span>(greedy))</code></pre></div>
</div>
<div id="mean" class="section level2 unnumbered">
<h2><code>mean()</code></h2>
<p>Use <code>summarize()</code> and <code>mean()</code> to calculate the mean age of all cats.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">cat_summary <-<span class="st"> </span><span class="kw">summarize</span>(all_cats,
<span class="dt">mean_age =</span> <span class="kw">mean</span>(age, <span class="dt">na.rm =</span> T))</code></pre></div>
<p><br></p>
<blockquote>
<p>Note the <code>na.rm = TRUE</code> in the <code>mean()</code> function. This tells R to ignore empty cells or missing values that show up in R as <code>NA</code>. If you leave <code>na.rm</code> out, the <em>mean</em> function will return ‘NA’ when it finds a missing value in the data.</p>
</blockquote>
</div>
<div id="median" class="section level2 unnumbered">
<h2><code>median()</code></h2>
<p>Use summarize to calculate the <em>median</em> level of grumpiness in all cats.</p>
<p><code>summarize(all_cats, median_grumpy = median(grumpy))</code></p>
</div>
<div id="max" class="section level2 unnumbered">
<h2><code>max()</code></h2>
<p>Use summarize to calculate the <em>maximum</em> playful score for all cats.</p>
<p><code>all_cats %>% summarize(max_playful = max(playful))</code></p>
</div>
<div id="min" class="section level2 unnumbered">
<h2><code>min()</code></h2>
<p>Use summarize to calculate the <em>minimum</em> playful score for all cats.</p>
<p><code>all_cats %>% summarize(min_playful = min(playful))</code></p>
</div>
<div id="nth" class="section level2 unnumbered">
<h2><code>nth()</code></h2>
<p>Use summarize and <code>nth(name, 12)</code> to find the name of the <em>12th</em> oldest cat in human years.</p>
<p><em>Hint: Use <code>arrange()</code> first.</em></p>
<p><code>arrange(all_cats, desc(human_age)) %>% summarize(cat_name_2 = nth(name, 12))</code></p>
</div>
<div id="sd" class="section level2 unnumbered">
<h2><code>sd()</code></h2>
<p>What is the <em>standard deviation</em> of the grumpiness scores?</p>
<p><code>summarize(all_cats, stdev_grumpy = sd(grumpy))</code></p>
</div>
<div id="quantile" class="section level2 unnumbered">
<h2><code>quantile()</code></h2>
<p><em>Quantiles</em> are useful for finding the upper or lower range of a column. Use the <code>quantile()</code> function to find the 5th and 95th percentiles of the cat ages.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">summarize</span>(all_cats,
<span class="dt">age_5th_pctile =</span> <span class="kw">quantile</span>(age, <span class="fl">0.05</span>, <span class="dt">na.rm =</span> T),
<span class="dt">age_95th_pctile =</span> <span class="kw">quantile</span>(age, <span class="fl">0.95</span>))</code></pre></div>
<p><em>Hint: add <code>na.rm = T</code> to <code>quantile()</code>.</em></p>
</div>
<div id="n" class="section level2 unnumbered">
<h2><code>n()</code></h2>
<p><code>n()</code> stands for <em>count</em>.</p>
<p>Use summarize and <code>n()</code> to count the number of <code>"brown"</code> cats.</p>
<p><em>Hint: Use <code>filter()</code> first.</em></p>
<p><code>filter(all_cats, color == "brown") %>% summarize(cat_count = n())</code></p>
<p><br></p>
<div id="exercise" class="section level4 unnumbered">
<h4>Exercise <i class="fa fa-bicycle" aria-hidden="true" style="color: green"></i></h4>
<p>Create a cat summary using 3 of the math functions above.</p>
</div>
</div>
<div id="striped-cats" class="section level2">
<h2><span class="header-section-number">5.1</span> Striped cats</h2>
<p>Now that we’re equipped with some powerful tools, let’s use <code>summarize()</code> to answer a few questions about the striped cats.</p>
<p><em>Is the age of missing <code>striped</code> cats lower than expected?</em></p>
<p>First, find the median age for all the missing cats.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">summarize</span>(all_cats, <span class="dt">median_age =</span> <span class="kw">median</span>(age, <span class="dt">na.rm =</span> T))</code></pre></div>
<p><br></p>
<p>Now, what is the median age for only striped cats?</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">filter</span>(all_cats, color <span class="op">==</span><span class="st"> "striped"</span>) <span class="op">%>%</span><span class="st"> </span><span class="kw">summarize</span>(<span class="dt">striped_med_age =</span> <span class="kw">median</span>(age, <span class="dt">na.rm =</span> T))</code></pre></div>
<p><br></p>
<p><em>But are striped cats the only color group that is younger than average?</em></p>
<blockquote>
<p>Wouldn’t it be great if we could easily find the age for every color of cat?</p>
</blockquote>
</div>
</div>
<div id="group_by" class="section level1">
<h1><span class="header-section-number">6</span> | <code>group_by()</code></h1>
<hr>
<p>Enter <code>group_by()</code> stage left. If you thought <code>summarize</code> was awesome, wait until you include <code>group_by</code> with your <code>summarize</code> commands.</p>
<p>Try using <code>group_by</code> with the column <em>color</em> and then use <code>summarize</code> to count the number of cats in each group.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">group_by</span>(all_cats, color) <span class="op">%>%</span><span class="st"> </span><span class="kw">summarize</span>(<span class="dt">color_count =</span> <span class="kw">n</span>()) <span class="op">%>%</span><span class="st"> </span><span class="kw">ungroup</span>()</code></pre></div>
<div id="pro-tip-1" class="section level3 unnumbered">
<h3><i class="fa fa-user-secret" aria-hidden="true" style="color:#040707;"></i> Pro-tip!</h3>
<blockquote>
<p>Ending with <code>ungroup()</code> is good practice. This will prevent your data from staying grouped after the summarizing has been completed.</p>
</blockquote>
<p><br></p>
<p>Well that’s interesting, but not conclusive evidence.</p>
<blockquote>
<p>What about the age of the missing striped cats? Are they younger on average than all the other groups?</p>
</blockquote>
<p>Let’s use <code>group_by</code> with the column <em>color</em> again, but this time use <code>summarize</code> to find the <code>mean(age)</code> for each cat color.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">group_by</span>(all_cats, color) <span class="op">%>%</span><span class="st"> </span>
<span class="st"> </span><span class="kw">summarize</span>(<span class="dt">mean_age =</span> <span class="kw">mean</span>(age, <span class="dt">na.rm =</span> T)) <span class="op">%>%</span><span class="st"> </span><span class="kw">ungroup</span>()</code></pre></div>
<p><br></p>
<p>That’s a lot of digits!</p>
</div>
<div id="round" class="section level2">
<h2><span class="header-section-number">6.1</span> <code>round()</code></h2>
<p>You can round the ages to a certain number of digits using the <code>round()</code> function. We can finish by adding the <code>arrange()</code> function to sort the table by our new column.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">group_by</span>(all_cats, color) <span class="op">%>%</span><span class="st"> </span>
<span class="st"> </span><span class="kw">summarize</span>(<span class="dt">mean_age =</span> <span class="kw">mean</span>(age, <span class="dt">na.rm =</span> T),
<span class="dt">mean_age_round =</span> <span class="kw">round</span>(mean_age, <span class="dt">digits =</span> <span class="dv">1</span>)) <span class="op">%>%</span><span class="st"> </span>
<span class="st"> </span><span class="kw">arrange</span>(mean_age_round) <span class="op">%>%</span><span class="st"> </span><span class="kw">ungroup</span>()</code></pre></div>
<p><br></p>
<p><strong>NOTE:</strong> The <code>round()</code> function in R does not automatically round values ending in 5 up, instead it uses scientific rounding. It rounds values ending in 5 to the nearest even number, so 2.5 rounded to the nearest whole number using <code>round()</code> is 2, and 3.5 rounded to the nearest whole number is 4. If you want to round all values ending in 5 up, then you’ll have to use a rounding function from another package.</p>
<div id="now-theres-some-good-evidence" class="section level3">
<h3><span class="header-section-number">6.1.1</span> Now there’s some good evidence!</h3>
<p>Why are the <em>striped</em> cats so much younger? Are they being catnapped and sent to the circus? Let’s put this piece of evidence in our back pocket for now. We can return to it after we learn to make some charts. Maybe then we’ll be able to put together a convincing report to send to the police chief.</p>
</div>
</div>
</div>
<div id="save-files" class="section level1">
<h1><span class="header-section-number">7</span> | Save files</h1>
<hr>
<p>Let’s save the last summary table we created to a <em>CSV</em>. That way we can print it to have it <strong>faxed</strong> to the police later. To save a data frame we’ll use the <code>write_csv()</code> function from our favorite <em>readr</em> package.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># First give the new data a name</span>
ages_by_color <-<span class="st"> </span><span class="kw">group_by</span>(all_cats, color) <span class="op">%>%</span><span class="st"> </span>
<span class="st"> </span><span class="kw">summarize</span>(<span class="dt">mean_age =</span> <span class="kw">mean</span>(age, <span class="dt">na.rm =</span> T),
<span class="dt">mean_age_round =</span> <span class="kw">round</span>(mean_age, <span class="dt">digits =</span> <span class="dv">1</span>)) <span class="op">%>%</span><span class="st"> </span>
<span class="st"> </span><span class="kw">arrange</span>(mean_age_round) <span class="op">%>%</span><span class="st"> </span><span class="kw">ungroup</span>()
<span class="co"># Write the file to your project folder</span>
<span class="kw">write_csv</span>(ages_by_color, <span class="st">"mean_cat_ages_by_color.csv"</span>)</code></pre></div>
<div id="pro-tip-2" class="section level3 unnumbered">
<h3><i class="fa fa-user-secret" aria-hidden="true" style="color:#040707;"></i> Pro-tip!</h3>
<blockquote>
<p><strong>Warning!</strong> R will overwrite a file if the file already exists in a folder. It will not ask for confirmation. You will not collect $200.</p>
</blockquote>
<p><br></p>
</div>
</div>
<div id="grouped-mutate" class="section level1">
<h1><span class="header-section-number">8</span> | Grouped <code>mutate()</code></h1>
<br>
<hr>
<p>We can bring back <code>mutate</code> to add a column based on the grouped values in a data set. For example, you may want to add a column showing the average age by country to the whole table.</p>
<p>When you combine <code>group_by</code> and <code>mutate</code> the new column will be calculated based on the values within each group.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">group_by</span>(all_cats, country) <span class="op">%>%</span><span class="st"> </span><span class="kw">mutate</span>(<span class="dt">country_mean_age =</span> <span class="kw">mean</span>(age, <span class="dt">na.rm =</span> T)) <span class="op">%>%</span><span class="st"> </span><span class="kw">ungroup</span>()</code></pre></div>
<p><br></p>
<div id="exercise-1" class="section level4 unnumbered">
<h4>Exercise <i class="fa fa-bicycle" aria-hidden="true" style="color: green"></i></h4>
<p>Estimate the mean grumpiness for each group of cats with the same greediness score.</p>
<p><br></p>
</div>
<div id="exercise-2" class="section level3 unnumbered">
<h3>Exercise 2 <i class="fa fa-bicycle" aria-hidden="true" style="color: green"></i></h3>
<p>Find the median grumpiness score for each country.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">group_by</span>(all_cats, country) <span class="op">%>%</span><span class="st"> </span>
<span class="st"> </span><span class="kw">mutate</span>(<span class="dt">grumpy_by_country =</span> <span class="kw">median</span>(grumpy, <span class="dt">na.rm =</span> T)) <span class="op">%>%</span><span class="st"> </span>
<span class="st"> </span><span class="kw">ungroup</span>()</code></pre></div>
</div>
<div id="break-time" class="section level2">
<h2><span class="header-section-number">8.1</span> Break time <i class="fa fa-hourglass-half" aria-hidden="true" style="color:#040707;"></i></h2>
<div class="figure">
<img src="http://www.gifbin.com/bin/012012/1331836791_kittens_on_turntables.gif" />
</div>
<p><br></p>
<p>Take 5 minutes to relax.</p>
<p><br><br><br></p>
</div>
</div>
<div id="plots-with-ggplot2" class="section level1">
<h1><span class="header-section-number">9</span> Plots with <em>ggplot2</em></h1>
<div class="figure">
<img src="https://images.duckduckgo.com/iu/?u=https%3A%2F%2Fd21ii91i3y6o6h.cloudfront.net%2Fgallery_images%2Ffrom_proof%2F9296%2Fmedium%2F1447173871%2Frstudio-hex-ggplot2-dot-psd.png&f=1" align="right" style="margin-right: 30px; margin-top: -28px;" width="145" />
</div>
<p><br> Install <em>ggplot2</em> using <code>install.packages("ggplot2")</code>.</p>
<p><br></p>
<div id="load-the-movie-data" class="section level2">
<h2><span class="header-section-number">9.1</span> Load the movie data</h2>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">library</span>(<span class="st">"readr"</span>)
<span class="kw">library</span>(<span class="st">"dplyr"</span>)
<span class="kw">library</span>(<span class="st">"ggplot2"</span>)
<span class="co"># Path to movie data</span>
movie_url <-<span class="st"> "https://raw.githubusercontent.com/MPCA-air/RCamp/master/data/movies/IMDB.csv"</span>
<span class="co"># Read the IMDB movie data and save as `movies`</span>
movies <-<span class="st"> </span><span class="kw">read_csv</span>(movie_url)</code></pre></div>
<div id="rename-the-column-color-to-movie_color." class="section level3">
<h3><span class="header-section-number">9.1.1</span> Rename the column <code>color</code> to “movie_color”.</h3>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Show column names</span>
<span class="co">#names(movies)</span>
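<span class="co"># Rename the 'color' column to 'movie_color'</span>
movies <-<span class="st"> </span><span class="kw">rename</span>(movies, <span class="dt">movie_color =</span> color)
<span class="co"># Rename the 'actor_1_name' column</span>
movies <-<span class="st"> </span><span class="kw">rename</span>(movies, <span class="dt">superstar =</span> actor_1_name)</code></pre></div>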
<p><br><br></p>
</div>
</div>
<div id="the-3-parts-of-a-ggplot" class="section level2">
<h2><span class="header-section-number">9.2</span> The 3 parts of a <em>ggplot</em></h2>
<div id="set-the-base-plot." class="section level3">
<h3><span class="header-section-number">9.2.1</span> 1. Set the base plot.</h3>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">ggplot</span>(movies)</code></pre></div>
<p><img src="02_Day2_ggplot_charts_files/figure-html/unnamed-chunk-14-1.png" width="960" /></p>
<p><br></p>
<blockquote>
<p>Note when we load the package it’s <code>library ("ggplot2")</code> but when we use the function, it’s <code>ggplot(movies)</code> without the 2 following ggplot. It’s annoying, but that’s the way it is.</p>
</blockquote>
</div>
<div id="set-the-x-y-aesthetics." class="section level3">
<h3><span class="header-section-number">9.2.2</span> 2. Set the X, Y <em>(aesthetics)</em>.</h3>
<p><em>Aesthetics</em> are the visual components from the data that you want to use in the chart. These also determine the dimensions of the plot.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">ggplot</span>(movies, <span class="kw">aes</span>(<span class="dt">x =</span> movie_facebook_likes, <span class="dt">y =</span> gross_mil)) </code></pre></div>
<p><img src="02_Day2_ggplot_charts_files/figure-html/unnamed-chunk-15-1.png" width="960" /></p>
</div>
<div id="add-layers-geometries." class="section level3">
<h3><span class="header-section-number">9.2.3</span> 3. Add layers <em>(geometries)</em>.</h3>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">ggplot</span>(movies, <span class="kw">aes</span>(<span class="dt">x =</span> movie_facebook_likes, <span class="dt">y =</span> gross_mil)) <span class="op">+</span><span class="st"> </span>
<span class="st"> </span><span class="kw">geom_point</span>()</code></pre></div>
<p><img src="02_Day2_ggplot_charts_files/figure-html/unnamed-chunk-16-1.png" width="960" /></p>
<p><br></p>
</div>
<div id="pro-tip-3" class="section level3">
<h3><span class="header-section-number">9.2.4</span> <i class="fa fa-user-secret" aria-hidden="true" style="color:#040707;"></i> Pro-tip!</h3>
<blockquote>
<p>When you add more layers using <code>+</code>, remember to place it at the end of the line, not at the start of the next one.</p>
</blockquote>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># This will work</span>
<span class="kw">ggplot</span>() <span class="op">+</span>
<span class="st"> </span><span class="kw">geom_point</span>()
<span class="co"># BUT this will give you a nasty error message</span>
<span class="kw">ggplot</span>()
<span class="op">+</span><span class="st"> </span><span class="kw">geom_point</span>()</code></pre></div>
<div id="exercise-3" class="section level4 unnumbered">
<h4>Exercise <i class="fa fa-bicycle" aria-hidden="true" style="color: green"></i></h4>
<p>Try making a scatterplot of any two columns.</p>
<p><em>Hint: Numeric variables will be more informative.</em></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">ggplot</span>(movies, <span class="kw">aes</span>(<span class="dt">x =</span> ?column1, <span class="dt">y =</span> ?column2)) <span class="op">+</span><span class="st"> </span><span class="kw">geom_point</span>()</code></pre></div>
</div>
</div>
</div>
<div id="scatterplots-and-point-charts." class="section level2">
<h2><span class="header-section-number">9.3</span> Scatterplots and point charts.</h2>
<p>Let’s select only recent movies using <code>filter()</code>.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">filter</span>(movies, title_year <span class="op">>=</span><span class="st"> </span><span class="dv">2010</span>)</code></pre></div>
<div id="to-repeat-the-earlier-chart-with-the-filtered-data-use-the-pipe." class="section level3">
<h3><span class="header-section-number">9.3.1</span> To repeat the earlier chart with the filtered data use the <code>%>%</code> pipe.</h3>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">filter</span>(movies, title_year <span class="op">>=</span><span class="st"> </span><span class="dv">2010</span>) <span class="op">%>%</span>
<span class="st"> </span><span class="kw">ggplot</span>(<span class="kw">aes</span>(<span class="dt">x =</span> movie_facebook_likes, <span class="dt">y =</span> gross_mil)) <span class="op">+</span><span class="st"> </span>
<span class="st"> </span><span class="kw">geom_point</span>()</code></pre></div>
<p><img src="02_Day2_ggplot_charts_files/figure-html/unnamed-chunk-20-1.png" width="960" /></p>
</div>
<div id="add-transparency-to-the-points." class="section level3">
<h3><span class="header-section-number">9.3.2</span> Add transparency to the points.</h3>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">filter</span>(movies, title_year <span class="op">>=</span><span class="st"> </span><span class="dv">2010</span>) <span class="op">%>%</span>
<span class="st"> </span><span class="kw">ggplot</span>(<span class="kw">aes</span>(<span class="dt">x =</span> movie_facebook_likes, <span class="dt">y =</span> gross_mil)) <span class="op">+</span><span class="st"> </span>
<span class="st"> </span><span class="kw">geom_point</span>(<span class="dt">alpha =</span> <span class="fl">0.1</span>) </code></pre></div>
<p><img src="02_Day2_ggplot_charts_files/figure-html/unnamed-chunk-21-1.png" width="960" /></p>
</div>
<div id="add-a-regression-line." class="section level3">
<h3><span class="header-section-number">9.3.3</span> Add a regression line.</h3>
<p>We can keep adding layers! You can build your plot sandwich as big as you like.<br />
<br></p>
<div class="figure">
<img src="http://3.bp.blogspot.com/-3neH1OJJkss/VdT0e7l_GrI/AAAAAAAABKs/8fJwUmRfCjs/s1600/54066b1cb96cd.jpg" width="300" />
</div>
<p><br></p>
<p>Use <code>geom_smooth()</code> to add a regression line.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">filter</span>(movies, title_year <span class="op">>=</span><span class="st"> </span><span class="dv">2010</span>) <span class="op">%>%</span><span class="st"> </span>
<span class="st"> </span><span class="kw">ggplot</span>(<span class="kw">aes</span>(<span class="dt">x =</span> movie_facebook_likes, <span class="dt">y =</span> gross_mil)) <span class="op">+</span><span class="st"> </span>
<span class="st"> </span><span class="kw">geom_point</span>(<span class="dt">alpha =</span> <span class="fl">0.25</span>) <span class="op">+</span>
<span class="st"> </span><span class="kw">geom_smooth</span>(<span class="dt">method =</span> <span class="st">"lm"</span>)</code></pre></div>
<p><img src="02_Day2_ggplot_charts_files/figure-html/unnamed-chunk-22-1.png" width="960" /></p>
<div id="exercise-4" class="section level4 unnumbered">
<h4>Exercise <i class="fa fa-bicycle" aria-hidden="true" style="color: green"></i></h4>
<p>Make a scatterplot of <code>imdb_score</code> and <code>gross_mil</code> with a fitted line showing the relationship.</p>
<details> <summary class="btn_code"><em>Show solution</em></summary>
<p>Stop cheating! Just kidding, here’s some code to help.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">filter</span>(movies, title_year <span class="op">>=</span><span class="st"> </span><span class="dv">2010</span>) <span class="op">%>%</span><span class="st"> </span>
<span class="st"> </span><span class="kw">ggplot</span>(<span class="kw">aes</span>(<span class="dt">x =</span> imdb_score, <span class="dt">y =</span> gross_mil)) <span class="op">+</span><span class="st"> </span>
<span class="st"> </span><span class="kw">geom_point</span>(<span class="dt">alpha =</span> <span class="fl">0.25</span>) <span class="op">+</span>
<span class="st"> </span><span class="kw">geom_smooth</span>(<span class="dt">method =</span> <span class="st">"lm"</span>)</code></pre></div>
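<p><img src="02_Day2_ggplot_charts_files/figure-html/unnamed-chunk-23-1.png" alt="Scatterplot of gross_mil against imdb_score for movies from 2010 onward, with a fitted regression line" width="960" /></p>
</details>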
<p><br></p>
</div>
</div>
</div>
<div id="histograms" class="section level2">
<h2><span class="header-section-number">9.4</span> Histograms</h2>
<p>Now let’s make some histograms showing how the total number of movies changes over time.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">ggplot</span>(movies, <span class="kw">aes</span>(<span class="dt">x =</span> title_year)) <span class="op">+</span><span class="st"> </span><span class="kw">geom_histogram</span>()</code></pre></div>
<p><img src="02_Day2_ggplot_charts_files/figure-html/unnamed-chunk-24-1.png" width="960" /></p>
<p><br></p>
<p>To show the changes per decade we can break the years into groups of 10.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">ggplot</span>(movies, <span class="kw">aes</span>(<span class="dt">x =</span> title_year)) <span class="op">+</span><span class="st"> </span><span class="kw">geom_histogram</span>(<span class="dt">binwidth =</span> <span class="dv">10</span>)</code></pre></div>
<p><img src="02_Day2_ggplot_charts_files/figure-html/unnamed-chunk-25-1.png" width="960" /></p>
<div id="break-it-down-by-movie_color." class="section level3">
<h3><span class="header-section-number">9.4.1</span> Break it down by <code>movie_color</code>.</h3>
<p>You can assign different aesthetics to variables in the data set. The example below sets the <code>fill</code> color to variable <code>movie_color</code>. This will color code each color type, one color for black and white movies and one for color movies.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">ggplot</span>(movies, <span class="kw">aes</span>(<span class="dt">x =</span> title_year, <span class="dt">fill =</span> movie_color)) <span class="op">+</span>
<span class="st"> </span><span class="kw">geom_histogram</span>(<span class="dt">binwidth =</span> <span class="dv">10</span>)</code></pre></div>
<p><img src="02_Day2_ggplot_charts_files/figure-html/unnamed-chunk-26-1.png" width="960" /></p>
</div>
<div id="move-the-bars-side-by-side-instead-of-stacked." class="section level3">
<h3><span class="header-section-number">9.4.2</span> Move the bars side-by-side instead of stacked.</h3>
<p>It’s difficult to see what’s going on with the black and white films. Let’s split the colors apart by setting <code>position = "dodge"</code>, which is shorthand for <code>position_dodge()</code>.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">ggplot</span>(movies, <span class="kw">aes</span>(<span class="dt">x =</span> title_year, <span class="dt">fill =</span> movie_color)) <span class="op">+</span>
<span class="st"> </span><span class="kw">geom_histogram</span>(<span class="dt">binwidth =</span> <span class="dv">10</span>, <span class="dt">position =</span> <span class="st">"dodge"</span>)</code></pre></div>
<p><img src="02_Day2_ggplot_charts_files/figure-html/unnamed-chunk-27-1.png" width="960" /></p>
</div>
<div id="split-into-separate-charts-for-black-and-white-vs.color" class="section level3">
<h3><span class="header-section-number">9.4.3</span> Split into separate charts for black and white vs. color</h3>
<p>Maybe it would work better to use two separate charts. For that we can use <code>facet_wrap()</code>.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">ggplot</span>(movies, <span class="kw">aes</span>(<span class="dt">x =</span> title_year, <span class="dt">fill =</span> movie_color)) <span class="op">+</span>
<span class="st"> </span><span class="kw">geom_histogram</span>(<span class="dt">binwidth =</span> <span class="dv">10</span>) <span class="op">+</span>
<span class="st"> </span><span class="kw">facet_wrap</span>(<span class="op">~</span><span class="st"> </span>movie_color)</code></pre></div>
<p><img src="02_Day2_ggplot_charts_files/figure-html/unnamed-chunk-28-1.png" width="960" /></p>
</div>
<div id="free-y-axis" class="section level3">
<h3><span class="header-section-number">9.4.4</span> Free y-axis</h3>
<p>That is almost good. It’s still hard to see the changes in black and white films. Let’s make the y-axis independent for each group using <code>scales = "free_y"</code>. Type <code>?facet_wrap</code> to see more options.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">ggplot</span>(movies, <span class="kw">aes</span>(title_year, <span class="dt">fill =</span> movie_color)) <span class="op">+</span>
<span class="st"> </span><span class="kw">geom_histogram</span>(<span class="dt">binwidth =</span> <span class="dv">10</span>) <span class="op">+</span>
<span class="st"> </span><span class="kw">facet_wrap</span>(<span class="op">~</span><span class="st"> </span>movie_color, <span class="dt">scales =</span> <span class="st">"free_y"</span>)</code></pre></div>
<p><img src="02_Day2_ggplot_charts_files/figure-html/unnamed-chunk-29-1.png" width="960" /></p>
<p><em>Note: When the scales are not uniform, make sure to point this out to your readers. Otherwise people might assume the scales are the same. Then they would think there are just as many black and white films as color movies.</em></p>
<p><br></p>
<div id="exercise-5" class="section level4 unnumbered">
<h4>Exercise <i class="fa fa-bicycle" aria-hidden="true" style="color: green"></i></h4>
<p>Make a histogram of the number of movies by decade with separate <code>fill</code> colors for each <code>content_rating</code>.</p>
<p>Decide which is the best way to present the bars: <em>stacked, side-by-side, or on separate charts.</em></p>
<p><br></p>
<details> <summary class="btn_code"><em>Show solution</em></summary>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Stacked</span>
<span class="kw">ggplot</span>(movies, <span class="kw">aes</span>(title_year, <span class="dt">fill =</span> content_rating)) <span class="op">+</span>
<span class="st"> </span><span class="kw">geom_histogram</span>(<span class="dt">binwidth =</span> <span class="dv">10</span>)
<span class="co"># Side by side</span>
<span class="kw">ggplot</span>(movies, <span class="kw">aes</span>(title_year, <span class="dt">fill =</span> content_rating)) <span class="op">+</span>
<span class="st"> </span><span class="kw">geom_histogram</span>(<span class="dt">binwidth =</span> <span class="dv">10</span>, <span class="dt">position =</span> <span class="kw">position_dodge</span>())
<span class="co"># Separate charts</span>
<span class="kw">ggplot</span>(movies, <span class="kw">aes</span>(title_year, <span class="dt">fill =</span> content_rating)) <span class="op">+</span>
<span class="st"> </span><span class="kw">geom_histogram</span>(<span class="dt">binwidth =</span> <span class="dv">10</span>) <span class="op">+</span><span class="st"> </span>
<span class="st"> </span><span class="kw">facet_wrap</span>( <span class="op">~</span><span class="st"> </span>content_rating)
<span class="co"># Free scale it</span>
<span class="kw">ggplot</span>(movies, <span class="kw">aes</span>(title_year, <span class="dt">fill =</span> content_rating)) <span class="op">+</span>
<span class="st"> </span><span class="kw">geom_histogram</span>(<span class="dt">binwidth =</span> <span class="dv">10</span>) <span class="op">+</span><span class="st"> </span>
<span class="st"> </span><span class="kw">facet_wrap</span>( <span class="op">~</span><span class="st"> </span>content_rating, <span class="dt">scales =</span> <span class="st">"free_y"</span>)</code></pre></div>
</details>
<p><br></p>
</div>
</div>
</div>
</div>
<div id="day-2-homework" class="section level1">
<h1><span class="header-section-number">10</span> Day 2 homework</h1>
<ol style="list-style-type: decimal">
<li>Load some data into R from a recent project of yours.
<ul>
<li>If you have data in Excel, you can export a <em>.CSV</em> file by choosing “Save as” and selecting CSV from the file type menu.</li>
</ul></li>
<li>Create 2 types of plots using the data.</li>
<li>Paste the function <code>runif(1)</code> into your console.
<ul>
<li>If the number you get is bigger than <code>0.2</code>, email your charts to the rest of the class.</li>
</ul></li>
</ol>
<p><br></p>
</div>
<div id="return-to-rcamp" class="section level1 unnumbered">
<h1>Return to <a href="index.html">RCamp</a></h1>
<p><br></p>
</div>
</div>
</div>
</div>
<script>
// Apply Bootstrap's table classes to tables that have a header row.
// Starts from every <tr class="header">, climbs to its parent <thead>,
// then to that <thead>'s parent <table>, and tags the table.
function bootstrapStylePandocTables() {
  var headerRows = $('tr.header');
  var tableHeads = headerRows.parent('thead');
  var styledTables = tableHeads.parent('table');
  styledTables.addClass('table table-condensed');
}
// Defer the table styling until jQuery signals that the document is ready.
$(document).ready(function () {
bootstrapStylePandocTables();
});
</script>
<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
(function () {
  // Create the <script> element that will fetch MathJax.
  var mathjaxLoader = document.createElement("script");
  mathjaxLoader.type = "text/javascript";
  mathjaxLoader.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";

  // Attach it to the first <head> element so the browser loads and runs it.
  var headElements = document.getElementsByTagName("head");
  headElements[0].appendChild(mathjaxLoader);
})();
</script>
</body>
</html>