diff --git a/R/add-general.R b/R/add-general.R index 68a1dff0..73d0f18d 100644 --- a/R/add-general.R +++ b/R/add-general.R @@ -236,15 +236,15 @@ ff_value <- function(.fun, .count = FALSE) { if (.count) { plot <- plot + ggplot2::stat_count(ggplot2::aes(label = format_number(ggplot2::after_stat(count), accuracy = accuracy, scale_cut = scale_cut)), - geom = "text", vjust = vjust, hjust = hjust, size = size, position = position, ...) + geom = "text", vjust = vjust, hjust = hjust, size = size, position = position, show.legend = FALSE, ...) } else { if ((stringr::str_sub(ptype, 2, 2) == "c")) { plot <- plot + ggplot2::stat_summary(ggplot2::aes(label = format_number(ggplot2::after_stat(y), accuracy = accuracy, scale_cut = scale_cut)), - fun = .fun, geom = "text", vjust = vjust, hjust = hjust, size = size, position = position, ...) + fun = .fun, geom = "text", vjust = vjust, hjust = hjust, size = size, position = position, show.legend = FALSE, ...) } if ((stringr::str_sub(ptype, 1, 1) == "c")) { plot <- plot + ggplot2::stat_summary(ggplot2::aes(label = format_number(ggplot2::after_stat(x), accuracy = accuracy, scale_cut = scale_cut)), - fun = .fun, geom = "text", vjust = vjust, hjust = hjust, size = size, position = position, ...) + fun = .fun, geom = "text", vjust = vjust, hjust = hjust, size = size, position = position, show.legend = FALSE, ...) 
} } plot diff --git a/_pkgdown.yml b/_pkgdown.yml index b85b3f2a..d4eef511 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -15,9 +15,9 @@ articles: - title: Articles navbar: ~ contents: + - articles/Advanced-plotting - articles/Visualizing-data - articles/Color-schemes - - articles/Advanced-plotting - articles/Design-principles reference: @@ -32,10 +32,10 @@ reference: - subtitle: Data points & amounts - contents: - starts_with("add_data_points") - - starts_with("add_line") - - starts_with("add_heatmap") - starts_with("add_count") - starts_with("add_sum") + - starts_with("add_heatmap") + - starts_with("add_line") - subtitle: Central tendency - contents: - starts_with("add_mean") diff --git a/man/figures/README-unnamed-chunk-7-1.png b/man/figures/README-unnamed-chunk-7-1.png index 1db09991..634e0690 100644 Binary files a/man/figures/README-unnamed-chunk-7-1.png and b/man/figures/README-unnamed-chunk-7-1.png differ diff --git a/tests/testthat/test-add-misc.R b/tests/testthat/test-add-misc.R index 2fd689aa..65bb40a5 100644 --- a/tests/testthat/test-add-misc.R +++ b/tests/testthat/test-add-misc.R @@ -14,6 +14,7 @@ test_that("add annotation", { vdiffr::expect_doppelganger("add title and caption", .) }) +# violins failing in remote check # test_that("add boxplot and violin works", { # study %>% # tidyplot(x = treatment, y = score, color = treatment) %>% diff --git a/tests/testthat/test-adjust.R b/tests/testthat/test-adjust.R index 4834fe12..1b4f84ab 100644 --- a/tests/testthat/test-adjust.R +++ b/tests/testthat/test-adjust.R @@ -106,6 +106,7 @@ test_that("plotmath expressions work", { vdiffr::expect_doppelganger("plotmath expression x axis labels", .) 
}) +# failing in remote check # test_that("adjust plot area size work", { # study %>% # tidyplot(x = dose, y = score, color = group) %>% diff --git a/vignettes/articles/Advanced-plotting.Rmd b/vignettes/articles/Advanced-plotting.Rmd index cace6822..fb3f534f 100644 --- a/vignettes/articles/Advanced-plotting.Rmd +++ b/vignettes/articles/Advanced-plotting.Rmd @@ -271,3 +271,5 @@ time_course %>% add_title("orientation = y") ``` +# Padding + diff --git a/vignettes/articles/Visualizing-data.Rmd b/vignettes/articles/Visualizing-data.Rmd index 4b32739f..71689d0a 100644 --- a/vignettes/articles/Visualizing-data.Rmd +++ b/vignettes/articles/Visualizing-data.Rmd @@ -20,12 +20,209 @@ knitr::opts_chunk$set( ```{r setup} library(tidyplots) ``` + +# Data points + +Plotting the raw data points is probably the most bare-bones way to visualize a dataset. The corresponding function in tidyplots is called `add_data_points()`. + +```{r} +animals %>% + tidyplot(x = weight, y = size) %>% + add_data_points() +``` + +In the above example some data points appear to overlap other points. To account for this so-called _overplotting_, you might want to add a thin white border around the points. This is achieved by setting the parameter `confetti = TRUE`. + +```{r} +animals %>% + tidyplot(x = weight, y = size) %>% + add_data_points(confetti = TRUE) +``` + +Another way is to make the points transparent using the `alpha` parameter. + +```{r} +animals %>% + tidyplot(x = weight, y = size) %>% + add_data_points(alpha = 0.3) +``` + +Or by changing the plotting symbol to an open `shape`. + +```{r} +animals %>% + tidyplot(x = weight, y = size) %>% + add_data_points(shape = 1) +``` + +However, data points can also be used when plotting a _discrete variable_ like `treatment` against a _continuous variable_ like `score`. + +```{r} +study %>% + tidyplot(x = treatment, y = score) %>% + add_data_points() +``` + +To avoid overplotting in this scenario, there are two additional options. 
You can add some random noise or _jitter_ to the y position. + +```{r} +study %>% + tidyplot(x = treatment, y = score) %>% + add_data_points_jitter() +``` + +Alternatively, you can use an algorithm that keeps the points centered and just moves potentially overlapping points to the sides. + +```{r} +study %>% + tidyplot(x = treatment, y = score) %>% + add_data_points_beeswarm() +``` + # Amounts -# Distributions +For some datasets, it makes sense to `count` or `sum` up data points in order to arrive at a conclusion. As one example, let's have a look at the `spendings` dataset. + +```{r results='markup'} +spendings +``` + +As you can see, this dataset contains family spendings over a time period of 15 days in October. Here, it might be informative to see which spending categories are reoccurring and which are just one-time spendings. + +```{r} +spendings %>% + tidyplot(x = category) %>% + add_count_bar() +``` + +One thing to note here is that the x axis labels are overlapping and are thus unreadable. There are at least two possible solutions for this. One is to swap the x and y axis. ```{r} +spendings %>% + tidyplot(y = category) %>% + add_count_bar() +``` + +The other one is to rotate the x axis labels. + +```{r} +spendings %>% + tidyplot(x = category) %>% + add_count_bar() %>% + adjust_x_axis(rotate_labels = TRUE) +``` + +Now we can appreciate that this family had reoccurring spendings for _Food_ but just one spending for _Housing_. + +Next, we ask the question how much was spent on each of the categories by plotting the `sum` amount. + +```{r} +spendings %>% + tidyplot(x = category, y = amount, color = category) %>% + add_sum_bar() %>% + adjust_x_axis(rotate_labels = TRUE) +``` + +Note that we had to introduce the parameter `y = amount` in the `tidyplot()` function to make it clear which variable should be summed up. + +I also added `color = category` in the `tidyplot()` function to have the variable `category` encoded by different colors. 
+ +Since the data labels for the variable `category` are now duplicated in the plot, one could argue that it would be justified to remove the duplicated information on the x axis. + +```{r} +spendings %>% + tidyplot(x = category, y = amount, color = category) %>% + add_sum_bar() %>% + adjust_x_axis(rotate_labels = TRUE) %>% + remove_x_axis_labels() %>% + remove_x_axis_title() %>% + remove_x_axis_ticks() +``` +Note that besides the x axis labels, I also removed the x axis ticks and x axis title to achieve a clean look. + +Of course you are free to play around with different graphical representations of the sum values. Here is an example of a lollipop plot constructed from a thin `bar` and a `dot`. + +```{r} +spendings %>% + tidyplot(x = category, y = amount, color = category) %>% + add_sum_bar(width = 0.03) %>% + add_sum_dot() %>% + add_sum_value(accuracy = 1) %>% + adjust_x_axis(rotate_labels = TRUE) %>% + remove_x_axis_labels() %>% + remove_x_axis_title() %>% + remove_x_axis_ticks() +``` + +I also added the sum value as a text label using the `add_sum_value()` function. + +# Heatmaps + +Heatmaps are a great way to plot a _continuous variable_ across two _discrete variables_. To exemplify this, we will have a look at the `gene_expression` dataset. + +```{r results='markup'} +gene_expression %>% + dplyr::glimpse() +``` + +I used the `dplyr::glimpse()` function from the dplyr package to get a nice overview of all variables included in the dataset. We will start by plotting the `expression` values of each `external_gene_name` across the `sample` variable. + +```{r} +gene_expression %>% + tidyplot(x = sample, y = external_gene_name, color = expression) %>% + add_heatmap() +``` + +One thing to note here is that the y axis labels are overlapping. So let's increase the height of the plot area from 50 to 80 mm. 
+ +```{r fig.height=5} +gene_expression %>% + tidyplot(x = sample, y = external_gene_name, color = expression) %>% + add_heatmap() %>% + adjust_plot_area_size(height = 80) +``` + +The next thing to note is that some of the rows like _Map1a_ and _Kif1a_ show very high expression while others show much lower values. Let's apply a classical technique to reserve the variations in the color for differences within each row. This is done by calculating _row z scores_ for each row individually. Luckily, tidyplots does this for us when setting the parameter `scale = "row"` within the `add_heatmap()` function call. + +```{r} +gene_expression %>% + tidyplot(x = sample, y = external_gene_name, color = expression) %>% + add_heatmap(scale = "row") %>% + adjust_plot_area_size(height = 80) +``` + +Now it is much easier to appreciate the dynamics of individual genes across the samples on the x axis. + +However, the rows appear to be mixed. Some have rather high expression in the "Eip" samples while others have high values in the "Hip" samples. Conveniently, in the dataset there is a variable called `direction`, which is either "up" or "down". Let's use this variable to sort our y axis. 
+ +```{r} +gene_expression %>% + tidyplot(x = sample, y = external_gene_name, color = expression) %>% + add_heatmap(scale = "row") %>% + adjust_plot_area_size(height = 80) %>% + sort_y_axis_labels(direction) +``` + +This starts looking like a classical gene expression heatmap + +# Central tendency + + + +```{r} + +``` + +# Dispersion & uncertainty + +```{r} + +``` + +# Distribution + +```{r} distributions %>% tidyplot(name, value) %>% add_mean_bar(alpha = 0.3) %>% @@ -59,27 +256,11 @@ energy %>% tidyplot(x = energy_type, y = power, color = energy_type) %>% add_violin() %>% add_data_points_beeswarm(jitter_width = 0.8, alpha = 0.3) - ``` - -# Proportions +# Proportion ```{r} - -# themes - -p4 <- - energy %>% - tidyplot(year, power, color = energy_source) %>% - add_barstack_absolute() - -p4 -p4 %>% theme_minimal_y() -p4 %>% theme_ggplot2() - -# proportions - energy %>% tidyplot(year, power, color = energy_source) %>% add_barstack_absolute() @@ -163,7 +344,23 @@ animals %>% animals %>% tidyplot(x = diet, y = speed, color = family) %>% add_barstack_relative() +``` + +# Statistical comparison + +```{r} + +``` +# Annotation + +```{r} + +``` + + + +```{r} # curves energy_week %>%