From 4bf0e6258558c801e2152ed99a4bb09f6225b95a Mon Sep 17 00:00:00 2001 From: Mattia Greco Date: Mon, 8 Jan 2024 13:51:18 +0100 Subject: [PATCH] modify filter functions --- R/filter.R | 89 +++++++++++++++++++++++++----------- man/filter_by_coordinates.Rd | 2 +- man/filter_by_month.Rd | 2 +- man/filter_by_species.Rd | 4 +- man/filter_by_year.Rd | 2 +- 5 files changed, 68 insertions(+), 31 deletions(-) diff --git a/R/filter.R b/R/filter.R index ee4e579..9a30e56 100644 --- a/R/filter.R +++ b/R/filter.R @@ -12,12 +12,12 @@ reshape_forcis <- function(data){ if (get_data_type(data) %in% c("CPR North")) { - stop(paste0("This function is not designed to work with 'CPR North' data"), call. = FALSE) + stop("This function is not designed to work with 'CPR North' data", call. = FALSE) } taxa_cols <- get_species_names(data) metadat_cols <- get_required_columns() - dat_reshaped <- dat_reshaped %>% + dat_reshaped <- data %>% select(all_of(taxa_cols),metadat_cols) %>% pivot_longer(all_of(taxa_cols), names_to = 'taxa', @@ -29,7 +29,7 @@ reshape_forcis <- function(data){ #' Filter forcis data by year of sampling #' -#' @param data forcis data in long format +#' @param data forcis data #' @param years numeric vector of selected years #' #' @return A `data.frame`. @@ -40,21 +40,32 @@ reshape_forcis <- function(data){ filter_by_year <- function(data,years){ year_vector <- as.numeric(years) - filtered_dat <- data %>% - filter(! is.na(.data$profile_date_time)) %>% - mutate(new_profile_date_time = dmy(.data$profile_date_time)) %>% - mutate(year=year(.data$new_profile_date_time)) %>% - filter(.data$year %in% year_vector) %>% - select(-c(.data$year,.data$new_profile_date_time)) - - return(filtered_dat) - + + if (get_data_type(data)=="Sediment trap"){ + filtered_dat <- data %>% + filter(! is.na(.data$sample_date_time_start)) %>% + mutate(new_sample_date_start=gsub(' .*','',sample_date_time_start)) %>% + mutate(new_sample_date_start = dmy(.data$new_sample_date_start)) %>% + mutate(year=year(.data$new_sample_date_start)) %>% + filter(.data$year %in% year_vector) %>% + select(-c(.data$year,.data$new_sample_date_start)) + return(filtered_dat) + + } else { + filtered_dat <- data %>% + filter(! is.na(.data$profile_date_time)) %>% + mutate(new_profile_date_time = dmy(.data$profile_date_time)) %>% + mutate(year=year(.data$new_profile_date_time)) %>% + filter(.data$year %in% year_vector) %>% + select(-c(.data$year,.data$new_profile_date_time)) + return(filtered_dat) + } } #' Filter forcis data by month of sampling #' -#' @param data forcis data in long format +#' @param data forcis data #' @param months numeric vector of selected months #' #' @return A `data.frame`. @@ -66,22 +77,32 @@ filter_by_year <- function(data,years){ filter_by_month <- function(data,months){ month_vector <- as.numeric(months) - - filtered_dat <- data %>% - filter(! is.na(.data$profile_date_time)) %>% - mutate(new_profile_date_time =dmy(.data$profile_date_time)) %>% - mutate(month=month(.data$new_profile_date_time)) %>% - filter(.data$month %in% month_vector)%>% - select(-c(.data$month,.data$new_profile_date_time)) - - return(filtered_dat) - + if (get_data_type(data)=="Sediment trap"){ + filtered_dat <- data %>% + filter(! is.na(.data$sample_date_time_start)) %>% + mutate(new_sample_date_start=gsub(' .*','',sample_date_time_start)) %>% + mutate(new_sample_date_start = dmy(.data$new_sample_date_start)) %>% + mutate(month=month(.data$new_sample_date_start)) %>% + filter(.data$month %in% month_vector) %>% + select(-c(.data$month,.data$new_sample_date_start)) + return(filtered_dat) + + } else { + filtered_dat <- data %>% + filter(! is.na(.data$profile_date_time)) %>% + mutate(new_profile_date_time =dmy(.data$profile_date_time)) %>% + mutate(month=month(.data$new_profile_date_time)) %>% + filter(.data$month %in% month_vector)%>% + select(-c(.data$month,.data$new_profile_date_time)) + + return(filtered_dat) + } } #' Filter forcis data by coordinate square #' -#' @param data forcis data in long format +#' @param data forcis data #' @param coord_square a numeric vector containing in this order minimum latitute, #' minimum longitude, maximum latitude, maximum longitude #' @@ -113,19 +134,33 @@ filter_by_coordinates <- function(data, coord_square){ #' #' @param data forcis data in long format, except for CPR North data #' @param species a character vector listing species of interest -#' +#' @param remove_NAs logical, If FALSE, retains all taxa including those with NA counts #' @return A `data.frame` #' @export #' #' @examples #' ## ADD EXAMPLE ---- -filter_by_species <- function (data,species ){ +filter_by_species <- function (data,species, remove_NAs=TRUE ){ my_species <- as.character(species) + taxa_cols <- get_species_names(data) + + if (length(taxa_cols)>0) { + stop("This function requires data in long format", call. = FALSE) + } + + if (get_data_type(data) %in% c("CPR North")) { + stop("This function is not designed to work with 'CPR North' data", call. = FALSE) + } filtered_dat <- data %>% filter(.data$taxa %in% my_species) - # filter(! is.na(counts)) + + if(remove_NAs) { + + filtered_dat <- filtered_dat %>% + filter(! is.na(.data$counts)) + } return(filtered_dat) } diff --git a/man/filter_by_coordinates.Rd b/man/filter_by_coordinates.Rd index 9939af1..1353e2b 100644 --- a/man/filter_by_coordinates.Rd +++ b/man/filter_by_coordinates.Rd @@ -7,7 +7,7 @@ filter_by_coordinates(data, coord_square) } \arguments{ -\item{data}{forcis data in long format} +\item{data}{forcis data} \item{coord_square}{a numeric vector containing in this order minimum latitute, minimum longitude, maximum latitude, maximum longitude} diff --git a/man/filter_by_month.Rd b/man/filter_by_month.Rd index b28736f..ee112fa 100644 --- a/man/filter_by_month.Rd +++ b/man/filter_by_month.Rd @@ -7,7 +7,7 @@ filter_by_month(data, months) } \arguments{ -\item{data}{forcis data in long format} +\item{data}{forcis data} \item{months}{numeric vector of selected months} } diff --git a/man/filter_by_species.Rd b/man/filter_by_species.Rd index 347b9b5..2f6fe1d 100644 --- a/man/filter_by_species.Rd +++ b/man/filter_by_species.Rd @@ -4,12 +4,14 @@ \alias{filter_by_species} \title{Filter forcis data by species} \usage{ -filter_by_species(data, species) +filter_by_species(data, species, remove_NAs = TRUE) } \arguments{ \item{data}{forcis data in long format, except for CPR North data} \item{species}{a character vector listing species of interest} + +\item{remove_NAs}{logical, If FALSE, retains all taxa including those with NA counts} } \value{ A \code{data.frame} diff --git a/man/filter_by_year.Rd b/man/filter_by_year.Rd index b038dd8..eae2dbd 100644 --- a/man/filter_by_year.Rd +++ b/man/filter_by_year.Rd @@ -7,7 +7,7 @@ filter_by_year(data, years) } \arguments{ -\item{data}{forcis data in long format} +\item{data}{forcis data} \item{years}{numeric vector of selected years} }