diff --git a/R/getDegTx.R b/R/getDegTx.R index 04de54d..cba6979 100644 --- a/R/getDegTx.R +++ b/R/getDegTx.R @@ -9,10 +9,10 @@ #' object containing the transcript data desired to be studied. #' @param type A `character(1)` specifying the transcripts set type. #' These were determined by Joshua M. Stolz et al, 2022. Here the names "cell_component", "top1500", -#' and "standard" refer to models that were determined to be effective in removing degradation effects. -#' The "standard" model involves taking the union of the top 1000 transcripts +#' and "top1000" refer to models that were determined to be effective in removing degradation effects. +#' The "top1000" model involves taking the union of the top 1000 transcripts #' associated with degradation from the interaction model and the main effect model. -#' The "top1500" model is the same as the "standard model except the +#' The "top1500" model is the same as the "top1000" model except the #' union of the top 1500 genes associated with degradation is selected. #' The most effective of our models, "cell_component", involved deconvolution of #' the degradation matrix to determine the proportion of cell types within our studied tissue. 
@@ -32,8 +32,8 @@ #' @import rlang #' #' @examples -#' degTx <- getDegTx(rse_tx, "standard") -getDegTx <- function(rse_tx, type = c("cell_component", "standard", "top1500"), +#' degTx <- getDegTx(rse_tx, "top1000") +getDegTx <- function(rse_tx, type = c("cell_component", "top1000", "top1500"), sig_transcripts = NULL, assayname = "tpm", verbose = TRUE) { # type = arg_match(type) if (is.null(sig_transcripts)) { diff --git a/R/qSVA.R b/R/qSVA.R index ba67a83..327d8cd 100644 --- a/R/qSVA.R +++ b/R/qSVA.R @@ -35,7 +35,7 @@ #' qSVA(rse_tx = rse_tx, type = "cell_component", mod = mod, assayname = "tpm") #' qSVA <- - function(rse_tx, type = c("cell_component", "standard", "top1500"), + function(rse_tx, type = c("cell_component", "top1000", "top1500"), sig_transcripts = NULL, mod, assayname) { if (is.null(sig_transcripts)) { type <- arg_match(type) # must be one of those in the list if sig_transcripts is NULL diff --git a/R/select_transcripts.R b/R/select_transcripts.R index a7277fd..218a2da 100644 --- a/R/select_transcripts.R +++ b/R/select_transcripts.R @@ -4,11 +4,11 @@ #' #' @param type A `character(1)` specifying the transcripts set type. #' These were determined by Joshua M. Stolz -#' et al, 2022. Here the names "cell_component", "top1500", and "standard" refer +#' et al, 2022. Here the names "cell_component", "top1500", and "top1000" refer #' to models that were determined to be effective in removing degradation -#' effects. The "standard" model involves taking the union of the top 1000 +#' effects. The "top1000" model involves taking the union of the top 1000 #' transcripts associated with degradation from the interaction model and the -#' main effect model. The "top1500" model is the same as the "standard" model +#' main effect model. The "top1500" model is the same as the "top1000" model #' except the union of the top 1500 genes associated with degradation is #' selected. 
The most effective of our models, "cell_component", involved #' deconvolution of the degradation matrix to determine the proportion of cell @@ -28,13 +28,13 @@ #' #' ## Example where match.arg() auto-completes #' select_transcripts("top") -select_transcripts <- function(type = c("cell_component", "top1500", "standard")) { +select_transcripts <- function(type = c("cell_component", "top1500", "top1000")) { type <- match.arg(type) if (type == "cell_component") { return(qsvaR::transcripts$cell_component) } else if (type == "top1500") { return(qsvaR::transcripts$tx1500) - } else if (type == "standard") { + } else if (type == "top1000") { return(qsvaR::transcripts$standard) } } diff --git a/R/transcripts-data.R b/R/transcripts-data.R index cf9b861..82c563b 100644 --- a/R/transcripts-data.R +++ b/R/transcripts-data.R @@ -2,11 +2,11 @@ #' #' An object storing three lists of transcripts each corresponding to a model #' used in the degradation experiment. These were determined by Joshua M. Stolz -#' et al, 2022. Here the names "cell_component", "top1500", and "standard" refer +#' et al, 2022. Here the names "cell_component", "top1500", and "top1000" refer #' to models that were determined to be effective in removing degradation -#' effects. The "standard" model involves taking the union of the top 1000 +#' effects. The "top1000" model involves taking the union of the top 1000 #' transcripts associated with degradation from the interaction model and the -#' main effect model. The "top1500" model is the same as the "standard" model +#' main effect model. The "top1500" model is the same as the "top1000" model #' except the union of the top 1500 genes associated with degradation is #' selected. 
The most effective of our models, "cell_component", involved #' deconvolution of the degradation matrix to determine the proportion of cell diff --git a/man/getDegTx.Rd b/man/getDegTx.Rd index baefbbb..fa1afa3 100644 --- a/man/getDegTx.Rd +++ b/man/getDegTx.Rd @@ -6,7 +6,7 @@ \usage{ getDegTx( rse_tx, - type = c("cell_component", "standard", "top1500"), + type = c("cell_component", "top1000", "top1500"), sig_transcripts = NULL, assayname = "tpm", verbose = TRUE @@ -18,10 +18,10 @@ object containing the transcript data desired to be studied.} \item{type}{A \code{character(1)} specifying the transcripts set type. These were determined by Joshua M. Stolz et al, 2022. Here the names "cell_component", "top1500", -and "standard" refer to models that were determined to be effective in removing degradation effects. -The "standard" model involves taking the union of the top 1000 transcripts +and "top1000" refer to models that were determined to be effective in removing degradation effects. +The "top1000" model involves taking the union of the top 1000 transcripts associated with degradation from the interaction model and the main effect model. -The "top1500" model is the same as the "standard model except the +The "top1500" model is the same as the "top1000" model except the union of the top 1500 genes associated with degradation is selected. The most effective of our models, "cell_component", involved deconvolution of the degradation matrix to determine the proportion of cell types within our studied tissue. @@ -46,5 +46,5 @@ postmortem brain tissues. This object can later be used to obtain the principle necessary to remove the effect of degradation in differential expression. 
} \examples{ -degTx <- getDegTx(rse_tx, "standard") +degTx <- getDegTx(rse_tx, "top1000") } diff --git a/man/normalize_tx_names.Rd b/man/normalize_tx_names.Rd index 5e4174b..7b08381 100644 --- a/man/normalize_tx_names.Rd +++ b/man/normalize_tx_names.Rd @@ -17,5 +17,5 @@ A This function removes the Gencode/ENSEMBL version from the transcript ID, while protecting _PAR_Y suffixes if present } \examples{ -ensIDs <- normalize_tx_names(rownames(rse_tx)) +ensIDs <- normalize_tx_names(rownames(rse_tx)) } diff --git a/man/qSVA.Rd b/man/qSVA.Rd index 30bae9b..2f52cd6 100644 --- a/man/qSVA.Rd +++ b/man/qSVA.Rd @@ -6,7 +6,7 @@ \usage{ qSVA( rse_tx, - type = c("cell_component", "standard", "top1500"), + type = c("cell_component", "top1000", "top1500"), sig_transcripts = NULL, mod, assayname diff --git a/man/select_transcripts.Rd b/man/select_transcripts.Rd index 1614fcb..02dfbd4 100644 --- a/man/select_transcripts.Rd +++ b/man/select_transcripts.Rd @@ -4,16 +4,16 @@ \alias{select_transcripts} \title{Select transcripts associated with degradation} \usage{ -select_transcripts(type = c("cell_component", "top1500", "standard")) +select_transcripts(type = c("cell_component", "top1500", "top1000")) } \arguments{ \item{type}{A \code{character(1)} specifying the transcripts set type. These were determined by Joshua M. Stolz -et al, 2022. Here the names "cell_component", "top1500", and "standard" refer +et al, 2022. Here the names "cell_component", "top1500", and "top1000" refer to models that were determined to be effective in removing degradation -effects. The "standard" model involves taking the union of the top 1000 +effects. The "top1000" model involves taking the union of the top 1000 transcripts associated with degradation from the interaction model and the -main effect model. The "top1500" model is the same as the "standard" model +main effect model. 
The "top1500" model is the same as the "top1000" model except the union of the top 1500 genes associated with degradation is selected. The most effective of our models, "cell_component", involved deconvolution of the degradation matrix to determine the proportion of cell diff --git a/man/transcripts.Rd b/man/transcripts.Rd index 4b560fc..ba1d1d6 100644 --- a/man/transcripts.Rd +++ b/man/transcripts.Rd @@ -14,11 +14,11 @@ transcripts \description{ An object storing three lists of transcripts each corresponding to a model used in the degradation experiment. These were determined by Joshua M. Stolz -et al, 2022. Here the names "cell_component", "top1500", and "standard" refer +et al, 2022. Here the names "cell_component", "top1500", and "top1000" refer to models that were determined to be effective in removing degradation -effects. The "standard" model involves taking the union of the top 1000 +effects. The "top1000" model involves taking the union of the top 1000 transcripts associated with degradation from the interaction model and the -main effect model. The "top1500" model is the same as the "standard" model +main effect model. The "top1500" model is the same as the "top1000" model except the union of the top 1500 genes associated with degradation is selected. The most effective of our models, "cell_component", involved deconvolution of the degradation matrix to determine the proportion of cell diff --git a/man/which_tx_names.Rd b/man/which_tx_names.Rd index e0f7f37..254722f 100644 --- a/man/which_tx_names.Rd +++ b/man/which_tx_names.Rd @@ -20,6 +20,6 @@ This function is used to check if tx1 and tx2 are GENCODE or ENSEMBL transcript and return an integer vector of tx1 transcript indexes that are in tx2. 
} \examples{ -sig_tx <- select_transcripts("cell_component") +sig_tx <- select_transcripts("cell_component") whichTx <- which_tx_names(rownames(rse_tx), sig_tx) } diff --git a/vignettes/Intro_qsvaR.Rmd b/vignettes/Intro_qsvaR.Rmd index 2627df4..9b44a50 100644 --- a/vignettes/Intro_qsvaR.Rmd +++ b/vignettes/Intro_qsvaR.Rmd @@ -151,7 +151,7 @@ rse_tx <- rse_tx[rowMeans(assays(rse_tx)$tpm) > 0.3, ] ## Get Degradation Matrix -In this next step we subset for the transcripts associated with degradation. These were determined by Joshua M. Stolz et al, 2022. We have provided three models to choose from. Here the names `"cell_component"`, `"top1500"`, and `"standard"` refer to models that were determined to be effective in removing degradation effects. The `"standard"` model involves taking the union of the top 1000 transcripts associated with degradation from the interaction model and the main effect model. The `"top1500"` model is the same as the `"standard"` model except the union of the top 1500 genes associated with degradation is selected. The most effective of our models, `"cell_component"`, involved deconvolution of the degradation matrix to determine the proportion of cell types within our studied tissue. These proportions were then added to our `model.matrix()` and the union of the top 1000 transcripts in the interaction model, the main effect model, and the cell proportions model were used to generate this model of quality surrogate variables (qSVs). In this example we will choose `"cell_component"` when using the `getDegTx()` and `select_transcripts()` functions. +In this next step we subset for the transcripts associated with degradation. These were determined by Joshua M. Stolz et al, 2022. We have provided three models to choose from. Here the names `"cell_component"`, `"top1500"`, and `"top1000"` refer to models that were determined to be effective in removing degradation effects. 
The `"top1000"` model involves taking the union of the top 1000 transcripts associated with degradation from the interaction model and the main effect model. The `"top1500"` model is the same as the `"top1000"` model except the union of the top 1500 genes associated with degradation is selected. The most effective of our models, `"cell_component"`, involved deconvolution of the degradation matrix to determine the proportion of cell types within our studied tissue. These proportions were then added to our `model.matrix()` and the union of the top 1000 transcripts in the interaction model, the main effect model, and the cell proportions model were used to generate this model of quality surrogate variables (qSVs). In this example we will choose `"cell_component"` when using the `getDegTx()` and `select_transcripts()` functions. ```{r VennDiagram,fig.cap="The above venn diagram shows the overlap between transcripts in each of the previously mentioned models.", echo=FALSE} knitr::include_graphics("../man/figures/transcripts_venn_diagramm.png")