diff --git a/R/DEqual.R b/R/DEqual.R index 488e2ed..9ea4a67 100644 --- a/R/DEqual.R +++ b/R/DEqual.R @@ -16,6 +16,14 @@ #' @param deg_tstats an optional`data.frame()` with a column "t" containing #' t-statistics resulted from a degradation experiment. Default is the #' internal `qsvaR::degradation_tstats` from the package authors. +#' @param show.legend logical (default TRUE) to show legend in the plot +#' @param show.cor specify where to show the correlation value. Can be one of +#' "caption", "corner-top", "corner-bottom", or "none". +#' @param font.size numeric value to set the base font size of the plot +#' @param cor.size numeric (default font.size/2) to set the font size for the +#' correlation text +#' @param cor.label character (default "cor: ") to set the text preceding the +#' correlation value #' #' @return a `ggplot` object of the DE t-statistic vs #' the DE statistic from degradation diff --git a/man/DEqual.Rd b/man/DEqual.Rd new file mode 100644 index 0000000..a47fbd1 --- /dev/null +++ b/man/DEqual.Rd @@ -0,0 +1,69 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/DEqual.R +\name{DEqual} +\alias{DEqual} +\title{Differential expression quality (DEqual) plot} +\usage{ +DEqual( + DE, + deg_tstats = qsvaR::degradation_tstats, + show.legend = TRUE, + show.cor = c("caption", "corner-top", "corner-bottom", "none"), + font.size = 12, + cor.size = font.size/2, + cor.label = "cor: " +) +} +\arguments{ +\item{DE}{a \code{data.frame()} with a column "t" containing the t-statistics +from Differential Expression, typically generated with \code{limma::topTable()}. +\code{rownames(DE)} must have transcript Ensembl/Gencode IDs.} + +\item{deg_tstats}{an optional\code{data.frame()} with a column "t" containing +t-statistics resulted from a degradation experiment. 
Default is the +internal \code{qsvaR::degradation_tstats} from the package authors.} + +\item{show.legend}{logical (default TRUE) to show legend in the plot} + +\item{show.cor}{specify where to show the correlation value. Can be one of +"caption", "corner-top", "corner-bottom", or "none".} + +\item{font.size}{numeric value to set the base font size of the plot} + +\item{cor.size}{numeric (default font.size/2) to set the font size for the +correlation text} + +\item{cor.label}{character (default "cor: ") to set the text preceding the +correlation value} +} +\value{ +a \code{ggplot} object of the DE t-statistic vs +the DE statistic from degradation +} +\description{ +A DEqual plot compares the effect of RNA degradation from an independent +degradation experiment on the y axis to the effect of the outcome of +interest. They were originally described by Jaffe et al, PNAS, 2017 +\url{https://doi.org/10.1073/pnas.1617384114}. Other DEqual versions are +included in Collado-Torres et al, Neuron, 2019 +\url{https://doi.org/10.1016/j.neuron.2019.05.013}. This function compares your +t-statistics of interest computed on transcripts against the +t-statistics from degradation time adjusting for the six brain regions from +degradation experiment data used for determining \code{rse_tx}. +} +\examples{ + +## Random differential expression t-statistics for the same transcripts +## we have degradation t-statistics for in `degradation_tstats`.
+set.seed(101) +random_de <- data.frame( + t = rt(nrow(degradation_tstats), 5), + row.names = sample( + rownames(degradation_tstats), + nrow(degradation_tstats) + ) +) + +## Create the DEqual plot +DEqual(random_de) +} diff --git a/man/degradation_tstats.Rd b/man/degradation_tstats.Rd new file mode 100644 index 0000000..545c578 --- /dev/null +++ b/man/degradation_tstats.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/degradation_tstats-data.R +\docType{data} +\name{degradation_tstats} +\alias{degradation_tstats} +\title{Degradation time t-statistics} +\format{ +A \code{data.frame()} with the \code{t} statistics for degradation time. The +\code{rownames()} are the GENCODE transcript IDs. +} +\description{ +These t-statistics are derived from the degradation timepoints data +built into qsvaR. They are the results from multiple models where +we determined the association of transcripts with degradation time +adjusting for brain region (so parallel degradation effects across +brain regions). They are used for plotting in \code{DEqual()}. +} +\seealso{ +\link{DEqual} +} +\keyword{datasets} diff --git a/man/getDegTx.Rd b/man/getDegTx.Rd new file mode 100644 index 0000000..baefbbb --- /dev/null +++ b/man/getDegTx.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/getDegTx.R +\name{getDegTx} +\alias{getDegTx} +\title{Obtain expression matrix for degraded transcripts} +\usage{ +getDegTx( + rse_tx, + type = c("cell_component", "standard", "top1500"), + sig_transcripts = NULL, + assayname = "tpm", + verbose = TRUE +) +} +\arguments{ +\item{rse_tx}{A \link[SummarizedExperiment:RangedSummarizedExperiment-class]{RangedSummarizedExperiment-class} +object containing the transcript data desired to be studied.} + +\item{type}{A \code{character(1)} specifying the transcripts set type. +These were determined by Joshua M. Stolz et al, 2022. 
Here the names "cell_component", "top1500", +and "standard" refer to models that were determined to be effective in removing degradation effects. +The "standard" model involves taking the union of the top 1000 transcripts +associated with degradation from the interaction model and the main effect model. +The "top1500" model is the same as the "standard" model except the +union of the top 1500 genes associated with degradation is selected. +The most effective of our models, "cell_component", involved deconvolution of +the degradation matrix to determine the proportion of cell types within our studied tissue. +These proportions were then added to our \code{model.matrix()} and the union of the top 1000 transcripts in the interaction model, +the main effect model, and the cell proportions model were used to generate this model of qSVs.} + +\item{sig_transcripts}{A list of transcripts determined to have degradation signal in the qsva expanded paper.} + +\item{assayname}{character string specifying the name of the assay desired in rse_tx} + +\item{verbose}{specify if the function should report how many model transcripts were matched} +} +\value{ +A +\link[SummarizedExperiment:RangedSummarizedExperiment-class]{RangedSummarizedExperiment-class} +object. +} +\description{ +This function is used to obtain a \link[SummarizedExperiment:RangedSummarizedExperiment-class]{RangedSummarizedExperiment-class} +of transcripts and their expression values. These transcripts are selected based on a prior study of RNA degradation in +postmortem brain tissues. This object can later be used to obtain the principal components +necessary to remove the effect of degradation in differential expression.
+} +\examples{ +degTx <- getDegTx(rse_tx, "standard") +} diff --git a/man/getPCs.Rd b/man/getPCs.Rd new file mode 100644 index 0000000..9a50d78 --- /dev/null +++ b/man/getPCs.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/getPCs.R +\name{getPCs} +\alias{getPCs} +\title{PCs from transcripts} +\usage{ +getPCs(rse_tx, assayname = "tpm") +} +\arguments{ +\item{rse_tx}{Ranged Summarized Experiment with only transcripts selected for qsva} + +\item{assayname}{character string specifying the name of the assay desired in rse_tx} +} +\value{ +prcomp object generated by taking the pcs of degraded transcripts +} +\description{ +This function returns the pcs from the obtained RangedSummarizedExperiment object of selected transcripts +} +\examples{ +getPCs(rse_tx, "tpm") +} diff --git a/man/get_qsvs.Rd b/man/get_qsvs.Rd new file mode 100644 index 0000000..68a51d2 --- /dev/null +++ b/man/get_qsvs.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_qsvs.R +\name{get_qsvs} +\alias{get_qsvs} +\title{Generate matrix of qsvs} +\usage{ +get_qsvs(qsvPCs, k) +} +\arguments{ +\item{qsvPCs}{prcomp object generated by taking +the pcs of degraded transcripts} + +\item{k}{number of qsvs to be included.} +} +\value{ +matrix with k principal components for each sample. +} +\description{ +Using the pcs and the k number of components to be included, +we generate the qsva matrix.
+} +\examples{ +qsv <- getPCs(rse_tx, "tpm") +get_qsvs(qsv, 2) +} diff --git a/man/k_qsvs.Rd b/man/k_qsvs.Rd new file mode 100644 index 0000000..90cb1a4 --- /dev/null +++ b/man/k_qsvs.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/k_qsvs.R +\name{k_qsvs} +\alias{k_qsvs} +\title{Apply num.sv algorithm to determine the number of pcs to be included} +\usage{ +k_qsvs(rse_tx, mod, assayname) +} +\arguments{ +\item{rse_tx}{A \link[SummarizedExperiment:RangedSummarizedExperiment-class]{RangedSummarizedExperiment-class} object containing +the transcript data desired to be studied.} + +\item{mod}{Model Matrix with necessary variables that you would +model for in differential expression} + +\item{assayname}{character string specifying the name of the assay desired in rse_tx} +} +\value{ +integer representing number of pcs to be included +} +\description{ +Apply num.sv algorithm to determine the number of pcs to be included +} +\examples{ +## First we need to define a statistical model. We'll use the example +## rse_tx data. Note that the model you'll use in your own data +## might look different from this model. +mod <- model.matrix(~ mitoRate + Region + rRNA_rate + totalAssignedGene + RIN, + data = colData(rse_tx) +) + +## To ensure that the results are reproducible, you will need to set a +## random seed with the set.seed() function. Internally, we are using +## sva::num.sv() which needs a random seed to ensure reproducibility of the +## results.
+set.seed(20230621) +k_qsvs(rse_tx, mod, "tpm") +} diff --git a/man/normalize_tx_names.Rd b/man/normalize_tx_names.Rd new file mode 100644 index 0000000..5e4174b --- /dev/null +++ b/man/normalize_tx_names.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{normalize_tx_names} +\alias{normalize_tx_names} +\title{Remove version number from Gencode/Ensembl transcript names} +\usage{ +normalize_tx_names(txnames) +} +\arguments{ +\item{txnames}{A \code{character()} vector of GENCODE or ENSEMBL transcript IDs} +} +\value{ +A +\code{character()} vector of transcript names without versioning +} +\description{ +This function removes the Gencode/ENSEMBL version from the transcript ID, while protecting _PAR_Y suffixes if present +} +\examples{ +ensIDs <- normalize_tx_names(rownames(rse_tx)) +} diff --git a/man/qSVA.Rd b/man/qSVA.Rd new file mode 100644 index 0000000..30bae9b --- /dev/null +++ b/man/qSVA.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/qSVA.R +\name{qSVA} +\alias{qSVA} +\title{A wrapper function used to perform qSVA in one step.} +\usage{ +qSVA( + rse_tx, + type = c("cell_component", "standard", "top1500"), + sig_transcripts = NULL, + mod, + assayname +) +} +\arguments{ +\item{rse_tx}{A \link[SummarizedExperiment:RangedSummarizedExperiment-class]{RangedSummarizedExperiment-class} object containing +the transcript data desired to be studied.} + +\item{type}{a character string specifying which model you would +like to use from the sets of signature transcripts identified +by the qsvaR package. This can be omitted if a custom set of +transcripts is provided to sig_transcripts.} + +\item{sig_transcripts}{A list of transcript IDs that are associated +with degradation signal. Specifying a \code{character()} input with ENSEMBL +transcript IDs (whose values should match entries in \code{rownames(rse_tx)}). 
+This argument provides a custom list of transcripts for adjusting +for degradation; this should be used instead of the \code{type} argument.} + +\item{mod}{Model Matrix with necessary variables that you would +model for in differential expression} + +\item{assayname}{character string specifying the name of +the assay desired in rse_tx} +} +\value{ +matrix with k principal components for each sample +} +\description{ +A wrapper function used to perform qSVA in one step. +} +\examples{ +## First we need to define a statistical model. We'll use the example +## rse_tx data. Note that the model you'll use in your own data +## might look different from this model. +mod <- model.matrix(~ mitoRate + Region + rRNA_rate + totalAssignedGene + RIN, + data = colData(rse_tx) +) + +## To ensure that the results are reproducible, you will need to set a +## random seed with the set.seed() function. Internally, we are using +## sva::num.sv() which needs a random seed to ensure reproducibility of the +## results. +set.seed(20230621) +qSVA(rse_tx = rse_tx, type = "cell_component", mod = mod, assayname = "tpm") + +} diff --git a/man/rse_tx.Rd b/man/rse_tx.Rd new file mode 100644 index 0000000..4edc43e --- /dev/null +++ b/man/rse_tx.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rse_tx-data.R +\docType{data} +\name{rse_tx} +\alias{rse_tx} +\title{Example of RSE object with RNA-seq transcript quantification data} +\format{ +A \link[SummarizedExperiment:RangedSummarizedExperiment-class]{RangedSummarizedExperiment-class} +} +\description{ +This data is a \link[SummarizedExperiment:RangedSummarizedExperiment-class]{RangedSummarizedExperiment-class} +with transcript quantification data stored in a "tpm" assay. It is +used to demonstrate the use of qsvaR in bulk RNA-seq data.
+} +\seealso{ +\link{getPCs} \link{k_qsvs} \link{getDegTx} \link{qSVA} +} +\keyword{datasets} diff --git a/man/select_transcripts.Rd b/man/select_transcripts.Rd new file mode 100644 index 0000000..cbbe671 --- /dev/null +++ b/man/select_transcripts.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/select_transcripts.R +\name{select_transcripts} +\alias{select_transcripts} +\title{Select transcripts associated with degradation} +\usage{ +select_transcripts(type = c("cell_component", "top1500", "standard")) +} +\arguments{ +\item{type}{A \code{character(1)} specifying the transcripts set type. +These were determined by Joshua M. Stolz et al, 2022. Here the names "cell_component", "top1500", and "standard" refer to models that were determined to be effective in removing degradation effects. +The "standard" model involves taking the union of the top 1000 transcripts associated with degradation from the interaction model and the main effect model. +The "top1500" model is the same as the "standard" model except the union of the top 1500 genes associated with degradation is selected. +The most effective of our models, "cell_component", involved deconvolution of the degradation matrix to determine the proportion of cell types within our studied tissue. +These proportions were then added to our \code{model.matrix()} and the union of the top 1000 transcripts in the interaction model, the main effect model, and the cell proportions model were used to generate this model of qSVs.} +} +\value{ +A \code{character()} with the transcript IDs. +} +\description{ +Helper function to select which experimental model will be used to generate the qSVs.
+} +\examples{ +## Default set of transcripts associated with degradation +sig_transcripts <- select_transcripts() +length(sig_transcripts) +head(sig_transcripts) + +## Example where match.arg() auto-completes +select_transcripts("top") +} diff --git a/man/transcripts.Rd b/man/transcripts.Rd new file mode 100644 index 0000000..1721a58 --- /dev/null +++ b/man/transcripts.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/transcripts-data.R +\docType{data} +\name{transcripts} +\alias{transcripts} +\title{Transcripts for Degradation Models} +\format{ +A \code{list()} with character strings containing the transcripts selected by each model. +Each string is a GENCODE transcript ID. +} +\usage{ +transcripts +} +\description{ +An object storing three lists of transcripts each corresponding to a model used in the degradation experiment. +These were determined by Joshua M. Stolz et al, 2022. Here the names "cell_component", "top1500", and "standard" refer to models that were determined to be effective in removing degradation effects. +The "standard" model involves taking the union of the top 1000 transcripts associated with degradation from the interaction model and the main effect model. +The "top1500" model is the same as the "standard" model except the union of the top 1500 genes associated with degradation is selected. +The most effective of our models, "cell_component", involved deconvolution of the degradation matrix to determine the proportion of cell types within our studied tissue. +These proportions were then added to our \code{model.matrix()} and the union of the top 1000 transcripts in the interaction model, the main effect model, and the cell proportions model were used to generate this model of qSVs.
+} +\seealso{ +\link{select_transcripts} +} +\keyword{datasets} diff --git a/man/which_tx_names.Rd b/man/which_tx_names.Rd new file mode 100644 index 0000000..e0f7f37 --- /dev/null +++ b/man/which_tx_names.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{which_tx_names} +\alias{which_tx_names} +\title{Check validity of transcript vectors and return a vector matching indexes in txnames} +\usage{ +which_tx_names(txnames, sig_tx) +} +\arguments{ +\item{txnames}{A \code{character()} vector of GENCODE or ENSEMBL transcript IDs.} + +\item{sig_tx}{A \code{character()} vector of GENCODE or ENSEMBL signature transcript IDs.} +} +\value{ +A +\code{integer()} vector of \code{txnames} transcript indexes in \code{sig_tx}. +} +\description{ +This function is used to check if txnames and sig_tx are GENCODE or ENSEMBL transcript IDs +and return an integer vector of txnames transcript indexes that are in sig_tx. +} +\examples{ +sig_tx <- select_transcripts("cell_component") +whichTx <- which_tx_names(rownames(rse_tx), sig_tx) +}