Skip to content

Commit

Permalink
+pbDD
Browse files Browse the repository at this point in the history
  • Loading branch information
CATALYST-project committed Sep 30, 2024
1 parent b9fcf7f commit d7725a2
Show file tree
Hide file tree
Showing 13 changed files with 525 additions and 63 deletions.
8 changes: 6 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ Authors@R: c(
person("Pierre-Luc", "Germain", role="aut"),
person("Charlotte", "Soneson", role="aut"),
person("Anthony", "Sonrel", role="aut"),
person("Jeroen", "Gilis", role="aut"),
person("Davide", "Risso", role="aut"),
person("Lieven", "Clement", role="aut"),
person("Mark D.", "Robinson", role=c("aut", "fnd"),
email="[email protected]"))
Imports:
Expand All @@ -33,22 +36,23 @@ Imports:
Suggests:
BiocStyle,
countsimQC,
cowplot,
ExperimentHub,
iCOBRA,
knitr,
patchwork,
phylogram,
RColorBrewer,
reshape2,
rmarkdown,
statmod,
stageR,
testthat,
UpSetR
biocViews: ImmunoOncology, DifferentialExpression, Sequencing,
SingleCell, Software, StatisticalMethod, Visualization
License: GPL-3
VignetteBuilder: knitr
RoxygenNote: 7.2.3
RoxygenNote: 7.3.2
Encoding: UTF-8
URL: https://github.com/HelenaLC/muscat
BugReports: https://github.com/HelenaLC/muscat/issues
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
export(aggregateData)
export(calcExprFreqs)
export(mmDS)
export(pbDD)
export(pbDS)
export(pbFlatten)
export(pbHeatmap)
Expand All @@ -11,6 +12,7 @@ export(prepSCE)
export(prepSim)
export(resDS)
export(simData)
export(stagewise_DS_DD)
import(ggplot2)
importFrom(BiocParallel,MulticoreParam)
importFrom(BiocParallel,SerialParam)
Expand Down Expand Up @@ -101,6 +103,7 @@ importFrom(lmerTest,contest)
importFrom(lmerTest,lmer)
importFrom(matrixStats,colAnys)
importFrom(matrixStats,rowAnyNAs)
importFrom(matrixStats,rowMedians)
importFrom(matrixStats,rowMins)
importFrom(matrixStats,rowQuantiles)
importFrom(matrixStats,rowSds)
Expand Down
65 changes: 39 additions & 26 deletions R/pbDS.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
#' Can be a list for multiple, independent comparisons.
#' @param min_cells a numeric. Specifies the minimum number of cells in a given
#' cluster-sample required to consider the sample for differential testing.
#' @param filter characterstring specifying whether
#' @param filter character string specifying whether
#' to filter on genes, samples, both or neither.
#' @param treat logical specifying whether empirical Bayes moderated-t
#' p-values should be computed relative to a minimum fold-change threshold.
Expand Down Expand Up @@ -82,16 +82,16 @@
#' @export

pbDS <- function(pb,
method = c("edgeR", "DESeq2", "limma-trend", "limma-voom"),
design = NULL, coef = NULL, contrast = NULL, min_cells = 10,
filter = c("both", "genes", "samples", "none"), treat = FALSE,
verbose = TRUE, BPPARAM = SerialParam(progressbar = verbose)) {
method=c("edgeR", "DESeq2", "limma-trend", "limma-voom", "DD"),
design=NULL, coef=NULL, contrast=NULL, min_cells=10,
filter=c("both", "genes", "samples", "none"), treat=FALSE,
verbose=TRUE, BPPARAM=SerialParam(progressbar=verbose)) {

# check validity of input arguments
args <- as.list(environment())
method <- match.arg(method)
filter <- match.arg(filter)
.check_pbs(pb, check_by = TRUE)
.check_pbs(pb, check_by=TRUE)
.check_args_pbDS(args)
stopifnot(is(BPPARAM, "BiocParallelParam"))

Expand All @@ -104,7 +104,7 @@ pbDS <- function(pb,
}
if (is.null(coef) & is.null(contrast)) {
c <- colnames(design)[ncol(design)]
contrast <- makeContrasts(contrasts = c, levels = design)
contrast <- makeContrasts(contrasts=c, levels=design)
args$contrast <- contrast
}

Expand All @@ -117,18 +117,19 @@ pbDS <- function(pb,
if (!is.list(coef))
coef <- list(coef)
cs <- vapply(coef, function(i)
paste(colnames(design)[i], collapse = "-"),
paste(colnames(design)[i], collapse="-"),
character(1))
names(cs) <- names(coef) <- cs
}
ct <- ifelse(is.null(coef), "contrast", "coef")

if (!is.function(method)) {
fun <- switch(method,
"DESeq2" = .DESeq2,
"edgeR" = .edgeR,
"limma-trend" = .limma_trend,
"limma-voom" = .limma_voom)
"DD"=.edgeR_NB,
"edgeR"=.edgeR,
"DESeq2"=.DESeq2,
"limma-voom"=.limma_voom,
"limma-trend"=.limma_trend)
} else {
fun_call <- 1
}
Expand All @@ -139,28 +140,30 @@ pbDS <- function(pb,
n_cells <- .n_cells(pb)
names(kids) <- kids <- assayNames(pb)
res <- bplapply(
BPPARAM = BPPARAM,
BPPARAM=BPPARAM,
kids, function (k) {
rmv <- n_cells[k, ] < min_cells
d <- design[colnames(y <- pb[ , !rmv]), , drop = FALSE]
d <- design[colnames(y <- pb[ , !rmv]), , drop=FALSE]
if (filter %in% c("samples", "both")) {
ls <- colSums(assay(y, k))
ol <- isOutlier(ls, log = TRUE, type = "lower", nmads = 3)
d <- d[colnames(y <- y[, !ol]), , drop = FALSE]
ol <- isOutlier(ls, log=TRUE, type="lower", nmads=3)
d <- d[colnames(y <- y[, !ol]), , drop=FALSE]
}
if (any(tabulate(y$group_id) < 2)
|| qr(d)$rank == nrow(d)
|| qr(d)$rank== nrow(d)
|| qr(d)$rank < ncol(d))
return(NULL)
y <- y[rowSums(assay(y, k)) != 0, , drop = FALSE]
y <- y[rowSums(assay(y, k)) != 0, , drop=FALSE]
if (filter %in% c("genes", "both") & max(assay(y, k)) > 100)
y <- y[filterByExpr(assay(y, k), d), , drop = FALSE]
y <- y[filterByExpr(assay(y, k), d), , drop=FALSE]
# drop samples without any detected features
keep <- colAnys(assay(y, k) > 0)
y <- y[, keep, drop = FALSE]
d <- d[keep, , drop = FALSE]
args <- list(x = y, k = k, design = d, coef = coef,
contrast = contrast, ct = ct, cs = cs, treat = treat)
y <- y[, keep, drop=FALSE]
d <- d[keep, , drop=FALSE]
args <- list(
x=y, k=k, design=d, coef=coef,
contrast=contrast, ct=ct, cs=cs,
treat=treat, nc=n_cells[k, !rmv])
args <- args[intersect(names(args), fun_args)]
suppressWarnings(do.call(fun, args))
})
Expand All @@ -169,14 +172,24 @@ pbDS <- function(pb,
rmv <- vapply(res, is.null, logical(1))
res <- res[!rmv]

if (length(res) == 0) stop(
if (length(res)== 0) stop(
"Specified filtering options result in no genes in any clusters ",
"being tested. To force testing, consider modifying arguments ",
"'min_cells' and/or 'filter'. See '?pbDS' for details.")

# reorganize & do global p-value adjustment
names(i) <- i <- c("table", "data", "fit")
res <- lapply(i, map, .x = res)
res <- lapply(i, map, .x=res)
res$table <- .p_adj_global(res$table)
return(c(res, list(args = args)))
return(c(res, list(args=args)))
}

#' @rdname pbDS
#' @export
pbDD <- function(pb, design=NULL, coef=NULL, contrast=NULL,
min_cells=10, filter=c("both", "genes", "samples", "none"),
verbose=TRUE, BPPARAM=SerialParam(progressbar=verbose))
{
args <- as.list(environment())
do.call(pbDS, c(args, list(method="DD")))
}
121 changes: 121 additions & 0 deletions R/stagewiseDD.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
#' @importFrom dplyr bind_rows
.res_DX <- function(res_DS, res_DD) {
# for each contrast...
names(cts) <- cts <- names(res_DS$table)
lapply(cts, function(ct) {
# for each cluster...
names(kids) <- kids <- names(res_DS$table[[ct]])
lapply(kids, function(kid) {
# get DS/DD results
DS <- res_DS$table[[ct]][[kid]]
DD <- res_DD$table[[ct]][[kid]]
# add missing genes
DS <- bind_rows(DS, data.frame(gene=setdiff(DD$gene, DS$gene)))
DD <- bind_rows(DD, data.frame(gene=setdiff(DS$gene, DD$gene)))
# reorder & return both
DD <- DD[match(DS$gene, DD$gene), ]
return(list(DS=DS, DD=DD))
})
})
}

#' @rdname stagewise_DS_DD
#' @title Perform two-stage testing on DS and DD
#'
#' @param res_DS a list of DS testing results as returned
#' by \code{\link{pbDS}} or \code{\link{mmDS}}.
#' @param res_DD a list of DD testing results as returned
#' by \code{\link{pbDD}} (or \code{\link{pbDS}} with \code{method="DD"}).
#' @param sce (optional) \code{SingleCellExperiment} object containing the data
#' that underlies testing, prior to summarization with \code{\link{aggregateData}}.
#' Used for validation of inputs in order to prevent unexpected failure/results.
#'
#' @return
#' A list of \code{DFrame}s containing results for each contrast and cluster.
#' Each table contains DS and DD results for genes shared between analyses,
#' as well as results from stagewise testing analysis, namely:
#' \itemize{
#' \item{\code{p_adj}: FDR adjusted p-values for the
#' screening hypothesis that a gene is neither DS nor DD
#' (see \code{?stageR::getAdjustedPValues} for details)}
#' \item{\code{p_val.DS/D}: confirmation stage p-values for DS/D}}
#'
#' @examples
#' data(example_sce)
#'
#' pbs_sum <- aggregateData(example_sce, assay="counts", fun="sum")
#' pbs_det <- aggregateData(example_sce, assay="counts", fun="num.detected")
#'
#' res_DS <- pbDS(pbs_sum, min_cells=0, filter="none", verbose=FALSE)
#' res_DD <- pbDD(pbs_det, min_cells=0, filter="none", verbose=FALSE)
#'
#' res <- stagewise_DS_DD(res_DS, res_DD)
#' head(res[[1]][[1]]) # results for 1st cluster
#'
#' @importFrom S4Vectors DataFrame
#' @importFrom purrr map_depth
#' @export

stagewise_DS_DD <- function(res_DS, res_DD, sce=NULL, verbose=FALSE) {
if (!requireNamespace("stageR", quietly=TRUE))
stop("Install 'stageR' to use this function.")

# validity checks
# TODO: helper to check validity of 'res_DS/D'
# against each other and, optionally, 'sce'
stopifnot(
# same coefs/constrasts
names(x <- res_DS$table) ==
names(y <- res_DD$table),
# any shared clusters
sum(mapply(\(i, j)
length(intersect(i, j)),
i=lapply(x, names),
j=lapply(y, names))) > 0)
if (!is.null(sce)) {
.check_sce(sce)
. <- map_depth(list(x, y), 3, \(df) df$gene %in% rownames(sce))
stopifnot("gene(s) present in 'res_DS/D' not found in 'sce'"=unlist(.))
. <- map_depth(list(x, y), 3, \(df) df$cluster_id %in% sce$cluster_id)
stopifnot("cluster(s) present in 'res_DS/D' not found in 'sce'"=unlist(.))
}

# assure that results contain same set of genes, in the same order
# (indepedent of different filtering criteria for the two analyses)
res_DX <- .res_DX(res_DS=res_DS, res_DD=res_DD)

# perform harmonic mean p-value aggregation according to
# (https://www.pnas.org/doi/full/10.1073/pnas.1814092116)
.mu <- \(x) 1/mean(1/x, na.rm=TRUE)

# perform stagewise testing
res <- map_depth(res_DX, 2, \(x) {
ps <- data.frame(
p_val.DS=x$DS$p_val,
p_val.DD=x$DD$p_val,
row.names=x$DS$gene)
qs <- apply(ps, 1, .mu); names(qs) <- x$DS$gene
obj <- stageR::stageR(qs, as.matrix(ps), FALSE)
eva <- expression({
obj <- stageR::stageWiseAdjustment(obj,
method="none", alpha=0.05, allowNA=TRUE)
res <- stageR::getAdjustedPValues(obj,
onlySignificantGenes=FALSE, order=FALSE)
})
res <- if (verbose) eval(eva) else suppressMessages({eval(eva)})
# TODO: communicate this better with the user?
if (is.null(res)) res <- NA
colnames(res)[1] <- "p_adj"
return(res)
})
names(cs) <- cs <- names(res)
lapply(cs, \(c) {
names(ks) <- ks <- names(res[[c]])
lapply(ks, \(k) {
gs <- rownames(df <- res[[c]][[k]])
res_DS <- I((. <- res_DS$table[[c]][[k]])[match(gs, .$gene), ])
res_DD <- I((. <- res_DD$table[[c]][[k]])[match(gs, .$gene), ])
DataFrame(gene=gs, df, cluster_id=k, contrast=c, res_DS, res_DD)
})
})
}
34 changes: 34 additions & 0 deletions R/utils-pbDS.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,40 @@
list(table = tbl, data = y, fit = fit)
}

#' @importFrom matrixStats rowMedians
.edgeR_NB <- \(x, k, design, coef, contrast, ct, cs, nc) {
y <- assay(x, k)
# Gene_level filtering to remove genes detected in
# almost all cells of almost all pseudobulk samples
med_detection <- rowMedians(sweep(y, 2, nc, "/"))
gene_filter <- med_detection < 0.9
# Normalization offset to remove systematic differences between pseudobulk
# samples that are due to technical or nuisance biological variability.
# Idea obtained from cellular detection rate (CDR) normalization from MAST.
# Note that this normalization is used instead of 'edgeR::calcNormFactors()'.
of <- colMeans(sweep(y[gene_filter, ], 2, nc, "/"))
# construct 'DGEList'
y <- suppressMessages(DGEList(
counts = y[gene_filter, ],
group = x$group_id[colnames(y)],
remove.zeros = TRUE))
# add offsets to 'DGEList'
y$offset <- log(nc * of)
# run an 'edgeR' analysis
y <- estimateDisp(y, design)
fit <- glmQLFit(y, design, robust = TRUE)
tbl <- lapply(cs, function(c) {
fit <- glmQLFTest(fit,
coef[[c]],
contrast[, c],
poisson.bound = FALSE)
tbl <- topTags(fit, n = Inf, sort.by = "none")
tbl <- rename(tbl$table, p_val = "PValue", p_adj.loc = "FDR")
tbl <- .res_df(tbl, k, ct, c)
})
list(table = tbl, data = y, fit = fit)
}

#' @importFrom dplyr rename
#' @importFrom edgeR calcNormFactors DGEList
#' @importFrom limma contrasts.fit eBayes lmFit topTable topTreat voom treat
Expand Down
Loading

0 comments on commit d7725a2

Please sign in to comment.