Skip to content

Commit

Permalink
Merge pull request #2 from paulzierep/update-bar-phyloseq
Browse files Browse the repository at this point in the history
Update bar phyloseq
  • Loading branch information
MaraBesemer authored Jan 10, 2025
2 parents 8879646 + 5cabf8f commit 3749464
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 65 deletions.
135 changes: 76 additions & 59 deletions tools/phyloseq/phyloseq_plot_bar.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,38 +7,42 @@ suppressPackageStartupMessages(library("ggplot2"))

# Define options
option_list <- list(
make_option(c("--input"),
action = "store", dest = "input",
help = "Input file containing a phyloseq object"
),
make_option(c("--x"),
action = "store", dest = "x",
help = "Variable for x-axis (e.g., 'Sample', 'Phylum')"
),
make_option(c("--fill"),
action = "store", dest = "fill", default = NULL,
help = "Variable for fill color (e.g., 'Genus', 'Order') (optional)"
),
make_option(c("--facet"),
action = "store", dest = "facet", default = NULL,
help = "Facet by variable (optional)"
),
make_option(c("--output"),
action = "store", dest = "output",
help = "Output file (PDF)"
),
make_option(c("--topX"),
action = "store", dest = "topX", default = NULL,
help = "Show only the top X taxa based on abundance (e.g., '10') (optional)"
),
make_option(c("--keepOthers"),
action = "store_true", dest = "keepOthers", default = FALSE,
help = "Keep taxa not in topX and label them as 'Others' (optional)"
),
make_option(c("--normalize"),
action = "store_true", dest = "normalize", default = FALSE,
help = "Normalize abundances to sum to 100% (optional)"
)
make_option(c("--input"),
action = "store", dest = "input",
help = "Input file containing a phyloseq object"
),
make_option(c("--x"),
action = "store", dest = "x",
help = "Variable for x-axis (e.g., 'Sample', 'Phylum')"
),
make_option(c("--fill"),
action = "store", dest = "fill", default = NULL,
help = "Variable for fill color (e.g., 'Genus', 'Order') (optional)"
),
make_option(c("--facet"),
action = "store", dest = "facet", default = NULL,
help = "Facet by variable (optional)"
),
make_option(c("--output"),
action = "store", dest = "output",
help = "Output file (PDF)"
),
make_option(c("--topX"),
action = "store", dest = "topX", default = NULL,
help = "Show only the top X taxa based on abundance (e.g., '10') (optional)"
),
make_option(c("--keepOthers"),
action = "store_true", dest = "keepOthers", default = FALSE,
help = "Keep taxa not in topX and label them as 'Others' (optional)"
),
make_option(c("--keepNonAssigned"),
action = "store_true", dest = "keepNonAssigned", default = FALSE,
help = "Keep taxa labeled as 'Not Assigned' (optional)"
),
make_option(c("--normalize"),
action = "store_true", dest = "normalize", default = FALSE,
help = "Normalize abundances to sum to 100% (optional)"
)
)

# Parse arguments
Expand All @@ -48,19 +52,26 @@ opt <- args$options

# Validate required options
if (is.null(opt$input) || opt$input == "") {
stop("Error: Input file is required.")
stop("Error: Input file is required.")
}
if (is.null(opt$output) || opt$output == "") {
stop("Error: Output file is required.")
stop("Error: Output file is required.")
}

# Load phyloseq object
print(paste("Trying to read:", opt$input))
physeq <- readRDS(opt$input)

print(rank_names(physeq))
print(opt$fill)
# Normalize to relative abundances if requested
if (opt$normalize) {
print("Normalizing abundances to sum to 100%...")
physeq <- transform_sample_counts(physeq, function(x) 100 * x / sum(x))
}

if (opt$keepNonAssigned) {
# Add synthetic "Not Assigned" for missing/NA taxa
tax_table(physeq) <- apply(tax_table(physeq), c(1, 2), function(x) ifelse(is.na(x) | x == "", "Not Assigned", x))
}
# Check if the 'x' and 'fill' variables are valid
sample_vars <- colnames(sample_data(physeq))

Expand All @@ -72,50 +83,56 @@ if (!is.null(opt$topX) && opt$topX != "") {
}

# Aggregate the data at the selected rank (e.g., Phylum)
tax_rank <- opt$fill # Adjust as necessary
tax_rank <- opt$fill # Adjust as necessary
physeq_agg <- tax_glom(physeq, taxrank = tax_rank)

# Get the abundance of each taxon at the selected rank
taxa_abundance <- taxa_sums(physeq_agg)

# Summarize the abundance at each taxonomic rank (grouping by taxonomic name)
tax_table_agg <- tax_table(physeq_agg)
taxa_abundance_by_rank <- tapply(taxa_abundance, tax_table_agg[, tax_rank], sum)

# Identify the top X taxa
top_taxa <- names(sort(taxa_abundance, decreasing = TRUE))[1:topX]
# Identify the top X taxa by summed abundance
top_taxa <- names(sort(taxa_abundance_by_rank, decreasing = TRUE))[1:topX]

print("Only plotting taxa in TopX taxa group:")
print(top_taxa)

# Get the OTUs corresponding to the top taxa
otus_in_top_taxa <- rownames(tax_table_agg)[tax_table_agg[, tax_rank] %in% top_taxa]

if (opt$keepOthers) {
# Label taxa not in top_taxa as "Others"
tax_table(physeq_agg)[, tax_rank][!rownames(tax_table(physeq_agg)) %in% top_taxa] <- "Others"
tax_table(physeq_agg)[, tax_rank][!rownames(tax_table(physeq_agg)) %in% otus_in_top_taxa] <- "Others"
physeq <- physeq_agg
} else {
# Subset the phyloseq object to keep only the top X taxa
physeq_filtered <- prune_taxa(top_taxa, physeq_agg)
physeq_filtered <- prune_taxa(otus_in_top_taxa, physeq_agg)
physeq <- physeq_filtered
}
}

# Normalize to relative abundances if requested
if (opt$normalize) {
print("Normalizing abundances to sum to 100%...")
physeq <- transform_sample_counts(physeq, function(x) 100 * x / sum(x))
}

# Generate bar plot
if (!is.null(opt$x) && opt$x != "") {
p <- plot_bar(physeq, x = opt$x, fill = opt$fill)
p <- plot_bar(physeq, x = opt$x, fill = opt$fill)
} else {
p <- plot_bar(physeq, fill = opt$fill) # If no x is provided, don't include x
p <- plot_bar(physeq, fill = opt$fill) # If no x is provided, don't include x
}

# Only facet if the facet variable is provided and exists in the sample data
if (!is.null(opt$facet) && opt$facet != "") {
if (opt$facet %in% sample_vars) {
p <- p + facet_wrap(as.formula(paste("~", opt$facet)))
} else {
warning(paste("Facet variable", opt$facet, "does not exist in the sample data. Faceting will be skipped."))
}
if (opt$facet %in% sample_vars) {
p <- p + facet_wrap(as.formula(paste("~", opt$facet)))
} else {
warning(paste("Facet variable", opt$facet, "does not exist in the sample data. Faceting will be skipped."))
}
}

# Save to output file using PDF device
print(paste("Saving plot to:", opt$output))
pdf(file = opt$output, width = 10, height = 8)
print(p)
dev.off()
# Save to output file
ggsave(
opt$output,
width = 10,
height = 8,
device="png"
)
14 changes: 8 additions & 6 deletions tools/phyloseq/phyloseq_plot_bar.xml
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,18 @@ Rscript '${__tool_directory__}/phyloseq_plot_bar.R'
--output '$output'
--topX '${topX}'
--keepOthers '${keepOthers}'
--keepNonAssigned '${keepNonAssigned}'
--normalize '${normalize}'
]]></command>
<inputs>
<expand macro="phyloseq_input"/>
<param name="x" type="text" label="X-axis variable" help="Variable for the x-axis (e.g., Sample, Phylum)" />
<param name="fill" type="text" label="Fill variable" help="Variable to color the bars (e.g., Genus, Order)" />
<param name="facet" type="text" optional="true" label="Facet by variable" help="Optional: Variable to facet the chart by (e.g., SampleType)" />
<param name="topX" type="integer" optional="true" label="Top X" help="Optional: Only show the top X values" />
<param name="keepOthers" type="boolean" optional="true" label="Keep 'Others'" help="Optional: Keep an 'Others' category for values not in the top X" />
<param name="normalize" type="boolean" optional="true" label="Normalize" help="Optional: Normalize abundances to sum to 100%" />
<param name="x" type="text" optional="true" label="X-axis variable" help="Variable for the x-axis (e.g., Sample, Phylum). If not specified the Samples are taken." />
<param name="fill" type="text" label="Fill variable" help="Variable to color the bars (e.g., Genus, Order)." />
<param name="facet" type="text" optional="true" label="Facet by variable" help="Variable to facet the chart by (e.g., SampleType)." />
<param name="topX" value="10" type="integer" optional="true" label="Top X" help="Only show the ranks with the top X abundance." />
<param name="keepOthers" type="boolean" label="Keep 'Others'" help="Keep OTUs which are not in top X as 'Others'." />
<param name="keepNonAssigned" type="boolean" label="Keep Non Assigned" help="Keep OTUs that are not assigned at this rank and lable as 'Not Assigned'." />
<param name="normalize" type="boolean" label="Normalize" help="Normalize abundances to sum to 100%. Normalization is performed before Top X selection." />
</inputs>
<outputs>
<data name="output" format="pdf" label="Bar Chart (PDF)" />
Expand Down

0 comments on commit 3749464

Please sign in to comment.