-
Notifications
You must be signed in to change notification settings - Fork 235
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add netboxr tool (beta version) - third attempt (#1233)
* Changes made based on feedback from second PR * small changes here and there * fix geneList name * More small changes * Change .txt to txt * Remove r-optparse dependency Co-authored-by: Björn Grüning <[email protected]>
- Loading branch information
Showing
11 changed files
with
2,213 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# Metadata describing the netboxr tool.
description: netboxr enables automated discovery of biological process modules by network analysis
# Block scalar: the body must be indented relative to the key.
long_description: |
  The NetBox algorithm identifies candidate cancer-related functional modules using a data-driven, network-based approach that combines prior knowledge with a network clustering algorithm. The network modules are derived de novo, and can be used to identify new functional gene groups that cross the boundaries of curated gene sets.
name: netboxr
owner: bgruening
categories:
  - Systems Biology
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
# Error handling: write the error message to stderr and quit with exit
# status 1, without saving the workspace and without running .Last().
report_error_and_quit <- function() {
  cat(geterrmessage(), file = stderr())
  q(save = "no", status = 1, runLast = FALSE)
}
options(show.error.messages = FALSE, error = report_error_and_quit)

# Force English messages: other LC_MESSAGES settings (e.g. German) can crash
# Galaxy with a UTF-8 error.
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")

# Attach the required packages without their startup banners.
suppressPackageStartupMessages({
  library(netboxr)
  library(igraph)
  library(RColorBrewer)
})

# Load the netbox2010 dataset; its $network element is the interaction
# network analysed below.
data(netbox2010)

# Parse the command line.
# Arguments arrive as 14 "--flag value" pairs in a fixed order, so the values
# sit at the even positions of `args`. commandArgs() returns character
# strings; each value is converted to the type used downstream so that
# numeric comparisons and if() conditions never operate on raw strings.
args <- commandArgs(TRUE)
if (length(args) < 28) {
  stop("Expected 14 '--flag value' pairs but received ", length(args),
       " argument(s)", call. = FALSE)
}

# Convert a "True"/"False" string to a logical scalar; abort on anything else.
parse_flag <- function(value, name) {
  flag <- as.logical(value)
  if (is.na(flag)) {
    stop("Invalid boolean value for ", name, ": ", value, call. = FALSE)
  }
  flag
}

# Convert a string to a numeric scalar; abort when it is not a number.
parse_number <- function(value, name) {
  # The NA produced by a failed conversion is handled explicitly below, so
  # the accompanying coercion warning is redundant.
  number <- suppressWarnings(as.numeric(value))
  if (is.na(number)) {
    stop("Invalid numeric value for ", name, ": ", value, call. = FALSE)
  }
  number
}

# Vars
# One gene symbol per line.
gene_list <- scan(args[2], what = character(), sep = "\n")
# Strip surrounding whitespace and drop empty entries so that stray spaces or
# tabs do not end up as part of a gene symbol.
gene_list <- trimws(gene_list)
gene_list <- gene_list[nzchar(gene_list)]

cutoff <- parse_number(args[4], "--cutoff")
community <- args[6]
global_model <- parse_flag(args[8], "--globalModel")
global_iterations <- as.integer(parse_number(args[10], "--globalIterations"))
global_number <- as.integer(parse_number(args[12], "--globalNumber"))
local_model <- parse_flag(args[14], "--localModel")
local_iterations <- as.integer(parse_number(args[16], "--localIterations"))

network_plot <- parse_flag(args[18], "--networkPlot")
plot_width <- parse_number(args[20], "--plotWidth")
output_sif <- parse_flag(args[22], "--outputSIF")
neighbor_list <- parse_flag(args[24], "--neighborList")
modmem <- parse_flag(args[26], "--modmem")
nt <- parse_flag(args[28], "--nt")

# Divert printed output to metadata.txt and send messages to the stdout
# connection for the remainder of the script.
sink("metadata.txt")
sink(stdout(), type = "message")

# Network analysis as described in the netboxr vignette
graph_reduced <- networkSimplify(netbox2010$network, directed = FALSE)
threshold <- cutoff
connector_output <- geneConnector(
  geneList = gene_list,
  networkGraph = graph_reduced,
  directed = FALSE,
  pValueAdj = "BH",
  pValueCutoff = threshold,
  communityMethod = community,
  keepIsolatedNodes = FALSE
)
# Print the full result, then keep it for the output steps below.
results <- print(connector_output)

# Check the p-value of the selected linkers: print the neighbor rows whose
# FDR-adjusted p-value is below the threshold.
neighbor_stats <- results$neighborData
print(neighbor_stats[neighbor_stats$pValueFDR < threshold, ])
graph_layout <- layout_with_fr(results$netboxGraph)

# Global Network Null Model (result is printed)
if (global_model) {
  global_test <- globalNullModel(
    netboxGraph = results$netboxGraph,
    networkGraph = graph_reduced,
    iterations = global_iterations,
    numOfGenes = global_number
  )
  print(global_test)
}

# Local Network Null Model (result is printed and reused for the histogram)
if (local_model) {
  local_test <- localNullModel(
    netboxGraph = results$netboxGraph,
    iterations = local_iterations
  )
  print(local_test)
}

## Output
# Plot the edge-annotated graph, one colour per interaction type, and save it
# as network_plot.pdf.
if (network_plot) {
  edges <- results$netboxOutput
  # Interaction types are taken from the second column of the result table.
  interaction_type <- unique(edges[, 2])
  n_types <- length(interaction_type)

  # brewer.pal() supports only 3 to 11 colours for "Spectral": it returns 3
  # colours when fewer are requested and caps the palette at 11. Request a
  # size inside that range, then trim or interpolate so there is exactly one
  # colour per interaction type (for 3-11 types the palette is unchanged).
  base_palette <- brewer.pal(min(max(n_types, 3), 11), name = "Spectral")
  if (n_types > length(base_palette)) {
    interaction_type_color <- grDevices::colorRampPalette(base_palette)(n_types)
  } else {
    interaction_type_color <- base_palette[seq_len(n_types)]
  }

  edge_colors <- data.frame(interaction_type, interaction_type_color,
                            stringsAsFactors = FALSE)
  colnames(edge_colors) <- c("INTERACTION_TYPE", "COLOR")
  netbox_graph_annotated <- annotateGraph(netboxResults = results,
                                          edgeColors = edge_colors,
                                          directed = FALSE, linker = TRUE)

  pdf("network_plot.pdf", width = plot_width)
  plot(results$netboxCommunity, netbox_graph_annotated, layout = graph_layout,
       vertex.size = 10, vertex.shape = V(netbox_graph_annotated)$shape,
       edge.color = E(netbox_graph_annotated)$interactionColor, edge.width = 3)

  # Add interaction type annotations
  legend(x = -1.8, y = -1, legend = interaction_type,
         col = interaction_type_color, lty = 1, lwd = 2, bty = "n", cex = 1)
  dev.off()
}

# Local Network Null Model: histogram of the random modularity scores, with
# the observed modularity score marked by a red vertical line.
if (local_model) {
  pdf("localModel_histogram.pdf")
  score_hist <- hist(local_test$randomModularityScore, breaks = 35,
                     plot = FALSE)
  # Replace the density with the fraction of scores falling into each bin.
  score_hist$density <- score_hist$counts / sum(score_hist$counts)
  plot(score_hist, freq = FALSE, ylim = c(0, 0.1), xlim = c(0.1, 0.6),
       col = "lightblue")
  abline(v = local_test$modularityScoreObs, col = "red")
  dev.off()
}

# Write a table as tab-separated text, unquoted and without row names.
# `header` controls whether the column names are written.
write_tsv <- function(table, path, header = FALSE) {
  invisible(write.table(table, file = path, sep = "\t", quote = FALSE,
                        col.names = header, row.names = FALSE))
}

# NetBox algorithm output in SIF format.
if (output_sif) {
  write_tsv(results$netboxOutput, "network.sif")
}

# Save neighbor data (the only table written with a header row)
if (neighbor_list) {
  write_tsv(results$neighborData, "neighbor_data.txt", header = TRUE)
}

# Save identified pathway module numbers
if (modmem) {
  write_tsv(results$moduleMembership, "community.membership.txt")
}

# Save file that indicates whether the node is a 'linker' or 'candidate'
if (nt) {
  write_tsv(results$nodeType, "nodeType.txt")
}

# Stop diverting printed output to metadata.txt
sink(NULL)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,151 @@ | ||
<tool id="netboxr" name="netboxr" version="1.6.0"> | ||
<description>enables automated discovery of biological process modules by network analysis.</description> | ||
<requirements> | ||
<requirement type="package" version="1.6.0">bioconductor-netboxr</requirement> | ||
</requirements> | ||
<command detect_errors="exit_code"><![CDATA[ | ||
Rscript '$__tool_directory__/netboxr_r.R' | ||
--geneList '$geneList' | ||
--cutoff '$cutoff' | ||
--community $community | ||
--globalModel $globalModel | ||
--globalIterations '$globalIterations' | ||
--globalNumber '$globalNumber' | ||
--localModel $localModel | ||
--localIterations '$localIterations' | ||
--networkPlot $networkPlot | ||
--plotWidth '$plotWidth' | ||
--outputSIF $outputSIF | ||
--neighborList $neighborList | ||
--modmem $modmem | ||
--nt $nt | ||
]]></command> | ||
<inputs> | ||
<param argument="--geneList" type="data" format="txt" label="Gene list" help="A newline-delimited text file that contains a list of genes of interest" /> | ||
<param argument="--cutoff" type="float" value="0.05" label="P-Value Cut-Off" help="The corrected p-value (or q-value) cut-off to apply for the graph output. Default: 0.05" /> | ||
<param argument="--community" type="select" label="Community detection method" help="Community detection methods include edge betweenness score (EBC), leading eigenvector method (LEC), Louvain method (LOUV), or Leiden method (LEID). Default: EBC"> | ||
<option value="ebc">EBC</option> | ||
<option value="lec">LEC</option> | ||
<option value="louvain">LOUV</option> | ||
<option value="leiden">LEID</option> | ||
</param> | ||
<param argument="--globalModel" type="boolean" truevalue="True" falsevalue="False" checked="True" label="Global network null model" help="Perform global network null model analysis. The global network null model calculates the empirical p-value as the number of times (over a set of iterations) the size of the largest connected component (the giant component) in the network coming from the same number of randomly selected genes equals or exceeds the size of the largest connected component in the observed network. Default: Yes" /> | ||
<param argument="--globalIterations" type="integer" value="10" label="Global network null model iterations" help="Global network null model iterations. Recommended: 1000" /> | ||
<param argument="--globalNumber" type="integer" value="274" label="Global network model number of genes" help="Global network null model number of genes." /> | ||
<param argument="--localModel" type="boolean" truevalue="True" falsevalue="False" checked="True" label="Local network null model" help="Perform local network null model analysis. The local network null model evaluates the deviation of modularity in the observed network from modularity distribution in the random network. Default: Yes" /> | ||
<param argument="--localIterations" type="integer" value="10" label="Local network null model iterations" help="Local network null model iterations. Recommended: 1000" /> | ||
<param argument="--networkPlot" type="boolean" truevalue="True" falsevalue="False" checked="True" label="Network plot" help="Output the annotated graph of netboxr output. Default: Yes" /> | ||
<param argument="--plotWidth" type="integer" value="8" label="Network plot width" help="Plot width. Recommended: 9" /> | ||
<param argument="--outputSIF" type="boolean" truevalue="True" falsevalue="False" checked="True" label="Network in SIF format" help="Output the network in SIF format. Default: Yes" /> | ||
<param argument="--neighborList" type="boolean" truevalue="True" falsevalue="False" checked="True" label="Neighbor list" help="Output a table containing the information of all neighbor nodes. Default: Yes" /> | ||
<param argument="--modmem" type="boolean" truevalue="True" falsevalue="False" checked="True" label="Module membership" help="Output a table containing the identified pathway module numbers. Default: Yes" /> | ||
<param argument="--nt" type="boolean" truevalue="True" falsevalue="False" checked="True" label="Node type" help="Output a table containing the information of node types. Default: Yes" /> | ||
</inputs> | ||
<outputs>
    <!-- Each optional output is collected only when its boolean input
         parameter is enabled. Filters are evaluated against the input
         parameters, so they must use input parameter names (networkPlot,
         localModel, outputSIF, neighborList, modmem, nt), not output names. -->
    <data name="output_pdf" format="pdf" from_work_dir="network_plot.pdf" label="${tool.name} on ${on_string}: Network plot">
        <filter>networkPlot is True</filter>
    </data>
    <data name="localModel_pdf" format="pdf" from_work_dir="localModel_histogram.pdf" label="${tool.name} on ${on_string}: Local model histogram">
        <filter>localModel is True</filter>
    </data>
    <data name="output_sif" format="sif" from_work_dir="network.sif" label="${tool.name} on ${on_string}: SIF network">
        <filter>outputSIF is True</filter>
    </data>
    <data name="output_neighbor" format="tabular" from_work_dir="neighbor_data.txt" label="${tool.name} on ${on_string}: Neighbor data">
        <filter>neighborList is True</filter>
    </data>
    <data name="moduleMembership" format="tabular" from_work_dir="community.membership.txt" label="${tool.name} on ${on_string}: Module membership">
        <filter>modmem is True</filter>
    </data>
    <data name="nodeType" format="tabular" from_work_dir="nodeType.txt" label="${tool.name} on ${on_string}: Node type">
        <filter>nt is True</filter>
    </data>
    <!-- Always produced: the text printed by the R script during the analysis. -->
    <data name="metadata" format="txt" from_work_dir="metadata.txt" label="${tool.name} on ${on_string}: Metadata" />
</outputs>
<tests> | ||
<test> | ||
<param name="geneList" value="netbox2010_data.txt" /> | ||
<param name="cutoff" value="0.05" /> | ||
<param name="community" value="ebc" /> | ||
<param name="globalModel" value="True" /> | ||
<param name="globalIterations" value="10" /> | ||
<param name="globalNumber" value="274" /> | ||
<param name="localModel" value="True" /> | ||
<param name="localIterations" value="10" /> | ||
<param name="networkPlot" value="True" /> | ||
<param name="plotWidth" value="8" /> | ||
<param name="outputSIF" value="True" /> | ||
<param name="neighborList" value="True" /> | ||
<param name="modmem" value="True" /> | ||
<param name="nt" value="True" /> | ||
<output name="output_pdf" file="network_plot.pdf" compare="sim_size" /> | ||
<output name="localModel_pdf" file="localModel_histogram.pdf" compare="sim_size" /> | ||
<output name="output_sif" file="network.sif"> | ||
<assert_contents> | ||
<has_text_matching expression="CBL\tPP\tPIK3R1" /> | ||
<has_text_matching expression="PIK3R1\tIN_SAME_COMPONENT\tAGAP2" /> | ||
</assert_contents> | ||
</output> | ||
<output name="output_neighbor" file="neighbor_data.txt"> | ||
<assert_contents> | ||
<has_text_matching expression="idx\tname\tlocalDegree\tglobalDegree\tpValueRaw\toddsRatio\tpValueFDR" /> | ||
<has_text_matching expression="1712\tCRK\t11\t81\t2.39208791593171e-05\t1.70873218314362\t0.0186673141177578" /> | ||
<has_text_matching expression="6187\tRPS27\t2\t186\t0.975923212389053\t-0.825808759394024\t0.975923212389053" /> | ||
</assert_contents> | ||
</output> | ||
<output name="moduleMembership" file="community.membership.txt"> | ||
<assert_contents> | ||
<has_text_matching expression="CBL\t1" /> | ||
<has_text_matching expression="CCT6A\t10" /> | ||
</assert_contents> | ||
</output> | ||
<output name="nodeType" file="nodeType.txt"> | ||
<assert_contents> | ||
<has_text_matching expression="CDKN2B\tcandidate" /> | ||
<has_text_matching expression="PTPN11\tlinker" /> | ||
</assert_contents> | ||
</output> | ||
</test> | ||
</tests> | ||
<help><![CDATA[ | ||
NetBox is a network-based approach that combines prior knowledge with a | ||
network clustering algorithm. The algorithm allows for the identification | ||
of functional modules and allows for combining multiple data types, such as | ||
mutations and copy number alterations. NetBox performs network analysis on | ||
human interaction networks, and comes pre-loaded with a Human Interaction | ||
Network (HIN) derived from four literature curated data sources, including | ||
the Human Protein Reference Database (HPRD), Reactome, NCI-Nature Pathway | ||
Interaction (PID) Database, and the MSKCC Cancer Cell Map. | ||
----- | ||
**INPUTS** | ||
**Gene list** | ||
A newline-delimited text file that contains a list of genes of interest is | ||
required. | ||
Example of text file contents: | ||
EGFR | ||
TP53 | ||
ACTB | ||
GAPDH | ||
**Cutoff value** | ||
The corrected p-value (or q-value) cut-off to apply for the graph output. | ||
The default p-value is 0.05. | ||
**OUTPUTS** | ||
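* A PDF with a plot of the edge annotated graph (optional) | ||
* A PDF with a histogram of the local network null model modularity scores (optional) | ||
* The network in SIF format (optional) | ||
* A text file with tabular data containing information of all neighbor nodes (optional) | ||
* A text file with tabular data containing the identified pathway module numbers (optional) | ||
* A text file with tabular data containing the node types (optional) | ||
* A text file with the metadata printed during the analysis | ||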
]]></help> | ||
<citations> | ||
<citation type="doi">10.1371/journal.pone.0234669</citation> | ||
</citations> | ||
</tool> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
CBL 1 | ||
ADAM12 1 | ||
PIK3R1 1 | ||
PTPN11 1 | ||
KIT 1 | ||
MPDZ 1 | ||
KDR 1 | ||
ERBB2 1 | ||
SPRY2 1 | ||
FRS2 1 | ||
TEK 1 | ||
GAB1 1 | ||
PDGFRA 1 | ||
IFNG 1 | ||
VLDLR 1 | ||
DOCK1 1 | ||
EGFR 1 | ||
JAK2 1 | ||
CRK 1 | ||
PIK3C2B 1 | ||
PTPRB 1 | ||
PTEN 1 | ||
SH3GL2 1 | ||
FGF23 1 | ||
MAPK11 1 | ||
PIK3CA 1 | ||
HLA-DRA 1 | ||
AVIL 1 | ||
AGAP2 1 | ||
TBP 2 | ||
RB1 2 | ||
TAF1 2 | ||
CDK6 2 | ||
CDKN2A 2 | ||
TP53 2 | ||
HSPA1A 2 | ||
RBBP5 2 | ||
SNAPC3 2 | ||
MDM4 2 | ||
MDM2 2 | ||
KLF6 2 | ||
CDKN2C 2 | ||
CDK4 2 | ||
PIM1 2 | ||
BNC2 2 | ||
EPHA3 2 | ||
CCND2 2 | ||
CDKN2B 2 | ||
IFNW1 3 | ||
IFNA2 3 | ||
IFNB1 3 | ||
IFNAR1 3 | ||
IFNA1 3 | ||
KCNA5 4 | ||
PTPRD 4 | ||
PTPRE 4 | ||
CNTN2 5 | ||
NCAM1 5 | ||
NUP50 6 | ||
THOC4 6 | ||
NUP107 6 | ||
SNRPE 6 | ||
DCTN2 7 | ||
TUBGCP2 7 | ||
FGFR1OP 7 | ||
TUBGCP6 7 | ||
A2M 8 | ||
LYZ 8 | ||
PPARA 9 | ||
STAC3 9 | ||
CCT2 10 | ||
CCT6A 10 |
Binary file not shown.
Oops, something went wrong.