✨🐛 Introduce importableOnly filter to fix GSA analysis issues when matching PUBCHEM entities
EliotRagueneau committed Nov 14, 2023
1 parent 0f71b02 commit cfe1239
Showing 11 changed files with 355 additions and 143 deletions.
4 changes: 2 additions & 2 deletions pom.xml
@@ -5,12 +5,12 @@
     <parent>
         <groupId>org.reactome.maven</groupId>
         <artifactId>reactome-parent</artifactId>
-        <version>1.0.3</version>
+        <version>1.0.5-SNAPSHOT</version>
     </parent>

     <groupId>org.reactome.server.tools</groupId>
     <artifactId>analysis-core</artifactId>
-    <version>3.4.7</version>
+    <version>3.4.8-SNAPSHOT</version>
     <packaging>jar</packaging>

     <description>The Reactome analysis-core extracts data from the graph-database to create the intermediate data format
3 changes: 2 additions & 1 deletion src/main/java/org/reactome/server/analysis/core/Main.java
@@ -34,6 +34,7 @@ public static void main(String[] args) throws JSAPException {
        SimpleJSAP jsap = new SimpleJSAP(Main.class.getName(), "Connect to Reactome Graph Database",
                new Parameter[]{
                        new FlaggedOption("host", JSAP.STRING_PARSER, "bolt://localhost:7687", JSAP.NOT_REQUIRED, 'h', "host", "The neo4j host")
+                        , new FlaggedOption("db", JSAP.STRING_PARSER, "graph.db", JSAP.NOT_REQUIRED, 'd', "db", "The neo4j database name")
                        , new FlaggedOption("user", JSAP.STRING_PARSER, "neo4j", JSAP.NOT_REQUIRED, 'u', "user", "The neo4j user")
                        , new FlaggedOption("password", JSAP.STRING_PARSER, "neo4jj", JSAP.REQUIRED, 'k', "password", "The neo4j password")
                        , new FlaggedOption("output", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'o', "output", "The file where the results are written to")
@@ -46,7 +47,7 @@ public static void main(String[] args) throws JSAPException {
        if (jsap.messagePrinted()) System.exit(1);

        //Initialising ReactomeCore Neo4j configuration
-        ReactomeGraphCore.initialise(config.getString("host"), config.getString("user"), config.getString("password"), AnalysisCoreNeo4jConfig.class);
+        ReactomeGraphCore.initialise(config.getString("host"), config.getString("user"), config.getString("password"), config.getString("db"), AnalysisCoreNeo4jConfig.class);

        TEST_MAIN_SPECIES = config.getBoolean("test");
        VERBOSE = config.getBoolean("verbose");
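
Alongside the version bump, Main.java gains a --db flag whose value is forwarded to ReactomeGraphCore.initialise. Below is a minimal, self-contained sketch of the same JSAP pattern (package com.martiansoftware.jsap); the Reactome-specific call is left as a comment because its imports are not shown in this diff, and the jar name in the usage comment is only illustrative.

import com.martiansoftware.jsap.FlaggedOption;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPException;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;

public class DbOptionSketch {
    public static void main(String[] args) throws JSAPException {
        // Same option shape as the diff above: "db" is optional and defaults to "graph.db",
        // e.g. java -jar analysis-core.jar --host bolt://localhost:7687 --db graph.db  (jar name illustrative)
        SimpleJSAP jsap = new SimpleJSAP(DbOptionSketch.class.getName(), "Connect to Reactome Graph Database",
                new Parameter[]{
                        new FlaggedOption("host", JSAP.STRING_PARSER, "bolt://localhost:7687", JSAP.NOT_REQUIRED, 'h', "host", "The neo4j host")
                        , new FlaggedOption("db", JSAP.STRING_PARSER, "graph.db", JSAP.NOT_REQUIRED, 'd', "db", "The neo4j database name")
                });
        JSAPResult config = jsap.parse(args);
        if (jsap.messagePrinted()) System.exit(1);

        // In the commit, the parsed value is forwarded to the graph layer:
        // ReactomeGraphCore.initialise(host, user, password, config.getString("db"), AnalysisCoreNeo4jConfig.class);
        System.out.println("Would connect to " + config.getString("host") + ", database " + config.getString("db"));
    }
}
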
IdentifiersMapping.java
@@ -10,6 +10,7 @@
import org.reactome.server.analysis.core.model.resource.Resource;
import org.reactome.server.analysis.core.result.model.MappedEntity;
import org.reactome.server.analysis.core.result.model.MappedIdentifier;
+import org.reactome.server.analysis.core.result.utils.ExternalAnalysisResultCheck;
import org.reactome.server.analysis.core.util.MapSet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -39,9 +40,9 @@ public IdentifiersMapping(AnalysisData analysisData) {
        this.analysisData = analysisData;
    }

-    public List<MappedEntity> run(Set<String> identifiers, SpeciesNode speciesNode, boolean includeInteractors) {
+    public List<MappedEntity> run(Set<String> identifiers, SpeciesNode speciesNode, boolean includeInteractors, boolean importableOnly) {
        this.increaseCounter();
-        MapSet<String, MappedIdentifier> mapping = getMapping(identifiers, speciesNode, includeInteractors);
+        MapSet<String, MappedIdentifier> mapping = getMapping(identifiers, speciesNode, includeInteractors, importableOnly);
        this.decreaseCounter();
        List<MappedEntity> rtn = new ArrayList<>();
        for (String identifier : mapping.keySet()) {
@@ -54,7 +55,7 @@ public static long getMappingCount() {
        return MAPPING_COUNT;
    }

-    private MapSet<String, MappedIdentifier> getMapping(Set<String> identifiers, SpeciesNode speciesNode, boolean includeInteractors) {
+    private MapSet<String, MappedIdentifier> getMapping(Set<String> identifiers, SpeciesNode speciesNode, boolean includeInteractors, boolean importableOnly) {
        MapSet<String, MappedIdentifier> rtn = new MapSet<>();

        final int originalSampleSize = identifiers.size();
@@ -67,9 +68,11 @@ private MapSet<String, MappedIdentifier> getMapping(Set<String> identifiers, Spe

            MapSet<Resource, EntityNode> resourceEntities = entitiesMap.get(identifier);
            for (Resource resource : resourceEntities.keySet()) {
-                for (EntityNode node : resourceEntities.getElements(resource)) {
-                    if (speciesNode != null) node = node.getProjection(speciesNode);
-                    if (node != null) rtn.add(identifier, new MappedIdentifier(node.getIdentifier()));
+                if (!importableOnly || ExternalAnalysisResultCheck.isValidResource(resource.getName())) {
+                    for (EntityNode node : resourceEntities.getElements(resource)) {
+                        if (speciesNode != null) node = node.getProjection(speciesNode);
+                        if (node != null) rtn.add(identifier, new MappedIdentifier(node.getIdentifier()));
+                    }
                }
            }

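
The behavioural change here is the importableOnly guard in getMapping: when the flag is set, resources that cannot be carried back into the external analysis result format (the commit title points at PUBCHEM matches) are skipped before their entities are mapped. Below is a minimal, self-contained sketch of the same guard shape; resourceIsImportable is a hypothetical stand-in for ExternalAnalysisResultCheck.isValidResource, and the whitelist used here is illustrative, not the repository's actual rule.

import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class ImportableOnlySketch {

    // Hypothetical stand-in for ExternalAnalysisResultCheck.isValidResource(resourceName);
    // the real check lives in the repository, this whitelist is illustrative only.
    private static final Set<String> IMPORTABLE = new HashSet<>(Arrays.asList("UNIPROT", "CHEBI", "ENSEMBL"));

    static boolean resourceIsImportable(String resourceName) {
        return IMPORTABLE.contains(resourceName);
    }

    // Same guard shape as getMapping(...): map an identifier to target ids,
    // but only through resources that survive the importableOnly filter.
    static Set<String> mapIdentifier(Map<String, List<String>> idsPerResource, boolean importableOnly) {
        Set<String> mapped = new HashSet<>();
        for (Map.Entry<String, List<String>> entry : idsPerResource.entrySet()) {
            if (!importableOnly || resourceIsImportable(entry.getKey())) {
                mapped.addAll(entry.getValue());
            }
        }
        return mapped;
    }

    public static void main(String[] args) {
        Map<String, List<String>> idsPerResource = new LinkedHashMap<>();
        idsPerResource.put("UNIPROT", Arrays.asList("R-HSA-1", "R-HSA-2"));
        idsPerResource.put("PUBCHEM", Arrays.asList("R-HSA-3"));

        System.out.println(mapIdentifier(idsPerResource, false)); // all three hits
        System.out.println(mapIdentifier(idsPerResource, true));  // PUBCHEM branch skipped
    }
}
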
HierarchiesData.java
@@ -21,17 +21,17 @@ public HierarchiesData(Map<SpeciesNode, PathwayHierarchy> pathwayHierarchies, Ma
        this.pathwayLocation = pathwayLocation;
    }

-    public void addNotFound(AnalysisIdentifier identifier){
+    public void addNotFound(AnalysisIdentifier identifier) {
        this.notFound.add(identifier);
    }

-    public List<PathwayNode> getUniqueHitPathways(SpeciesNode species){
+    public List<PathwayNode> getUniqueHitPathways(SpeciesNode species) {
        Set<SpeciesPathway> found = new HashSet<>();
        List<PathwayNode> rtn = new LinkedList<>();
        for (PathwayNode pathwayNode : this.getHitPathways()) {
-            if(species==null || pathwayNode.getSpecies().equals(species)){
+            if (species == null || pathwayNode.getSpecies().equals(species)) {
                SpeciesPathway sp = new SpeciesPathway(pathwayNode);
-                if(!found.contains(sp)){
+                if (!found.contains(sp)) {
                    rtn.add(pathwayNode);
                    found.add(sp);
                }
@@ -45,7 +45,7 @@ public Map<SpeciesNode, PathwayHierarchy> getPathwayHierarchies() {
        return pathwayHierarchies;
    }

-    private Set<PathwayNode> getHitPathways(){
+    private Set<PathwayNode> getHitPathways() {
        Set<PathwayNode> rtn = new HashSet<>();
        for (SpeciesNode species : pathwayHierarchies.keySet()) {
            rtn.addAll(pathwayHierarchies.get(species).getHitPathways());
@@ -62,7 +62,7 @@ public MapSet<Long, PathwayNode> getPathwayLocation() {
    }

    @SuppressWarnings("ConstantConditions")
-    public void setResultStatistics(Map<MainResource, Integer> sampleSizePerResource, Integer notFound, boolean includeInteractors){
+    public void setResultStatistics(Map<MainResource, Integer> sampleSizePerResource, Integer notFound, boolean includeInteractors) {
        for (SpeciesNode species : this.pathwayHierarchies.keySet()) {
            PathwayHierarchy hierarchy = this.pathwayHierarchies.get(species);
            for (PathwayRoot node : hierarchy.getChildren()) {
@@ -79,21 +79,32 @@ public void setResultStatistics(Map<MainResource, Integer> sampleSizePerResource
            //Contains several sets of PathwayStatistic objects depending on the main resource (this one is used to calculate
            //the entities FDR result based on the entities pValues
            MapSet<MainResource, PathwayStatistic> pathwayResourceEntityPValue = new MapSet<MainResource, PathwayStatistic>();
+            MapSet<MainResource, PathwayStatistic> pathwayResourceEntityPValueImportable = new MapSet<>();

            //This one does not depend on main resource because is for the combined result of the entities FDR based in their pValues
-            List<PathwayStatistic> pathwayEntityPValue = new LinkedList<PathwayStatistic>();
+            List<PathwayStatistic> pathwayEntityPValue = new LinkedList<>();
+            List<PathwayStatistic> pathwayEntityPValueImportable = new LinkedList<>();

            //First thing we have to do, is iterate over the hit pathways and populate the lists (and MapSet) defined above
            for (PathwayNode node : hierarchy.getHitPathways()) {
                PathwayNodeData nodeData = node.getPathwayNodeData();

+                boolean hasImportable = false;
+
                for (MainResource resource : nodeData.getResources()) {
                    Double pValue = nodeData.getEntitiesPValue(resource);
-                    if(pValue!=null)
+                    if (pValue != null) {
                        pathwayResourceEntityPValue.add(resource, new PathwayStatistic(node, pValue));
+                        if (!resource.isAuxMainResource()) {
+                            hasImportable = true;
+                            pathwayResourceEntityPValueImportable.add(resource, new PathwayStatistic(node, pValue));
+                        }
+                    }
                }
                Double pValue = nodeData.getEntitiesPValue();
                pathwayEntityPValue.add(new PathwayStatistic(node, pValue));
+                if (hasImportable)
+                    pathwayEntityPValueImportable.add(new PathwayStatistic(node, nodeData.getEntitiesPValue(true)));
            }
            /*
            Here we have to iterate over the different resources where the "individual" results have been found
@@ -116,15 +127,23 @@ public void setResultStatistics(Map<MainResource, Integer> sampleSizePerResource
            this.setFDRWithBenjaminiHochberg(pathwayEntityPValue);
            for (PathwayStatistic pathwayStatistic : pathwayEntityPValue) {
                PathwayNodeData nodeData = pathwayStatistic.getPathwayNode().getPathwayNodeData();
-                nodeData.setEntitiesFDR(pathwayStatistic.getFDR());
+                nodeData.setEntitiesFDR(false, pathwayStatistic.getFDR());
            }
+
+            this.setFDRWithBenjaminiHochberg(pathwayEntityPValueImportable);
+            for (PathwayStatistic pathwayStatistic : pathwayEntityPValueImportable) {
+                PathwayNodeData nodeData = pathwayStatistic.getPathwayNode().getPathwayNodeData();
+                nodeData.setEntitiesFDR(true, pathwayStatistic.getFDR());
+            }
+
        }
    }

    /**
     * Use this method to calculate FDR from a list of pvalues using Benjamini-Hochberg
     * method. The implementation of this method is based on the source code for MEMo
     * (http://cbio.mskcc.org/tools/memo/).
+     *
     * @param list a list of PathwayStatic objects representing the hit pathways and their pValue
     */
    private void setFDRWithBenjaminiHochberg(List<PathwayStatistic> list) {
@@ -141,7 +160,7 @@ private void setFDRWithBenjaminiHochberg(List<PathwayStatistic> list) {
        }
    }

-    private class PathwayStatistic implements Comparable<PathwayStatistic>{
+    private class PathwayStatistic implements Comparable<PathwayStatistic> {
        private PathwayNode pathwayNode;
        private Double pValue;
        private Double fdr;
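
The body of setFDRWithBenjaminiHochberg is collapsed in this view; per its Javadoc it applies the Benjamini-Hochberg procedure (after MEMo) to the hit pathways' p-values, presumably over PathwayStatistic objects ordered by p-value, and with this commit it is run once on the combined list and once on the importable-only list. Below is a minimal sketch of the standard procedure over a sorted p-value array, not the repository's exact implementation:

import java.util.Arrays;

public class BenjaminiHochbergSketch {

    // Standard Benjamini-Hochberg FDR for p-values sorted in ascending order:
    // adjusted_i = p_i * n / rank_i, enforced to be non-increasing from the largest rank down, capped at 1.
    static double[] fdr(double[] sortedPValues) {
        int n = sortedPValues.length;
        double[] fdr = new double[n];
        double previous = 1.0;
        for (int i = n - 1; i >= 0; i--) {
            double adjusted = Math.min(sortedPValues[i] * n / (i + 1), previous);
            fdr[i] = Math.min(adjusted, 1.0);
            previous = fdr[i];
        }
        return fdr;
    }

    public static void main(String[] args) {
        double[] pValues = {0.001, 0.008, 0.039, 0.041, 0.27};   // already ascending
        System.out.println(Arrays.toString(fdr(pValues)));
    }
}
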