diff --git a/dataload/07_create_db/neo4j/create_indexes.cypher b/dataload/07_create_db/neo4j/create_indexes.cypher deleted file mode 100644 index 9218d85..0000000 --- a/dataload/07_create_db/neo4j/create_indexes.cypher +++ /dev/null @@ -1,21 +0,0 @@ - -CREATE INDEX node_id FOR (n:GraphNode) ON n.`grebi:nodeId` -; -CREATE INDEX subgraph FOR (n:GraphNode) ON n.`grebi:subgraph` -; -CREATE INDEX id_id FOR (n:Id) ON n.`id` -; -CALL db.awaitIndexes(10800) -; -MATCH (ancestor:`ols:Class`)<-[:`biolink:broad_match`*1..]-(subclass:`ols:Class`) -WITH ancestor, count(DISTINCT subclass) AS num_desc -SET ancestor.num_desc = num_desc -; -WITH COLLECT { MATCH (cl:`ols:Class`) RETURN max(cl.num_desc) }[0] AS max_num_desc -MATCH (cl2:`ols:Class`) -SET cl2.ic = 1.0 - (cl2.num_desc/max_num_desc) -; -CREATE INDEX ic FOR (n:GraphNode) ON (n.ic) -; -CALL db.awaitIndexes(10800) -; diff --git a/dataload/07_create_db/neo4j/cypher/create_indexes.cypher b/dataload/07_create_db/neo4j/cypher/create_indexes.cypher new file mode 100644 index 0000000..cedfe69 --- /dev/null +++ b/dataload/07_create_db/neo4j/cypher/create_indexes.cypher @@ -0,0 +1,11 @@ + +CREATE INDEX node_id FOR (n:GraphNode) ON n.`grebi:nodeId` +; +CREATE INDEX subgraph FOR (n:GraphNode) ON n.`grebi:subgraph` +; +CREATE INDEX id_id FOR (n:Id) ON n.`id` +; +CREATE INDEX ic FOR (n:GraphNode) ON (n.ic) +; +CALL db.awaitIndexes(10800) +; diff --git a/dataload/07_create_db/neo4j/cypher/ic_scores_1.cypher b/dataload/07_create_db/neo4j/cypher/ic_scores_1.cypher new file mode 100644 index 0000000..77f3934 --- /dev/null +++ b/dataload/07_create_db/neo4j/cypher/ic_scores_1.cypher @@ -0,0 +1,4 @@ +MATCH (ancestor:`ols:Class`)<-[:`biolink:broad_match`*1..]-(subclass:`ols:Class`) +WITH ancestor, count(DISTINCT subclass) AS num_desc +SET ancestor.num_desc = num_desc +; \ No newline at end of file diff --git a/dataload/07_create_db/neo4j/cypher/ic_scores_2.cypher b/dataload/07_create_db/neo4j/cypher/ic_scores_2.cypher new file mode 100644 index 
0000000..d8c51d7 --- /dev/null +++ b/dataload/07_create_db/neo4j/cypher/ic_scores_2.cypher @@ -0,0 +1,4 @@ +WITH COLLECT { MATCH (cl:`ols:Class`) RETURN max(cl.num_desc) }[0] AS max_num_desc +MATCH (cl2:`ols:Class`) +SET cl2.ic = 1.0 - (toFloat(cl2.num_desc)/max_num_desc) +; diff --git a/dataload/07_create_db/neo4j/neo4j_import.dockersh b/dataload/07_create_db/neo4j/neo4j_import.dockersh index 056ab5c..49fa482 100644 --- a/dataload/07_create_db/neo4j/neo4j_import.dockersh +++ b/dataload/07_create_db/neo4j/neo4j_import.dockersh @@ -35,10 +35,14 @@ sleep 20 echo Creating neo4j indexes... -cypher-shell -a neo4j://127.0.0.1:7687 --non-interactive -f /create_indexes.cypher +cypher-shell -a neo4j://127.0.0.1:7687 --non-interactive -f /cypher/ic_scores_1.cypher +sleep 20 +cypher-shell -a neo4j://127.0.0.1:7687 --non-interactive -f /cypher/ic_scores_2.cypher +sleep 20 +cypher-shell -a neo4j://127.0.0.1:7687 --non-interactive -f /cypher/create_indexes.cypher echo Creating neo4j indexes done -sleep 5 +sleep 20 neo4j stop diff --git a/dataload/07_create_db/neo4j/neo4j_import.slurm.py b/dataload/07_create_db/neo4j/neo4j_import.slurm.py index 73204a9..b8611f9 100644 --- a/dataload/07_create_db/neo4j/neo4j_import.slurm.py +++ b/dataload/07_create_db/neo4j/neo4j_import.slurm.py @@ -35,7 +35,7 @@ def main(): '--bind ' + shlex.quote(neo_data_path) + ':/data', '--bind ' + shlex.quote(neo_logs_path) + ':/logs', '--bind ' + os.path.abspath(os.path.join(os.environ['GREBI_HOME'], '07_create_db/neo4j/neo4j_import.dockersh')) + ':/import.sh', - '--bind ' + os.path.abspath(os.path.join(os.environ['GREBI_HOME'], '07_create_db/neo4j/create_indexes.cypher')) + ':/create_indexes.cypher', + '--bind ' + os.path.abspath(os.path.join(os.environ['GREBI_HOME'], '07_create_db/neo4j/cypher')) + ':/cypher', '--writable-tmpfs', '--network=none', '--env NEO4J_AUTH=none', @@ -50,7 +50,7 @@ def main(): '-v ' + shlex.quote(neo_data_path) + ':/data', '-v ' + shlex.quote(neo_logs_path) + ':/logs', '-v ' + 
os.path.abspath(os.path.join(os.environ['GREBI_HOME'], '07_create_db/neo4j/neo4j_import.dockersh')) + ':/import.sh', - '-v ' + os.path.abspath(os.path.join(os.environ['GREBI_HOME'], '07_create_db/neo4j/create_indexes.cypher')) + ':/create_indexes.cypher', + '-v ' + os.path.abspath(os.path.join(os.environ['GREBI_HOME'], '07_create_db/neo4j/cypher')) + ':/cypher', '-e NEO4J_AUTH=none', 'neo4j:5.18.0', 'bash /import.sh'