From 5b9de416025ddbe3f3abeb65ab4fca14334ba8b0 Mon Sep 17 00:00:00 2001 From: AndreaGuarracino Date: Sat, 27 Jul 2024 10:01:24 +0200 Subject: [PATCH] allow to flip w.r.t. paths used as reference --- src/algorithms/flip.cpp | 60 ++++++++++++++++++++++++++++++------ src/algorithms/flip.hpp | 3 +- src/subcommand/flip_main.cpp | 17 +++++++--- 3 files changed, 64 insertions(+), 16 deletions(-) diff --git a/src/algorithms/flip.cpp b/src/algorithms/flip.cpp index 27548b1aa..e21618dd0 100644 --- a/src/algorithms/flip.cpp +++ b/src/algorithms/flip.cpp @@ -8,10 +8,13 @@ using namespace handlegraph; void flip_paths(graph_t& graph, graph_t& into, - const std::vector& no_flips) { + const std::vector& no_flips, + const std::vector& ref_flips) { + // Copy the nodes graph.for_each_handle([&](const handle_t& h) { into.create_handle(graph.get_sequence(h), graph.get_id(h)); }); + // Copy the edges graph.for_each_handle([&](const handle_t& h) { graph.follow_edges(h, false, [&](const handle_t& next) { into.create_edge(into.get_handle(graph.get_id(h), graph.get_is_reverse(h)), @@ -22,15 +25,47 @@ void flip_paths(graph_t& graph, into.get_handle(graph.get_id(h), graph.get_is_reverse(h))); }); }); + + // Paths to not flip ska::flat_hash_set no_flip; for (auto& p : no_flips) { no_flip.insert(p); } + + // Paths to use as reference for flipping + ska::flat_hash_set ref_flip; + for (auto& p : ref_flips) { ref_flip.insert(p); } + + // Prepare all path handles in a vector (for parallel processing) std::vector paths; - std::vector into_paths; // for each path, find its average orientation graph.for_each_path_handle([&](const path_handle_t& p) { paths.push_back(p); }); + + // Check whether reference paths are (in general) in fwd or rev orientation + uint64_t ref_rev = 0; + uint64_t ref_fwd = 0; + // OpenMP's reduction: each thread maintains its own private copy of these variables + // during the parallel region, and OpenMP combines them at the end using addition. +#pragma omp parallel for reduction(+:ref_rev,ref_fwd) + for (auto& path : paths) { + if (ref_flip.count(path)) { + graph.for_each_step_in_path( + path, + [&ref_rev,&ref_fwd,&graph](const step_handle_t& s) { + auto h = graph.get_handle_of_step(s); + auto len = graph.get_length(h); + if (graph.get_is_reverse(h)) { + ref_rev += len; + } else { + ref_fwd += len; + } + }); + } + } + #pragma omp parallel for for (auto& path : paths) { - if (!no_flip.count(path)) { + // ref_paths must not be flipped either + if (!no_flip.count(path) && !ref_flip.count(path)) { + // Check path orientation with respect to the graph uint64_t rev = 0; uint64_t fwd = 0; graph.for_each_step_in_path( @@ -44,11 +79,20 @@ void flip_paths(graph_t& graph, fwd += len; } }); - // those that tend to be reversed more than forward should be flipped - if (rev > fwd) { + + // Check if the path should be flipped + bool flip_path = false; + if (ref_flip.size() > 0) { + // if ref paths are reversed, reversed paths should be + // not flipped to stay consistent with the reference + flip_path = ref_rev > ref_fwd ? rev < fwd : rev > fwd; + } else { + // those that tend to be reversed more than forward should be flipped + flip_path = rev > fwd; + } + if (flip_path) { auto name = graph.get_path_name(path) + "_inv"; auto flipped = into.create_path_handle(name); - into_paths.push_back(flipped); std::vector v; graph.for_each_step_in_path( path, @@ -63,7 +107,6 @@ void flip_paths(graph_t& graph, } } else { auto fwd = into.create_path_handle(graph.get_path_name(path)); - into_paths.push_back(fwd); graph.for_each_step_in_path( path, [&fwd,&into,&graph](const step_handle_t& s) { @@ -75,7 +118,6 @@ void flip_paths(graph_t& graph, } else { // add the path as-is auto fwd = into.create_path_handle(graph.get_path_name(path)); - into_paths.push_back(fwd); graph.for_each_step_in_path( path, [&fwd,&into,&graph](const step_handle_t& s) { @@ -87,7 +129,6 @@ void flip_paths(graph_t& graph, } ska::flat_hash_set> edges_to_create; - #pragma omp parallel for for (auto& path : paths) { // New edges can be due only when paths are flipped @@ -104,7 +145,6 @@ void flip_paths(graph_t& graph, }); } } - // add missing edges for (auto edge: edges_to_create) { into.create_edge(edge.first, edge.second); diff --git a/src/algorithms/flip.hpp b/src/algorithms/flip.hpp index 714853ab6..66607dbb4 100644 --- a/src/algorithms/flip.hpp +++ b/src/algorithms/flip.hpp @@ -27,7 +27,8 @@ using namespace handlegraph; /// the graph in the forward orientation. void flip_paths(graph_t& graph, graph_t& into, - const std::vector& no_flips); + const std::vector& no_flips, + const std::vector& ref_flips); } diff --git a/src/subcommand/flip_main.cpp b/src/subcommand/flip_main.cpp index f3fe4a381..09d6dbec7 100644 --- a/src/subcommand/flip_main.cpp +++ b/src/subcommand/flip_main.cpp @@ -18,14 +18,15 @@ int main_flip(int argc, char **argv) { } const std::string prog_name = "odgi flip"; argv[0] = (char *) prog_name.c_str(); - --argc -; + --argc; + args::ArgumentParser parser("Flip (reverse complement) paths to match the graph."); args::Group mandatory_opts(parser, "[ MANDATORY ARGUMENTS ]"); args::ValueFlag og_in_file(mandatory_opts, "FILE", "Load the succinct variation graph in ODGI format from this *FILE*. The file name usually ends with *.og*. GFA is also supported.", {'i', "idx"}); args::ValueFlag og_out_file(mandatory_opts, "FILE", "Write the sorted dynamic succinct variation graph to this file (e.g. *.og*).", {'o', "out"}); args::Group flip_opts(parser, "[ Flip Options ]"); args::ValueFlag _no_flips(flip_opts, "FILE", "Don't flip paths listed one per line in FILE.", {'n', "no-flips"}); + args::ValueFlag _ref_flips(flip_opts, "FILE", "Flip paths to match the orientation of the paths listed one per line in FILE.", {'r', "ref-flips"}); args::Group threading_opts(parser, "[ Threading ]"); args::ValueFlag nthreads(threading_opts, "N", "Number of threads to use for parallel operations.", {'t', "threads"}); @@ -80,7 +81,8 @@ int main_flip(int argc, char **argv) { graph.set_number_of_threads(num_threads); std::vector no_flips; - if (_no_flips) { + std::vector ref_flips; + if (_no_flips || _ref_flips) { // path loading auto load_paths = [&](const std::string& path_names_file) { std::ifstream path_names_in(path_names_file); @@ -116,11 +118,16 @@ int main_flip(int argc, char **argv) { return paths; }; - no_flips = load_paths(args::get(_no_flips)); + if (_no_flips) { + no_flips = load_paths(args::get(_no_flips)); + } + if (_ref_flips) { + ref_flips = load_paths(args::get(_ref_flips)); + } } graph_t into; - algorithms::flip_paths(graph, into, no_flips); + algorithms::flip_paths(graph, into, no_flips, ref_flips); const std::string outfile = args::get(og_out_file); if (outfile == "-") {