Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

odgi flip -r/--ref-flips: flip with respect to a set of paths (used as references) #585

Merged
merged 1 commit into from
Aug 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 50 additions & 10 deletions src/algorithms/flip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,13 @@ using namespace handlegraph;

void flip_paths(graph_t& graph,
graph_t& into,
const std::vector<path_handle_t>& no_flips) {
const std::vector<path_handle_t>& no_flips,
const std::vector<path_handle_t>& ref_flips) {
// Copy the nodes
graph.for_each_handle([&](const handle_t& h) {
into.create_handle(graph.get_sequence(h), graph.get_id(h));
});
// Copy the edges
graph.for_each_handle([&](const handle_t& h) {
graph.follow_edges(h, false, [&](const handle_t& next) {
into.create_edge(into.get_handle(graph.get_id(h), graph.get_is_reverse(h)),
Expand All @@ -22,15 +25,47 @@ void flip_paths(graph_t& graph,
into.get_handle(graph.get_id(h), graph.get_is_reverse(h)));
});
});

// Paths to not flip
ska::flat_hash_set<path_handle_t> no_flip;
for (auto& p : no_flips) { no_flip.insert(p); }

// Paths to use as reference for flipping
ska::flat_hash_set<path_handle_t> ref_flip;
for (auto& p : ref_flips) { ref_flip.insert(p); }

// Prepare all path handles in a vector (for parallel processing)
std::vector<path_handle_t> paths;
std::vector<path_handle_t> into_paths;
// for each path, find its average orientation
graph.for_each_path_handle([&](const path_handle_t& p) { paths.push_back(p); });

// Check whether reference paths are (in general) in fwd or rev orientation
uint64_t ref_rev = 0;
uint64_t ref_fwd = 0;
// OpenMP's reduction: each thread maintains its own private copy of these variables
// during the parallel region, and OpenMP combines them at the end using addition.
#pragma omp parallel for reduction(+:ref_rev,ref_fwd)
for (auto& path : paths) {
if (ref_flip.count(path)) {
graph.for_each_step_in_path(
path,
[&ref_rev,&ref_fwd,&graph](const step_handle_t& s) {
auto h = graph.get_handle_of_step(s);
auto len = graph.get_length(h);
if (graph.get_is_reverse(h)) {
ref_rev += len;
} else {
ref_fwd += len;
}
});
}
}

#pragma omp parallel for
for (auto& path : paths) {
if (!no_flip.count(path)) {
// ref_paths must not be flipped either
if (!no_flip.count(path) && !ref_flip.count(path)) {
// Check path orientation with respect to the graph
uint64_t rev = 0;
uint64_t fwd = 0;
graph.for_each_step_in_path(
Expand All @@ -44,11 +79,20 @@ void flip_paths(graph_t& graph,
fwd += len;
}
});
// those that tend to be reversed more than forward should be flipped
if (rev > fwd) {

// Check if the path should be flipped
bool flip_path = false;
if (ref_flip.size() > 0) {
// if ref paths are reversed, reversed paths should be
// not flipped to stay consistent with the reference
flip_path = ref_rev > ref_fwd ? rev < fwd : rev > fwd;
} else {
// those that tend to be reversed more than forward should be flipped
flip_path = rev > fwd;
}
if (flip_path) {
auto name = graph.get_path_name(path) + "_inv";
auto flipped = into.create_path_handle(name);
into_paths.push_back(flipped);
std::vector<handle_t> v;
graph.for_each_step_in_path(
path,
Expand All @@ -63,7 +107,6 @@ void flip_paths(graph_t& graph,
}
} else {
auto fwd = into.create_path_handle(graph.get_path_name(path));
into_paths.push_back(fwd);
graph.for_each_step_in_path(
path,
[&fwd,&into,&graph](const step_handle_t& s) {
Expand All @@ -75,7 +118,6 @@ void flip_paths(graph_t& graph,
} else {
// add the path as-is
auto fwd = into.create_path_handle(graph.get_path_name(path));
into_paths.push_back(fwd);
graph.for_each_step_in_path(
path,
[&fwd,&into,&graph](const step_handle_t& s) {
Expand All @@ -87,7 +129,6 @@ void flip_paths(graph_t& graph,
}

ska::flat_hash_set<std::pair<handle_t, handle_t>> edges_to_create;

#pragma omp parallel for
for (auto& path : paths) {
// New edges can be due only when paths are flipped
Expand All @@ -104,7 +145,6 @@ void flip_paths(graph_t& graph,
});
}
}

// add missing edges
for (auto edge: edges_to_create) {
into.create_edge(edge.first, edge.second);
Expand Down
3 changes: 2 additions & 1 deletion src/algorithms/flip.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ using namespace handlegraph;
/// the graph in the forward orientation.
void flip_paths(graph_t& graph,
graph_t& into,
const std::vector<path_handle_t>& no_flips);
const std::vector<path_handle_t>& no_flips,
const std::vector<path_handle_t>& ref_flips);

}

Expand Down
17 changes: 12 additions & 5 deletions src/subcommand/flip_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,15 @@ int main_flip(int argc, char **argv) {
}
const std::string prog_name = "odgi flip";
argv[0] = (char *) prog_name.c_str();
--argc
;
--argc;

args::ArgumentParser parser("Flip (reverse complement) paths to match the graph.");
args::Group mandatory_opts(parser, "[ MANDATORY ARGUMENTS ]");
args::ValueFlag<std::string> og_in_file(mandatory_opts, "FILE", "Load the succinct variation graph in ODGI format from this *FILE*. The file name usually ends with *.og*. GFA is also supported.", {'i', "idx"});
args::ValueFlag<std::string> og_out_file(mandatory_opts, "FILE", "Write the sorted dynamic succinct variation graph to this file (e.g. *.og*).", {'o', "out"});
args::Group flip_opts(parser, "[ Flip Options ]");
args::ValueFlag<std::string> _no_flips(flip_opts, "FILE", "Don't flip paths listed one per line in FILE.", {'n', "no-flips"});
args::ValueFlag<std::string> _ref_flips(flip_opts, "FILE", "Flip paths to match the orientation of the paths listed one per line in FILE.", {'r', "ref-flips"});
args::Group threading_opts(parser, "[ Threading ]");
args::ValueFlag<uint64_t> nthreads(threading_opts, "N", "Number of threads to use for parallel operations.",
{'t', "threads"});
Expand Down Expand Up @@ -80,7 +81,8 @@ int main_flip(int argc, char **argv) {
graph.set_number_of_threads(num_threads);

std::vector<path_handle_t> no_flips;
if (_no_flips) {
std::vector<path_handle_t> ref_flips;
if (_no_flips || _ref_flips) {
// path loading
auto load_paths = [&](const std::string& path_names_file) {
std::ifstream path_names_in(path_names_file);
Expand Down Expand Up @@ -116,11 +118,16 @@ int main_flip(int argc, char **argv) {
return paths;
};

no_flips = load_paths(args::get(_no_flips));
if (_no_flips) {
no_flips = load_paths(args::get(_no_flips));
}
if (_ref_flips) {
ref_flips = load_paths(args::get(_ref_flips));
}
}

graph_t into;
algorithms::flip_paths(graph, into, no_flips);
algorithms::flip_paths(graph, into, no_flips, ref_flips);

const std::string outfile = args::get(og_out_file);
if (outfile == "-") {
Expand Down