Skip to content

Commit

Permalink
rename discontinuous path chunks coming out of vg chunk
Browse files Browse the repository at this point in the history
  • Loading branch information
glennhickey committed Oct 11, 2019
1 parent 6f0e168 commit 7ee5daf
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 6 deletions.
43 changes: 40 additions & 3 deletions src/algorithms/subgraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,8 @@ void extract_path_range(const PathPositionHandleGraph& source, path_handle_t pat
/// add subpaths to the subgraph, providing a concatenation of subpaths that are discontiguous over the subgraph
/// based on their order in the path position index provided by the source graph
/// will clear any path found in both graphs before writing the new steps into it
void add_subpaths_to_subgraph(const PathPositionHandleGraph& source, MutablePathHandleGraph& subgraph) {
void add_subpaths_to_subgraph(const PathPositionHandleGraph& source, MutablePathHandleGraph& subgraph,
bool rename_discontinuous_path_chunks) {
std::unordered_map<std::string, std::map<uint64_t, handle_t> > subpaths;
subgraph.for_each_handle([&](const handle_t& h) {
handlegraph::nid_t id = subgraph.get_id(h);
Expand All @@ -306,16 +307,52 @@ void add_subpaths_to_subgraph(const PathPositionHandleGraph& source, MutablePath
});
}
});

// Helper that opens a fresh chunk path in the subgraph.
// Candidate names are built as "<path_name>.chunk<sub_i>"; sub_i is advanced
// (through the reference) after every candidate, and the first name not already
// present in the subgraph is used for the newly created path.
function<path_handle_t(const string&, bool, size_t&)> new_subpath =
    [&subgraph](const string& path_name, bool is_circular, size_t& sub_i) {
        string candidate_name;
        do {
            // post-increment: the counter moves past the name we end up using, too
            candidate_name = path_name + ".chunk" + std::to_string(sub_i++);
        } while (subgraph.has_path(candidate_name));
        return subgraph.create_path_handle(candidate_name, is_circular);
    };

for (auto& subpath : subpaths) {
const std::string& path_name = subpath.first;
// destroy the path if it exists
if (subgraph.has_path(path_name)) {
subgraph.destroy_path(subgraph.get_path_handle(path_name));
}
size_t chunk_num = 0;
// fill in the path information
path_handle_t path = subgraph.create_path_handle(path_name);
for (auto& p : subpath.second) {
const handle_t& handle = p.second;
for (auto p = subpath.second.begin(); p != subpath.second.end(); ++p) {
const handle_t& handle = p->second;
if (p != subpath.second.begin()) {
auto prev = p;
--prev;
const handle_t& prev_handle = prev->second;
// distance from map
size_t delta = max(p->first, prev->first) - min(p->first, prev->first);
// what the distance should be if they're contiguous depends on relative orientations
size_t cont_delta;
bool r1 = subgraph.get_is_reverse(prev_handle);
bool r2 = subgraph.get_is_reverse(handle);
if (r1 && r2) {
cont_delta = subgraph.get_length(handle);
} else if (!r1 && !r2) {
cont_delta = subgraph.get_length(prev_handle);
} else if (!r1 && r2) {
cont_delta = subgraph.get_length(prev_handle) + subgraph.get_length(handle) - 1;
} else {
cont_delta = 1;
}
if (delta != cont_delta) {
// we have a discontinuity! we'll make a new path and continue from there
path = new_subpath(path_name, subgraph.get_is_circular(path), chunk_num);
}
}
subgraph.append_step(path, handle);
}
}
Expand Down
5 changes: 4 additions & 1 deletion src/algorithms/subgraph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,10 @@ void extract_path_range(const PathPositionHandleGraph& source, path_handle_t pat
/// add subpaths to the subgraph, providing a concatenation of subpaths that are discontiguous over the subgraph
/// based on their order in the path position index provided by the source graph
/// will clear any path found in both graphs before writing the new steps into it
void add_subpaths_to_subgraph(const PathPositionHandleGraph& source, MutablePathHandleGraph& subgraph);
/// if rename_discontinuous_path_chunks is true, after each detected path discontinuity in the
/// subgraph it will start a new path with a .chunk<i> suffix, where <i> is a numeric chunk index
void add_subpaths_to_subgraph(const PathPositionHandleGraph& source, MutablePathHandleGraph& subgraph,
bool rename_discontinuous_path_chunks = false);

/// We can accumulate a subgraph without accumulating all the edges between its nodes
/// this helper ensures that we get the full set
Expand Down
4 changes: 2 additions & 2 deletions src/chunker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ void PathChunker::extract_subgraph(const Region& region, int context, int length
else if (context == 0 && length == 0) {
algorithms::add_connecting_edges_to_subgraph(*graph, subgraph);
}
algorithms::add_subpaths_to_subgraph(*graph, subgraph);
algorithms::add_subpaths_to_subgraph(*graph, subgraph, true);

// build the vg of the subgraph
subgraph.remove_orphan_edges();
Expand Down Expand Up @@ -282,7 +282,7 @@ void PathChunker::extract_id_range(vg::id_t start, vg::id_t end, int context, in
if (length) {
algorithms::expand_subgraph_by_length(*graph, vg_g, context, forward_only);
}
algorithms::add_subpaths_to_subgraph(*graph, vg_g);
algorithms::add_subpaths_to_subgraph(*graph, vg_g, true);

// build the vg
subgraph.extend(vg_g);
Expand Down

1 comment on commit 7ee5daf

@adamnovak
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vg CI tests complete for branch glenn. View the full report here.

19 tests passed, 0 tests failed and 0 tests skipped in 13015 seconds

Tests produced 361 warnings. 361 were for lower-than-expected alignment scores

Please sign in to comment.