Skip to content

Commit

Permalink
Merge branch 'master' into untangle_for_annotation
Browse files Browse the repository at this point in the history
  • Loading branch information
AndreaGuarracino committed Jul 3, 2023
2 parents c367e17 + cffe70a commit f5e80f6
Show file tree
Hide file tree
Showing 12 changed files with 88 additions and 117 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ docs/sphinx_build
docs/sphinx_build_man
docs/_build
Testing/
.idea/
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
url = https://github.com/jeizenga/structures.git
[submodule "deps/libbf"]
path = deps/libbf
url = https://github.com/mavam/libbf.git
url = https://github.com/subwaystation/libbf.git
[submodule "deps/cpp-httplib"]
path = deps/cpp-httplib
url = https://github.com/yhirose/cpp-httplib.git
Expand Down
2 changes: 1 addition & 1 deletion deps/libbf
Submodule libbf updated 1 files
+1 −0 src/bitvector.cpp
2 changes: 1 addition & 1 deletion deps/libhandlegraph
Submodule libhandlegraph updated 33 files
+7 −2 CMakeLists.txt
+908 −0 src/chop.cpp
+26 −9 src/copy_graph.cpp
+5 −5 src/count_walks.cpp
+70 −0 src/dagify.cpp
+248 −0 src/dfs.cpp
+199 −20 src/dijkstra.cpp
+47 −0 src/include/handlegraph/algorithms/chop.hpp
+6 −1 src/include/handlegraph/algorithms/copy_graph.hpp
+34 −6 src/include/handlegraph/algorithms/dagify.hpp
+15 −2 src/include/handlegraph/algorithms/dijkstra.hpp
+47 −0 src/include/handlegraph/algorithms/internal/dfs.hpp
+0 −23 src/include/handlegraph/algorithms/unchop.hpp
+3 −3 src/include/handlegraph/handle_graph.hpp
+57 −24 src/include/handlegraph/iteratee.hpp
+7 −2 src/include/handlegraph/mutable_handle_graph.hpp
+19 −2 src/include/handlegraph/mutable_path_handle_graph.hpp
+70 −0 src/include/handlegraph/mutable_path_metadata.hpp
+52 −0 src/include/handlegraph/named_node_back_translation.hpp
+21 −4 src/include/handlegraph/path_handle_graph.hpp
+313 −0 src/include/handlegraph/path_metadata.hpp
+2 −0 src/include/handlegraph/serializable.hpp
+18 −16 src/include/handlegraph/snarl_decomposition.hpp
+26 −16 src/include/handlegraph/trivially_serializable.hpp
+19 −10 src/include/handlegraph/types.hpp
+18 −13 src/include/handlegraph/util.hpp
+38 −0 src/mutable_path_handle_graph.cpp
+22 −0 src/mutable_path_metadata.cpp
+367 −0 src/path_metadata.cpp
+3 −234 src/strongly_connected_components.cpp
+16 −0 src/trivially_serializable.cpp
+1 −1 src/types.cpp
+0 −500 src/unchop.cpp
2 changes: 1 addition & 1 deletion deps/pybind11
Submodule pybind11 updated 238 files
4 changes: 2 additions & 2 deletions scripts/heaps_fit.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,6 @@ n <- max(x$nth.genome)
print(z * (f(n) - f(n-1)))
print(z * (f(2) - f(1)))
#print(f(n) - f(n-1))

ggplot(x, aes(x=nth.genome, y=base.pairs/1e9)) + geom_point(alpha=I(1/10)) + stat_function(fun=function(x) (fit$par[1] * x^fit$par[2] + fit$par[3]) * m) + scale_y_continuous("observed pangenome size (Gbp)") + scale_x_continuous("Nth included genome (200 permutations)")
pdf(NULL)
ggplot(x, aes(x=nth.genome, y=base.pairs/1e9)) + geom_point(alpha=I(1/10)) + stat_function(fun=function(x) (fit$par[1] * x^fit$par[2] + fit$par[3]) * m) + scale_y_continuous("observed pangenome size (Gbp)") + scale_x_continuous(paste("Nth included genome (", max(x$permutation)+1 ," permutations) with gamma=", round(fit$par[2], digits=3), sep = "")) + expand_limits(x = 0, y = 0)
ggsave(args[2], height=5, width=9)
147 changes: 71 additions & 76 deletions src/algorithms/progress.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,86 +15,81 @@ namespace algorithms {

namespace progress_meter {

class ProgressMeter {
public:
std::string banner;
std::atomic<uint64_t> total;
std::atomic<uint64_t> completed;
std::chrono::time_point<std::chrono::steady_clock> start_time;
std::thread logger;
ProgressMeter(uint64_t _total, const std::string& _banner)
: total(_total), banner(_banner) {
start_time = std::chrono::steady_clock::now();
completed = 0;

logger = std::thread(
[&]() {
bool has_ever_printed = false;

while (completed < total) {
if (completed > 0) {
class ProgressMeter {
public:
std::string banner;
std::atomic<uint64_t> total;
std::atomic<uint64_t> completed;
std::chrono::time_point<std::chrono::steady_clock> start_time;
std::thread logger;
ProgressMeter(uint64_t _total, const std::string& _banner)
: total(_total), banner(_banner) {
start_time = std::chrono::steady_clock::now();
completed = 0;
logger = std::thread(
[&](void) {
do_print();
has_ever_printed = true;
}
if (has_ever_printed && completed < total) {
std::this_thread::sleep_for(std::chrono::milliseconds(250));
} else {
std::this_thread::sleep_for(std::chrono::nanoseconds(100));
}
}
});
auto last = 0;
while (completed < total) {
auto curr = completed - last;
if (curr > 0) {
do_print();
last = completed;
}
std::this_thread::sleep_for(std::chrono::milliseconds(500));
}
});
};
/// Write one carriage-return-prefixed progress line to std::cerr:
/// banner, percent complete, throughput, elapsed time and estimated time remaining.
///
/// The counters are read once so all figures on the line agree with each other.
/// Degenerate states are reported as zeros instead of NaN/inf: no elapsed time or
/// no progress yields a rate and remaining time of 0, and `completed` overshooting
/// `total` yields a remaining time of 0 (the unsigned subtraction would otherwise wrap).
/// A `total` of 0 is reported as 100% complete.
void do_print(void) {
    const uint64_t done = completed.load();
    const uint64_t all = total.load();

    const auto now = std::chrono::steady_clock::now();
    const std::chrono::duration<double> elapsed_seconds = now - start_time;
    const double elapsed = elapsed_seconds.count();

    const double rate = (elapsed > 0 ? static_cast<double>(done) / elapsed : 0);
    const uint64_t remaining = (all > done ? all - done : 0);
    const double seconds_to_completion = (rate > 0 ? static_cast<double>(remaining) / rate : 0);
    const double percent = (all > 0 ? 100.0 * (static_cast<double>(done) / static_cast<double>(all)) : 100.0);

    // Format into a local stream so the sticky manipulators (fill, precision,
    // scientific notation) do not leak into later, unrelated std::cerr output.
    std::stringstream line;
    line << "\r" << banner << " "
         << std::defaultfloat
         << std::setfill(' ')
         << std::setw(5)
         << std::fixed
         << std::setprecision(2)
         << percent << "%"
         << " @ "
         << std::setw(4) << std::scientific << rate << " bp/s "
         << "elapsed: " << print_time(elapsed) << " "
         << "remain: " << print_time(seconds_to_completion);
    std::cerr << line.str();
}
/// Mark the work as complete, stop the background logger thread, print the final
/// progress line and terminate it with a newline.
///
/// Safe to call more than once: after the logger has been joined, further calls
/// return immediately instead of throwing from std::thread::join().
void finish(void) {
    if (!logger.joinable()) {
        return;
    }
    // Setting completed to total makes the logger's `completed < total` loop exit.
    completed.store(total);
    logger.join();
    do_print();
    std::cerr << std::endl;
}
/// Format a duration given in seconds as "DD:HH:MM:SS", each field zero-padded
/// to at least two characters.
std::string print_time(const double& _seconds) {
    int d = 0;
    int h = 0;
    int m = 0;
    int s = 0;
    distribute_seconds(d, h, m, s, _seconds);

    std::stringstream out;
    // The fill character is sticky; the width must be re-applied before each field.
    out << std::setfill('0');
    out << std::setw(2) << d << ":";
    out << std::setw(2) << h << ":";
    out << std::setw(2) << m << ":";
    out << std::setw(2) << s;
    return out.str();
}
/// Split a duration in seconds into whole days, hours, minutes and seconds.
///
/// @param days           receives the number of whole days
/// @param hours          receives the hours within the last day (0-23)
/// @param minutes        receives the minutes within the last hour (0-59)
/// @param seconds        receives the seconds within the last minute (0-59)
/// @param input_seconds  duration to split; any fractional part is discarded
///
/// Non-finite (NaN, infinity) and non-positive inputs yield all zeros. Very large
/// inputs are capped so that the day count still fits in a 32-bit int; converting
/// such values straight to int, as a plain cast would, is undefined behaviour.
void distribute_seconds(int& days, int& hours, int& minutes, int& seconds, const double& input_seconds) {
    const long long cseconds_in_day = 86400;
    const long long cseconds_in_hour = 3600;
    const long long cseconds_in_minute = 60;
    // Largest duration whose day count is still representable (2^31 - 1 days).
    const double max_input_seconds = 2147483647.0 * 86400.0;

    days = 0;
    hours = 0;
    minutes = 0;
    seconds = 0;
    if (!std::isfinite(input_seconds) || input_seconds <= 0) {
        return;
    }

    const double capped = (input_seconds < max_input_seconds ? input_seconds : max_input_seconds);
    const long long whole_seconds = static_cast<long long>(capped);
    const long long within_day = whole_seconds % cseconds_in_day;

    days = static_cast<int>(whole_seconds / cseconds_in_day);
    hours = static_cast<int>(within_day / cseconds_in_hour);
    minutes = static_cast<int>((within_day % cseconds_in_hour) / cseconds_in_minute);
    seconds = static_cast<int>(within_day % cseconds_in_minute);
}
/// Atomically add `incr` to the count of completed work units.
void increment(const uint64_t& incr) {
    completed.fetch_add(incr);
}
};
/// Write one carriage-return-prefixed progress line to std::cerr:
/// banner, percent complete, throughput, elapsed time and estimated time remaining.
///
/// The counters are read once so all figures on the line agree with each other.
/// Degenerate states are reported as zeros instead of NaN/inf: no elapsed time or
/// no progress yields a rate and remaining time of 0 (avoiding a division by a
/// zero rate), and `completed` overshooting `total` yields a remaining time of 0
/// (the unsigned subtraction would otherwise wrap). A `total` of 0 is reported
/// as 100% complete.
void do_print() {
    const uint64_t done = completed.load();
    const uint64_t all = total.load();

    const auto now = std::chrono::steady_clock::now();
    const std::chrono::duration<double> elapsed_seconds = now - start_time;
    const double elapsed = elapsed_seconds.count();

    const double rate = (elapsed > 0 ? static_cast<double>(done) / elapsed : 0);
    const uint64_t remaining = (all > done ? all - done : 0);
    const double seconds_to_completion = (rate > 0 ? static_cast<double>(remaining) / rate : 0);
    const double percent = (all > 0 ? 100.0 * (static_cast<double>(done) / static_cast<double>(all)) : 100.0);

    // Format into a local stream so the sticky manipulators (fill, precision,
    // scientific notation) do not leak into later, unrelated std::cerr output.
    std::stringstream line;
    line << "\r" << banner << " "
         << std::defaultfloat
         << std::setfill(' ')
         << std::setw(5)
         << std::fixed
         << std::setprecision(2)
         << percent << "%"
         << " @ "
         << std::setw(4) << std::scientific << rate << "/s "
         << "elapsed: " << print_time(elapsed) << " "
         << "remain: " << print_time(seconds_to_completion);
    std::cerr << line.str();
}
/// Mark the work as complete, stop the background logger thread, print the final
/// progress line and terminate it with a newline.
///
/// Safe to call more than once: after the logger has been joined, further calls
/// return immediately instead of throwing from std::thread::join().
void finish() {
    if (!logger.joinable()) {
        return;
    }
    // Setting completed to total makes the logger's `completed < total` loop exit.
    completed.store(total);
    logger.join();
    do_print();
    std::cerr << std::endl;
}
/// Format a duration given in seconds as "DD:HH:MM:SS", each field zero-padded
/// to at least two characters.
std::string print_time(const double& _seconds) {
    int d = 0;
    int h = 0;
    int m = 0;
    int s = 0;
    distribute_seconds(d, h, m, s, _seconds);

    std::stringstream out;
    // The fill character is sticky; the width must be re-applied before each field.
    out << std::setfill('0');
    out << std::setw(2) << d << ":";
    out << std::setw(2) << h << ":";
    out << std::setw(2) << m << ":";
    out << std::setw(2) << s;
    return out.str();
}
/// Split a duration in seconds into whole days, hours, minutes and seconds.
///
/// @param days           receives the number of whole days
/// @param hours          receives the hours within the last day (0-23)
/// @param minutes        receives the minutes within the last hour (0-59)
/// @param seconds        receives the seconds within the last minute (0-59)
/// @param input_seconds  duration to split; any fractional part is discarded
///
/// Non-finite (NaN, infinity) and non-positive inputs yield all zeros. Very large
/// inputs are capped so that the day count still fits in a 32-bit int; converting
/// such values straight to int, as a plain cast would, is undefined behaviour.
void distribute_seconds(int& days, int& hours, int& minutes, int& seconds, const double& input_seconds) {
    const long long cseconds_in_day = 86400;
    const long long cseconds_in_hour = 3600;
    const long long cseconds_in_minute = 60;
    // Largest duration whose day count is still representable (2^31 - 1 days).
    const double max_input_seconds = 2147483647.0 * 86400.0;

    days = 0;
    hours = 0;
    minutes = 0;
    seconds = 0;
    if (!std::isfinite(input_seconds) || input_seconds <= 0) {
        return;
    }

    const double capped = (input_seconds < max_input_seconds ? input_seconds : max_input_seconds);
    const long long whole_seconds = static_cast<long long>(capped);
    const long long within_day = whole_seconds % cseconds_in_day;

    days = static_cast<int>(whole_seconds / cseconds_in_day);
    hours = static_cast<int>(within_day / cseconds_in_hour);
    minutes = static_cast<int>((within_day % cseconds_in_hour) / cseconds_in_minute);
    seconds = static_cast<int>(within_day % cseconds_in_minute);
}
/// Atomically add `incr` to the count of completed work units.
void increment(const uint64_t& incr) {
    completed.fetch_add(incr);
}
};

}

}

}
4 changes: 3 additions & 1 deletion src/odgi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -781,7 +781,7 @@ void graph_t::reassign_node_ids(const std::function<nid_t(const nid_t&)>& get_ne

/// Reorder the graph's internal structure to match that given.
/// Optionally compact the id space of the graph to match the ordering, from 1->|ordering|.
void graph_t::apply_ordering(const std::vector<handle_t>& order_in, bool compact_ids) {
bool graph_t::apply_ordering(const std::vector<handle_t>& order_in, bool compact_ids) {
// get mapping from old to new id
// if we're given an empty order, just compact the ids based on our ordering
const std::vector<handle_t>* order;
Expand Down Expand Up @@ -894,6 +894,8 @@ void graph_t::apply_ordering(const std::vector<handle_t>& order_in, bool compact
}
node_v = new_node_v;
deleted_nodes.clear();

return true;
}

void graph_t::apply_path_ordering(const std::vector<path_handle_t>& order) {
Expand Down
2 changes: 1 addition & 1 deletion src/odgi.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ class graph_t : public MutablePathDeletableHandleGraph, public SerializableHandl

/// Reorder the graph's internal structure to match that given.
/// Optionally compact the id space of the graph to match the ordering, from 1->|ordering|.
void apply_ordering(const std::vector<handle_t>& order, bool compact_ids = false);
bool apply_ordering(const std::vector<handle_t>& order, bool compact_ids = false);

/// Organize the graph for better performance and memory use
void optimize(bool allow_id_reassignment = true);
Expand Down
5 changes: 5 additions & 0 deletions src/subcommand/heaps_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,11 @@ int main_heaps(int argc, char **argv) {
auto vals = split(path_name, '#');
path_groups_map[vals.front()].push_back(p);
});
} else {
// no groups
graph.for_each_path_handle([&](const path_handle_t& p) {
path_groups_map[graph.get_path_name(p)].push_back(p);
});
}
path_groups.reserve(path_groups_map.size());
for (auto& g : path_groups_map) {
Expand Down
33 changes: 0 additions & 33 deletions src/subcommand/overlap_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,39 +125,6 @@ namespace odgi {
}
}

// Translate a path position (path handle + offset along the path) into a graph
// position built with make_pos_t(node id, orientation, offset within the node).
// Walks the path's steps from path_begin to path_end, accumulating node lengths,
// and returns at the first step whose node extends past pos.offset.
// If the offset is not covered by any step, a warning is written to std::cerr
// and make_pos_t(0, false, 0) is returned.
auto get_graph_pos = [](const odgi::graph_t &graph,
const path_pos_t &pos) {
auto path_end = graph.path_end(pos.path);
// Total length of the nodes visited so far.
uint64_t walked = 0;
for (step_handle_t s = graph.path_begin(pos.path);
s != path_end; s = graph.get_next_step(s)) {
handle_t h = graph.get_handle_of_step(s);
uint64_t node_length = graph.get_length(h);
// The target offset falls inside this node: report the offset relative to its start.
if (walked + node_length > pos.offset) {
return make_pos_t(graph.get_id(h), graph.get_is_reverse(h), pos.offset - walked);
}
walked += node_length;
}

// The critical section is named "cout" although the statement writes to std::cerr.
#pragma omp critical (cout)
std::cerr << "[odgi::overlap] warning: position " << graph.get_path_name(pos.path) << ":" << pos.offset
<< " outside of path" << std::endl;
return make_pos_t(0, false, 0);
};

// Return the summed length of all nodes visited by `path` before the step `target`,
// i.e. the offset along the path at which `target` starts.
// NOTE(review): the loop stops only when it reaches `target`; path_end is consulted
// solely by the assert after the loop, so a `target` that is not on `path` is not
// caught while walking — confirm callers always pass a step of this path.
auto get_offset_in_path = [](const odgi::graph_t &graph,
const path_handle_t &path, const step_handle_t &target) {
auto path_end = graph.path_end(path);
// Total length of the nodes preceding the current step.
uint64_t walked = 0;
step_handle_t s = graph.path_begin(path);
for (; s != target; s = graph.get_next_step(s)) {
handle_t h = graph.get_handle_of_step(s);
walked += graph.get_length(h);
}
// Here s == target, so this checks that target itself is not the end-of-path step.
assert(s != path_end);
return walked;
};

if (!path_ranges.empty()) {
std::cout << "#path\tstart\tend\tpath.touched" << std::endl;

Expand Down
1 change: 1 addition & 0 deletions src/subcommand/version_main.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "subcommand.hpp"
#include "args.hxx"
#include "../version.hpp"
#include <cstdint>

namespace odgi {

Expand Down

0 comments on commit f5e80f6

Please sign in to comment.