Skip to content

Commit

Permalink
Merge pull request #38 from pangenome/tweaks
Browse files: browse the repository at this point in the history
Merge overlapping/contiguous ranges to visit in `query_transitive` to strongly improve performance
  • Loading branch information
AndreaGuarracino authored Jan 3, 2025
2 parents d1d150d + b14d1e2 commit 4cc6a98
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 13 deletions.
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

36 changes: 25 additions & 11 deletions src/impg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,22 +140,22 @@ impl SortedRanges {
} else {
(new_range.1, new_range.0)
};

// Return regions that don't overlap with existing ranges
let mut non_overlapping = Vec::new();
let mut current = start;

// Find the first range that could overlap
let mut i = match self.ranges.binary_search_by_key(&start, |&(s, _)| s) {
Ok(pos) => pos,
Err(pos) => pos,
};

// Check previous range for overlap
if i > 0 && self.ranges[i - 1].1 > start {
i -= 1;
}

// Process all potentially overlapping ranges
while i < self.ranges.len() && current < end {
let (range_start, range_end) = self.ranges[i];
Expand All @@ -168,11 +168,11 @@ impl SortedRanges {
current = max(current, range_end);
i += 1;
}

if current < end {
non_overlapping.push((current, end));
}

// Now insert the range while maintaining sorted order and merging overlaps
match self.ranges.binary_search_by_key(&start, |&(s, _)| s) {
Ok(pos) | Err(pos) => {
Expand All @@ -184,12 +184,12 @@ impl SortedRanges {
self.ranges[pos].0 = min(start, self.ranges[pos].0);
self.ranges[pos].1 = max(end, self.ranges[pos].1);
self.merge_forward_from(pos);
} else {
} else {
self.ranges.insert(pos, (start, end));
}
}
}

non_overlapping
}

Expand Down Expand Up @@ -384,14 +384,14 @@ impl Impg {
.map(|(&k, v)| (k, (*v).clone()))
.collect()
} else {
FxHashMap::default()
FxHashMap::with_capacity_and_hasher(self.seq_index.len(), Default::default())
};
// Initialize first visited range for target_id if not already present
visited_ranges.entry(target_id)
.or_default()
.insert((range_start, range_end));

while let Some((current_target_id, current_target_start, current_target_end)) = stack.pop() {
while let Some((current_target_id, current_target_start, current_target_end)) = stack.pop() {
if let Some(tree) = self.trees.get(&current_target_id) {
tree.query(current_target_start, current_target_end, |interval| {
let metadata = &interval.metadata;
Expand Down Expand Up @@ -430,7 +430,21 @@ impl Impg {
}
}
});
}

// Merge contiguous/overlapping ranges with same sequence_id
stack.sort_by_key(|(id, start, _)| (*id, *start));
let mut write = 0;
for read in 1..stack.len() {
if stack[write].0 == stack[read].0 && // Same sequence_id
stack[write].2 >= stack[read].1 { // Overlapping or contiguous
stack[write].2 = stack[write].2.max(stack[read].2);
} else {
write += 1;
stack.swap(write, read);
}
}
stack.truncate(write + 1);
}
}

results
Expand Down

0 comments on commit 4cc6a98

Please sign in to comment.