Skip to content

Commit

Permalink
turn off abpoa progressive for >5k rows
Browse files Browse the repository at this point in the history
  • Loading branch information
glennhickey committed Jul 26, 2024
1 parent e7b5c43 commit f90e86b
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 16 deletions.
3 changes: 2 additions & 1 deletion bar/impl/bar.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ void bar(stList *flowers, CactusParams *params, CactusDisk *cactusDisk, stList *
// Note that poa uses about N^2 memory, so maximum value is generally in 10s of kb
int64_t poaWindow = cactusParams_get_int(params, 3, "bar", "poa", "partialOrderAlignmentWindow");
int64_t maskFilter = cactusParams_get_int(params, 3, "bar", "poa", "partialOrderAlignmentMaskFilter");
int64_t poaMaxProgRows = cactusParams_get_int(params, 3, "bar", "poa", "partialOrderAlignmentProgressiveMaxRows");
abpoa_para_t *poaParameters = usePoa ? abpoaParamaters_constructFromCactusParams(params) : NULL;

//////////////////////////////////////////////
Expand Down Expand Up @@ -101,7 +102,7 @@ void bar(stList *flowers, CactusParams *params, CactusDisk *cactusDisk, stList *
*
* It does not use any precomputed alignments, if they are provided they will be ignored
*/
alignments = make_flower_alignment_poa(flower, maximumLength, poaWindow, maskFilter, poaParameters);
alignments = make_flower_alignment_poa(flower, maximumLength, poaWindow, maskFilter, poaMaxProgRows, poaParameters);
st_logDebug("Created the poa alignments: %" PRIi64 " poa alignment blocks for flower\n", stList_length(alignments));
} else {
alignments = makeFlowerAlignment3(sM, flower, listOfEndAlignmentFiles, spanningTrees, maximumLength,
Expand Down
15 changes: 9 additions & 6 deletions bar/impl/poaBarAligner.c
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,7 @@ static void msa_fix_trimmed(Msa* msa) {
}

Msa *msa_make_partial_order_alignment(char **seqs, int *seq_lens, int64_t seq_no, int64_t window_size,
abpoa_para_t *poa_parameters) {
int64_t max_prog_rows, abpoa_para_t *poa_parameters) {

assert(seq_no > 0);

Expand Down Expand Up @@ -564,6 +564,9 @@ Msa *msa_make_partial_order_alignment(char **seqs, int *seq_lens, int64_t seq_no
// init abpoa
abpoa_t *ab = abpoa_init();
abpoa_para_t *abpt = copy_abpoa_params(poa_parameters);
if (msa->seq_no > max_prog_rows) {
abpt->progressive_poa = 0;
}

#ifdef CACTUS_ABPOA_MSA_DUMP_DIR
// dump the input to file
Expand Down Expand Up @@ -745,7 +748,7 @@ Msa *msa_make_partial_order_alignment(char **seqs, int *seq_lens, int64_t seq_no

Msa **make_consistent_partial_order_alignments(int64_t end_no, int64_t *end_lengths, char ***end_strings,
int **end_string_lengths, int64_t **right_end_indexes, int64_t **right_end_row_indexes, int64_t **overlaps,
int64_t window_size, abpoa_para_t *poa_parameters) {
int64_t window_size, int64_t max_prog_rows, abpoa_para_t *poa_parameters) {
// Calculate the initial, potentially inconsistent msas and column scores for each msa
float *column_scores[end_no];
Msa **msas = st_malloc(sizeof(Msa *) * end_no);
Expand All @@ -754,7 +757,7 @@ Msa **make_consistent_partial_order_alignments(int64_t end_no, int64_t *end_leng
//#endif
for(int64_t i=0; i<end_no; i++) {
msas[i] = msa_make_partial_order_alignment(end_strings[i], end_string_lengths[i], end_lengths[i], window_size,
poa_parameters);
max_prog_rows, poa_parameters);
column_scores[i] = make_column_scores(msas[i]);
}

Expand Down Expand Up @@ -1094,7 +1097,7 @@ int64_t getMaxSequenceLength(End *end) {
}

stList *make_flower_alignment_poa(Flower *flower, int64_t max_seq_length, int64_t window_size, int64_t mask_filter,
abpoa_para_t * poa_parameters) {
int64_t max_prog_rows, abpoa_para_t * poa_parameters) {
End *dominantEnd = getDominantEnd(flower);
int64_t seq_no = dominantEnd != NULL ? end_getInstanceNumber(dominantEnd) : -1;
if(dominantEnd != NULL && getMaxSequenceLength(dominantEnd) < max_seq_length) {
Expand All @@ -1110,7 +1113,7 @@ stList *make_flower_alignment_poa(Flower *flower, int64_t max_seq_length, int64_
Cap *indices_to_caps[seq_no];

get_end_sequences(dominantEnd, end_strings, end_string_lengths, overlaps, indices_to_caps, max_seq_length, mask_filter);
Msa *msa = msa_make_partial_order_alignment(end_strings, end_string_lengths, seq_no, window_size, poa_parameters);
Msa *msa = msa_make_partial_order_alignment(end_strings, end_string_lengths, seq_no, window_size, max_prog_rows, poa_parameters);

//Now convert to set of alignment blocks
stList *alignment_blocks = stList_construct3(0, (void (*)(void *))alignmentBlock_destruct);
Expand Down Expand Up @@ -1184,7 +1187,7 @@ stList *make_flower_alignment_poa(Flower *flower, int64_t max_seq_length, int64_
// Now make the consistent MSAs
Msa **msas = make_consistent_partial_order_alignments(end_no, end_lengths, end_strings, end_string_lengths,
right_end_indexes, right_end_row_indexes, overlaps, window_size,
poa_parameters);
max_prog_rows, poa_parameters);

// Temp debug output
//for(int64_t i=0; i<end_no; i++) {
Expand Down
11 changes: 7 additions & 4 deletions bar/inc/poaBarAligner.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,13 +68,15 @@ void msa_print(Msa *msa, FILE *f);
* @param seq_lens An array giving the string lengths
* @param seq_no The number of strings
* @param window_size Sliding window size which limits length of poa sub-alignments. Memory usage is quardatic in this.
* @param max_prog_rows Disable abpoas progressive alignment if there are more than this many rows (avoid quadratic dist mat blowup)
* @param poa_parameters abpoa parameters
* @return An msa of the strings.
*/
Msa *msa_make_partial_order_alignment(char **seqs,
int *seq_lens,
int64_t seq_no,
int64_t window_size,
int64_t max_prog_rows,
abpoa_para_t *poa_parameters);

/**
Expand All @@ -96,12 +98,13 @@ Msa *msa_make_partial_order_alignment(char **seqs,
* @param right_end_row_indexes For each string, the index of the row of its reverse complement
* @param overlaps For each prefix string, the length of the overlap with its reverse complement adjacency
* @param window_size Sliding window size which limits length of poa sub-alignments. Memory usage is quardatic in this.
* @param max_prog_rows Disable abpoas progressive alignment if there are more than this many rows (avoid quadratic dist mat blowup)
* @param poa_parameters abpoa parameters
* @return A consistent Msa for each end
*/
Msa **make_consistent_partial_order_alignments(int64_t end_no, int64_t *end_lengths, char ***end_strings,
int **end_string_lengths, int64_t **right_end_indexes, int64_t **right_end_row_indexes, int64_t **overlaps,
int64_t window_size, abpoa_para_t *poa_parameters);
int64_t window_size, int64_t max_prog_rows, abpoa_para_t *poa_parameters);

/**
* Represents a gapless alignment of a set of sequences.
Expand Down Expand Up @@ -136,14 +139,14 @@ char *get_adjacency_string(Cap *cap, int *length, bool return_string);
* to attempt to align.
* @param window_size Sliding window size which limits length of poa sub-alignments. Memory usage is quardatic in this.
* @param mask_filter Trim input sequences if encountering this many consecutive soft of hard masked bases (0 = disabled)
* @param poa_band_constant abpoa "b" parameter, where adaptive band is b+f*<length> (b < 0 = disabled)
* @param poa_band_fraction abpoa "f" parameter, where adaptive band is b+f*<length> (b < 0 = disabled)
* Returns a list of AlignmentBlock ojects
* @param max_prog_rows Disable abpoas progressive alignment if there are more than this many rows (avoid quadratic dist mat blowup)
* @param poa_parameters abpoa parameters
*/
stList *make_flower_alignment_poa(Flower *flower,
int64_t max_seq_length,
int64_t window_size,
int64_t mask_filter,
int64_t max_prog_rows,
abpoa_para_t * poa_parameters);

/**
Expand Down
8 changes: 4 additions & 4 deletions bar/tests/poaBarTest.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ void test_make_partial_order_alignment(CuTest *testCase) {
}

// generate the alignment
Msa *msa = msa_make_partial_order_alignment(seqs, seq_lens, seq_no, poa_window_size, abpt);
Msa *msa = msa_make_partial_order_alignment(seqs, seq_lens, seq_no, poa_window_size, 1000, abpt);

// print the msa
#ifdef stderr_logging
Expand Down Expand Up @@ -145,7 +145,7 @@ void test_make_consistent_partial_order_alignments_two_ends(CuTest *testCase) {

// generate the alignments
Msa **msas = make_consistent_partial_order_alignments(end_no, end_lengths, end_strings, end_string_lengths,
right_end_indexes, right_end_row_indexes, overlaps, 1000000, abpt);
right_end_indexes, right_end_row_indexes, overlaps, 1000000, 100, abpt);

// print the msas
#ifdef stderr_logging
Expand Down Expand Up @@ -212,7 +212,7 @@ void test_make_flower_alignment_poa(CuTest *testCase) {
}
flower_destructEndIterator(endIterator);

stList *alignment_blocks = make_flower_alignment_poa(flower, 2, 1000000, 5, abpt);
stList *alignment_blocks = make_flower_alignment_poa(flower, 2, 1000000, 5, 1000, abpt);

for(int64_t i=0; i<stList_length(alignment_blocks); i++) {
AlignmentBlock *b = stList_get(alignment_blocks, i);
Expand All @@ -233,7 +233,7 @@ void test_alignment_block_iterator(CuTest *testCase) {
abpt->wf = 0.01;
abpoa_post_set_para(abpt);

stList *alignment_blocks = make_flower_alignment_poa(flower, 10000, 1000000, 5, abpt);
stList *alignment_blocks = make_flower_alignment_poa(flower, 10000, 1000000, 5, 50, abpt);

abpoa_free_para(abpt);
#ifdef stderr_logging
Expand Down
4 changes: 3 additions & 1 deletion src/cactus/cactus_progressive_config.xml
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,8 @@
<!-- partialOrderAlignmentMinimizerK abpoa kmer size for minimizer seeding. -->
<!-- partialOrderAlignmentMinimizerW abpoa window size for minimizer seeding. -->
<!-- partialOrderAlignmentMinimizerMinW abpoa minimum window size. -->
<!-- partialOrderAlignmentProgressiveMode= use guide tree from jaccard distance matrix to determine poa order -->
<!-- partialOrderAlignmentProgressiveMode use guide tree from jaccard distance matrix to determine poa order -->
<!-- partialOrderAlignmentProgressiveMaxRows disable progressive mode if there are more than this many rows to align -->
<poa
partialOrderAlignmentWindow="10000"
partialOrderAlignmentMaskFilter="-1"
Expand All @@ -264,6 +265,7 @@
partialOrderAlignmentMinimizerW="10"
partialOrderAlignmentMinimizerMinW="500"
partialOrderAlignmentProgressiveMode="1"
partialOrderAlignmentProgressiveMaxRows="5000"
/>
</bar>

Expand Down

0 comments on commit f90e86b

Please sign in to comment.