Skip to content

Commit

Permalink
fix up tests
Browse files: browse the repository at this point in the history
  • Loading branch information
glennhickey committed Nov 4, 2022
1 parent d81d73c commit d0ad829
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 17 deletions.
31 changes: 21 additions & 10 deletions hal2vg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ using namespace handlegraph;

static void initParser(CLParser* optionsParser) {
optionsParser->addArgument("halFile", "input hal file");
optionsParser->addOption("refGenome",
"if specifed, treat this genome as a reference path and all others as haplotype paths (by default all are generic)",
optionsParser->addOption("refGenomes",
"comma-separated (no spaces) genomes to treat as reference paths with all others as haplotype paths (default=all genomes)",
"\"\"");
optionsParser->addOption("rootGenome",
"process only genomes in clade with specified root"
Expand Down Expand Up @@ -87,7 +87,7 @@ static void pinch_to_handle(const Genome* genome,
const unordered_map<string, int64_t>& nameToID,
unordered_map<stPinchBlock*, nid_t>& blockToNode,
MutablePathMutableHandleGraph& graph,
const string& refGenomeName);
const vector<string>& refNames);

static void chop_graph(MutablePathMutableHandleGraph& graph, size_t maxNodeLength);

Expand All @@ -99,7 +99,7 @@ int main(int argc, char** argv) {
CLParser optionsParser;
initParser(&optionsParser);
string halPath;
string refGenomeName;
string refGenomes;
string rootGenomeName;
string targetGenomes;
bool noAncestors;
Expand All @@ -110,7 +110,7 @@ int main(int argc, char** argv) {
try {
optionsParser.parseOptions(argc, argv);
halPath = optionsParser.getArgument<string>("halFile");
refGenomeName = optionsParser.getOption<string>("refGenome");
refGenomes = optionsParser.getOption<string>("refGenomes");
rootGenomeName = optionsParser.getOption<string>("rootGenome");
targetGenomes = optionsParser.getOption<string>("targetGenomes");
noAncestors = optionsParser.getFlag("noAncestors");
Expand Down Expand Up @@ -138,6 +138,12 @@ int main(int argc, char** argv) {
throw hal_exception("input hal alignmenet is empty");
}

vector<string> refNames;
if (refGenomes != "\"\"") {
refNames = chopString(refGenomes, ",");
std::sort(refNames.begin(), refNames.end());
}

// default to alignment root if none specified
bool givenRoot = true;
if (rootGenomeName == "\"\"") {
Expand Down Expand Up @@ -344,7 +350,7 @@ int main(int argc, char** argv) {
cerr << "converting " << genomeName << " with " << genome->getNumSequences()
<< " sequences and total length " << genome->getSequenceLength() << endl;
}
pinch_to_handle(genome, threadSet, IDToName, nameToID, blockToNode, *graph, refGenomeName);
pinch_to_handle(genome, threadSet, IDToName, nameToID, blockToNode, *graph, refNames);

alignment->closeGenome(genome);
}
Expand Down Expand Up @@ -628,12 +634,15 @@ void pinch_to_handle(const Genome* genome,
const unordered_map<string, int64_t>& nameToID,
unordered_map<stPinchBlock*, nid_t>& blockToNode,
MutablePathMutableHandleGraph& graph,
const string& refGenomeName) {
const vector<string>& refNames) {

// iterate over the sequences of the genome
for (SequenceIteratorPtr seqIt = genome->getSequenceIterator(); not seqIt->atEnd(); seqIt->toNext()) {
const Sequence *sequence = seqIt->getSequence();
PathSense sense = refGenomeName.empty() ? PathSense::GENERIC : genome->getName() == refGenomeName ? PathSense::REFERENCE : PathSense::HAPLOTYPE;
PathSense sense = PathSense::REFERENCE;
if (!refNames.empty() && !std::binary_search(refNames.begin(), refNames.end(), genome->getName())) {
sense = PathSense::HAPLOTYPE;
}
int64_t seqID = nameToID.find(sequence->getFullName())->second;
stPinchThread* thread = stPinchThreadSet_getThread(threadSet, seqID);

Expand All @@ -644,7 +653,9 @@ void pinch_to_handle(const Genome* genome,
subrange_t subpath = resolve_subpath_naming(parsed_name);
string parsed_genome_name = genome->getName();
size_t haplotype = resolve_haplotype_naming(parsed_genome_name);

if (sense == PathSense::HAPLOTYPE && haplotype == PathMetadata::NO_HAPLOTYPE) {
haplotype = 0;
}
// create the path
path_handle_t pathHandle = graph.create_path(sense,
parsed_genome_name,
Expand Down Expand Up @@ -813,7 +824,7 @@ subrange_t resolve_subpath_naming(string& path_name) {
}

size_t resolve_haplotype_naming(string& genome_name) {
size_t haplotype = 0;
size_t haplotype = PathMetadata::NO_HAPLOTYPE;
size_t dp = genome_name.rfind(".");
if (dp != string::npos) {
try {
Expand Down
6 changes: 3 additions & 3 deletions tests/small/truth.json
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@
"rank": "6"
}
],
"name": "cat.3"
"name": "cat#3"
},
{
"mapping": [
Expand Down Expand Up @@ -264,7 +264,7 @@
"rank": "7"
}
],
"name": "chimp.2"
"name": "chimp#2"
},
{
"mapping": [
Expand Down Expand Up @@ -368,7 +368,7 @@
"rank": "8"
}
],
"name": "human.1"
"name": "human#1"
}
]
}
8 changes: 4 additions & 4 deletions tests/t/merge.t
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ hal2vg small2.hal | vg mod -O - | vg ids -s - > small2.vg
hal2vg merged1.hal | vg mod -O - | vg ids -s - > merged1.vg
vg view small.vg | sort > small.gfa
vg view small2.vg | sort > small2.gfa
vg find -x merged1.vg -p cat.3:1 -c 1000 | vg ids -s - | vg view - | sort | sed -e 's/_0//g' | sed -e 's/_1//g' > merged1.comp1.gfa
vg find -x merged1.vg -p cow.3:1 -c 1000 | vg ids -s - | vg view - | sort | sed -e 's/_0//g' | sed -e 's/_1//g' > merged1.comp2.gfa
vg find -x merged1.vg -p cat#3:1 -c 1000 | vg ids -s - | vg view - | sort | sed -e 's/_0//g' | sed -e 's/_1//g' | sed -e "s/cat human chimp/human chimp cat/g" > merged1.comp1.gfa
vg find -x merged1.vg -p cow#3:1 -c 1000 | vg ids -s - | vg view - | sort | sed -e 's/_0//g' | sed -e 's/_1//g' | sed -e "s/human chimp cow/cow human chimp/g" > merged1.comp2.gfa
diff small.gfa merged1.comp1.gfa
is $? 0 "First component of merged graph identical to first input graph"
diff small2.gfa merged1.comp2.gfa
Expand Down Expand Up @@ -60,8 +60,8 @@ hal2vg small2.hal | vg mod -O - | vg ids -s - > small2.vg
hal2vg merged1.hal | vg mod -O - | vg ids -s - > merged1.vg
vg view small.vg | sort > small.gfa
vg view small2.vg | sort > small2.gfa
vg find -x merged1.vg -p cat.3:1 -c 1000 | vg ids -s - | vg view - | sort | sed -e 's/_0//g' | sed -e 's/_1//g' > merged1.comp1.gfa
vg find -x merged1.vg -p cow.3:1 -c 1000 | vg ids -s - | vg view - | sort | sed -e 's/_0//g' | sed -e 's/_1//g' > merged1.comp2.gfa
vg find -x merged1.vg -p cat#3:1 -c 1000 | vg ids -s - | vg view - | sort | sed -e 's/_0//g' | sed -e 's/_1//g' | sed -e "s/cat human chimp/human chimp cat/g" > merged1.comp1.gfa
vg find -x merged1.vg -p cow#3:1 -c 1000 | vg ids -s - | vg view - | sort | sed -e 's/_0//g' | sed -e 's/_1//g' | sed -e "s/human chimp cow/cow human chimp/g" > merged1.comp2.gfa
diff small.gfa merged1.comp1.gfa
is $? 0 "First component of merged graph identical to first input graph"
diff small2.gfa merged1.comp2.gfa
Expand Down

0 comments on commit d0ad829

Please sign in to comment.