diff --git a/.gitmodules b/.gitmodules index b719f4a..c8f40b2 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,9 +1,12 @@ [submodule "deps/hal"] path = deps/hal - url = https://github.com/glennhickey/hal.git + url = https://github.com/ComparativeGenomicsToolkit/hal.git [submodule "deps/sonLib"] path = deps/sonLib - url = https://github.com/benedictpaten/sonLib.git + url = https://github.com/ComparativeGenomicsToolkit/sonLib.git [submodule "deps/hal2sg"] path = deps/hal2sg url = https://github.com/glennhickey/hal2sg.git +[submodule "deps/libbdsg-easy"] + path = deps/libbdsg-easy + url = https://github.com/vgteam/libbdsg-easy.git diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..bdaa04c --- /dev/null +++ b/.travis.yml @@ -0,0 +1,31 @@ +# Control file for continuous integration testing at http://travis-ci.org/ + +language: cpp +compiler: gcc + +before_install: + - git submodule update --init --recursive + - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libomp; fi + - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then sudo apt-get -qq update; fi + - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then sudo apt-get install -y libhdf5-serial-dev python3 python3-pip libpython3-dev wget; fi + - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install hdf5 python3.6 python3-pip || echo "a brew error code when installing gcc is expected"; fi + +install: + - sudo pip3 install setuptools --upgrade + - wget https://github.com/vgteam/vg/releases/download/v1.24.0/vg && chmod u+x vg + +script: + - export PATH=$(pwd):$PATH + - export PATH=$(pwd)/deps/hal/bin:$PATH + - make test + +dist: bionic +osx_image: xcode10.1 + +matrix: + include: + - os: linux + compiler: gcc + #- os: osx + # compiler: clang + diff --git a/Dockerfile b/Dockerfile index 8315e24..f719d2d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,10 +1,10 @@ # creates an image containing vg and hal2vg # build on compatible vg image -FROM quay.io/vgteam/vg:v1.17.0-0-gaa0b37860-t315-build +FROM 
quay.io/vgteam/vg:v1.24.0 # update system and install dependencies not present in vg image -RUN apt-get -qq update && apt-get -qq install -y libhdf5-serial-dev +RUN apt-get -qq update && apt-get -qq install -y libhdf5-dev build-essential python3-dev python3-pip # copy current directory to docker ADD . /hal2vg @@ -16,4 +16,4 @@ WORKDIR /hal2vg RUN make # add hal2vg to the PATH -ENV PATH /hal2vg:$PATH +ENV PATH /hal2vg:/hal2vg/deps/hal/bin:$PATH diff --git a/Makefile b/Makefile index 9f9218f..7e9f0cd 100644 --- a/Makefile +++ b/Makefile @@ -7,18 +7,19 @@ sidegraphInc = ${sgExportPath}/sidegraph.h ${sgExportPath}/sgcommon.h ${sgExport all : hal2vg cleanFast : - rm -f hal2vg hal2vg.o sg2vgproto.o + rm -f hal2vg hal2vg.o sg2vghandle.o clean : rm -f hal2vg hal2vg.o cd deps/sonLib && make clean cd deps/hal && make clean cd deps/hal2sg && make clean + cd deps/libbdsg-easy && make clean -sg2vgproto.o : sg2vgproto.cpp sg2vgproto.h ${sidegraphInc} ${basicLibsDependencies} - ${cpp} ${cppflags} -I . sg2vgproto.cpp -c +sg2vghandle.o : sg2vghandle.cpp sg2vghandle.h ${sidegraphInc} ${basicLibsDependencies} + ${cpp} ${cppflags} -I . sg2vghandle.cpp -c -hal2vg.o : hal2vg.cpp sg2vgproto.h ${sidegraphInc} ${basicLibsDependencies} +hal2vg.o : hal2vg.cpp sg2vghandle.h ${sidegraphInc} ${basicLibsDependencies} ${cpp} ${cppflags} -I . 
hal2vg.cpp -c ${sonLibPath}/sonLib.a : @@ -30,9 +31,20 @@ ${halPath}/halLib.a : ${sonLibPath}/sonLib.a ${hal2sgPath}/libhal2sg.a : ${halPath}/halLib.a cd deps/hal2sg && make -hal2vg : hal2vg.o sg2vgproto.o ${basicLibsDependencies} +${libbdsgPath}/lib/libbdsg.a : + cd deps/libbdsg-easy && make + +${libbdsgPath}/lib/libhandlegraph.a : ${libbdsgPath}/lib/libbdsg.a + +${libbdsgPath}/lib/libsdsl.a : ${libbdsgPath}/lib/libbdsg.a + +${libbdsgPath}/lib/libdivsufsort.a : ${libbdsgPath}/lib/libbdsg.a + +${libbdsgPath}/lib/libdivsufsort64.a : ${libbdsgPath}/lib/libbdsg.a + +hal2vg : hal2vg.o sg2vghandle.o ${basicLibsDependencies} cd deps/hal2sg && make - ${cpp} ${cppflags} -pthread hal2vg.o sg2vgproto.o ${basicLibs} -o hal2vg + ${cpp} ${cppflags} -fopenmp -pthread hal2vg.o sg2vghandle.o ${basicLibs} -o hal2vg test : hal2vg - cd tests && VGDIR=${PWD}/${VGDIR} prove -v small.t + cd tests && prove -v small.t diff --git a/README.md b/README.md index 535dad9..beea133 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # hal2vg -Prototype code for converting [HAL](https://github.com/glennhickey/hal) to [vg](https://github.com/vgteam/vg). +[![Build Status](https://travis-ci.org/ComparativeGenomicsToolkit/hal2vg.svg?branch=master)](https://travis-ci.org/ComparativeGenomicsToolkit/hal2vg) -(c) 2016 Glenn Hickey. See [LICENSE](https://github.com/glennhickey/hal2vg/blob/master/LICENSE) for details. +Prototype code for converting [HAL](https://github.com/glennhickey/hal) to [vg](https://github.com/vgteam/vg). See also: * [hal2sg](https://github.com/glennhickey/hal2sg): Convert [HAL](https://github.com/glennhickey/hal) (output by [Cactus](https://github.com/glennhickey/progressiveCactus) and [CAMEL](https://github.com/adamnovak/sequence-graphs)) to [Side Graph SQL](https://github.com/ga4gh/schemas/wiki/Human-Genome-Variation-Reference-(HGVR)-Pilot-Project#graph-format) @@ -19,20 +19,29 @@ This tool is a composition of `hal2sg` and `sg2vg`. 
It converts HAL into an in- ## Installing Dependencies -#### vg +#### HDF5 1.10.1 with C++ API enabled -* [vg](https://github.com/vgteam/vg) must be downloaded and built before hal2vg -* Edit hal2vg/include.mk and make sure that VGDIR points to the correct vg directory +* Using apt (Ubuntu 18.04) -#### HDF5 1.10.1 with C++ API enabled + `sudo apt install libhdf5-dev` + +* Using [MacPorts](http://www.macports.org/): -* Use build from [Progressive Cactus](https://github.com/glennhickey/progressiveCactus) by downloading and building Progressive Cactus *then* running `. environment` in the progressive cactus directory before building hal2vg. + `sudo port install hdf5 @1.10.1 +cxx` -* Or Local install from source into DIR (do not need root password) +* From [Source](http://www.hdfgroup.org/ftp/HDF5/releases/hdf5-1.10/hdf5-1.10.1/src/): - `mkdir DIR/hdf5` `wget http://www.hdfgroup.org/ftp/HDF5/releases/hdf5-1.10/hdf5-1.10.1/src/hdf5-1.10.1.tar.gz` - `tar xzf hdf5-1.10.1.tar.gz` + `tar xzf hdf5-1.10.1.tar.gz` + `cd hdf5-1.10.1` + `./configure --enable-cxx` + `make && make install` + +* Local install from source into DIR (do not need root password) + + `mkdir DIR/hdf5` + `wget http://www.hdfgroup.org/ftp/HDF5/releases/hdf5-1.10/hdf5-1.10.1/src/hdf5-1.10.1.tar.gz` + `tar xzf hdf5-1.10.1.tar.gz` `cd hdf5-1.10.1` `./configure --enable-cxx --prefix DIR/hdf5` `make && make install` @@ -41,18 +50,16 @@ This tool is a composition of `hal2sg` and `sg2vg`. It converts HAL into an in- `export PATH=DIR/hdf5/bin:${PATH}` `export h5prefix=-prefix=DIR/hdf5` + + or set these in include.local.mk. 
-* Or From [Source](http://www.hdfgroup.org/ftp/HDF5/releases/hdf5-1.10/hdf5-1.10.1/src/): - - `wget http://www.hdfgroup.org/ftp/HDF5/releases/hdf5-1.10/hdf5-1.10.1/src/hdf5-1.10.1.tar.gz` - `tar xzf hdf5-1.10.1.tar.gz` - `cd hdf5-1.10.1` - `./configure --enable-cxx` - `make && make install` - -* Or Using [MacPorts](http://www.macports.org/): - - sudo port install hdf5 @1.10.1 +cxx + If you are using an older version of HDF5, such as installed on CentOS, + you may need to set + + `export CXX_ABI_DEF='-D_GLIBCXX_USE_CXX11_ABI=1'` + + If you get undefined functions based on string type with errors about + `std::__cxx11::basic_string` vs `std::basic_string`. ## Instructions @@ -60,20 +67,18 @@ This tool is a composition of `hal2sg` and `sg2vg`. It converts HAL into an in- git clone https://github.com/glennhickey/hal2vg.git --recursive -**Setting your VG path:** - -* Compile `vg` with `make static` -* Edit `include.mk` so that `VGDIR` points to where you've built [vg](https://github.com/vgteam/vg). By default it will be `../vg` -* Change `LIBPROTOBUF=$(VGLIBDIR)/libprotobuf.a` to the system library that was used to build vg. For example: `LIBPROTOBUF=/usr/lib/x86_64-linux-gnu/libprotobuf.a` in `include.mk`. You can find it on Ubuntu with `dpkg -L libprotobuf-dev` - **Compiling:** make To run the converter: - hal2vg input.hal > output.vg + hal2vg input.hal > output.pg To see all the options, run with no args or use `--help`. -Note: The output vg may have nodes with sequence length up to 1MB, and will need to be chopped (ex `vg mod -X 32`) before indexing with `vg index`. +Note: The output graph may have nodes with sequence length up to 1MB, and will need to be chopped (ex `vg mod -X 32`) before indexing with `vg index`. + +Note: The output graph is only readable by vg version 1.24.0 and greater. + +(c) 2016 Glenn Hickey. See [LICENSE](https://github.com/glennhickey/hal2vg/blob/master/LICENSE) for details. 
diff --git a/deps/libbdsg-easy b/deps/libbdsg-easy new file mode 160000 index 0000000..537567a --- /dev/null +++ b/deps/libbdsg-easy @@ -0,0 +1 @@ +Subproject commit 537567a7461c84964ba04d426487b2efd5a44761 diff --git a/hal2vg.cpp b/hal2vg.cpp index 205e8f1..a8d7101 100644 --- a/hal2vg.cpp +++ b/hal2vg.cpp @@ -15,11 +15,15 @@ #include "sgbuilder.h" #include "side2seq.h" -#include "sg2vgproto.h" -#include "vg.pb.h" +#include "sg2vghandle.h" +#include "bdsg/packed_graph.hpp" +#include "bdsg/hash_graph.hpp" +#include "bdsg/odgi.hpp" using namespace std; using namespace hal; +using namespace handlegraph; +using namespace bdsg; static bool isCamelHal(AlignmentConstPtr aligment); static void breadthFirstGenomeSearch(const Genome* reference, @@ -64,15 +68,15 @@ static void initParser(CLParser* optionsParser) optionsParser->addOptionFlag("keepCase", "don't convert all nucleotides to upper case", false); - optionsParser->addOption("protoChunk", - "maximum size (approx) of output protobuf chunks (bytes)", - 30000000); optionsParser->addOption("refSequenceFile", "white-space delimited list of sequence names in the " "reference genome which will *not* be collapsed by duplications." " Overrides --refDupes", "\"\""); + optionsParser->addOption("outputFormat", + "output graph format in {pg, hg, odgi} [default=pg]", + "pg"); - optionsParser->setDescription("Convert HAL alignment to vg protobuf"); + optionsParser->setDescription("Convert HAL alignment to handle graph"); } @@ -93,8 +97,8 @@ int main(int argc, char** argv) // larger graphs. So we only use to make sure we don't overflow protobuf. // Todo: tune down? 
const int chop = 1000000; - int protoChunk; string refSequenceFile; + string outputFormat; try { optionsParser.parseOptions(argc, argv); @@ -106,22 +110,21 @@ int main(int argc, char** argv) refDupes = optionsParser.getFlag("refDupes"); onlySequenceNames = optionsParser.getFlag("onlySequenceNames"); keepCase = optionsParser.getFlag("keepCase"); - protoChunk = optionsParser.getOption("protoChunk"); + outputFormat = optionsParser.getOption("outputFormat"); refSequenceFile = optionsParser.getOption("refSequenceFile"); if (rootGenomeName != "\"\"" && targetGenomes != "\"\"") { throw hal_exception("--rootGenome and --targetGenomes options are " "mutually exclusive"); } - if (protoChunk > 60000000) - { - cerr << "Warning: --protoChunk parameter set dangerously high." << endl; - } if (refSequenceFile != "\"\"" && refGenomeName == "\"\"") { throw hal_exception("--refSequenceFile must be used in conjunction " " with --refGenome"); } + if (outputFormat != "pg" && outputFormat != "hg" && outputFormat != "odgi") { + throw hal_exception("--outputFormat must be one of {pg, hg, odgi}"); + } } catch(exception& e) { @@ -308,21 +311,26 @@ int main(int argc, char** argv) const vector& outPaths = converter.getOutPaths(); - // write to vg proto - cerr << "Writing VG protobuf to stdout" << endl; - SG2VGProto vgWriter; - vgWriter.init(&cout); - //vgWriter.writeGraph(outGraph, outBases, outPaths); - - // chunking parameters designed to keep well under protobuf limit - int nodeCount = max(1UL, protoChunk / (sizeof(vg::Node) + chop)); - int edgeCount = max(1UL, protoChunk / sizeof(vg::Edge)); - // very conservative here assuming avg path size of 1 - int segmentCount = max(1UL, protoChunk / - (sizeof(vg::Mapping) + sizeof(vg::Path))); + // convert to vg handle + cerr << "Converting SideGraph to HandleGraph" << endl; - vgWriter.writeChunkedGraph(outGraph, outBases, outPaths, - nodeCount, edgeCount, segmentCount); + unique_ptr graph; + if (outputFormat == "pg") { + graph = unique_ptr(new 
PackedGraph()); + } else if (outputFormat == "hg") { + graph = unique_ptr(new HashGraph()); + } else if (outputFormat == "odgi") { + graph = unique_ptr(new ODGI()); + } else { + assert(false); + } + + SG2VGHandle vgConverter; + vgConverter.convert(outGraph, outBases, outPaths, graph.get()); + + // write to stdout + cerr << "Writing HandleGraph to stdout" << endl; + dynamic_cast(graph.get())->serialize(cout); //cout << *sgbuild.getSideGraph() << endl; diff --git a/include.mk b/include.mk index fe94187..85463e3 100644 --- a/include.mk +++ b/include.mk @@ -1,17 +1,6 @@ binPath=${rootPath} libPath=${rootPath} -#IMPORTANT: must change this to where you built vg -VGDIR=../vg -# Since we're writing protobuf directly for now (to avoid making a whole-graph in memory index), -# we only link against the bare minimum to write the proto objects. -VGLIBDIR=$(VGDIR)/lib -LIBPROTOBUF=$(VGLIBDIR)/libprotobuf.a -LIBVG=$(VGLIBDIR)/libvg.a -LIBHTS=$(VGLIBDIR)/libhts.a -LIBDEFLATE=$(VGLIBDIR)/libdeflate.a -VGLIBS=$(LIBVG) $(VGLIBDIR)/libvgio.a $(LIBPROTOBUF) $(LIBHTS) $(LIBDEFLATE) -llzma -lbz2 - sonLibRootPath=deps/sonLib sonLibPath=${sonLibRootPath}/lib @@ -24,12 +13,13 @@ hal2sgPath=${rootPath}/deps/hal2sg sg2vgPath=${rootPath}/deps/sg2vg rapidJsonPath=${sg2vgPath}/rapidjson sgExportPath=${hal2sgPath}/sgExport +libbdsgPath=${rootPath}/deps/libbdsg-easy include ${sonLibRootPath}/include.mk cflags += -I ${sonLibPath} -I ${halPath} -I ${halIncPath} -I ${halLIIncPath} -I ${sgExportPath} -I ${hal2sgPath} -cppflags += -std=c++11 -I ${sonLibPath} -I ${halPath} -I ${halIncPath} -I ${halLIIncPath} -I ${sgExportPath} -I ${hal2sgPath} -I ${VGDIR}/include -I ${VGDIR}/include/vg -I ${VGDIR}/include/vg/io -I ${VGDIR}/src -UNDEBUG -basicLibs = ${hal2sgPath}/libhal2sg.a ${sgExportPath}/sgExport.a ${halPath}/libHalLiftover.a ${halPath}/libHal.a ${VGLIBS} ${sonLibPath}/sonLib.a ${sonLibPath}/cuTest.a +cppflags += -std=c++11 -I ${sonLibPath} -I ${halPath} -I ${halIncPath} -I ${halLIIncPath} -I 
${sgExportPath} -I ${hal2sgPath} -I ${libbdsgPath}/include -UNDEBUG +basicLibs = ${hal2sgPath}/libhal2sg.a ${sgExportPath}/sgExport.a ${halPath}/libHalLiftover.a ${halPath}/libHal.a ${VGLIBS} ${sonLibPath}/sonLib.a ${sonLibPath}/cuTest.a ${libbdsgPath}/lib/libbdsg.a ${libbdsgPath}/lib/libhandlegraph.a ${libbdsgPath}/lib/libsdsl.a ${libbdsgPath}/lib/libdivsufsort.a ${libbdsgPath}/lib/libdivsufsort64.a basicLibsDependencies = ${basicLibs} # hdf5 compilation is done through its wrappers. diff --git a/sg2vghandle.cpp b/sg2vghandle.cpp new file mode 100644 index 0000000..af6f8a8 --- /dev/null +++ b/sg2vghandle.cpp @@ -0,0 +1,111 @@ +/* + * Copyright (C) 2015 by Glenn Hickey (hickey@soe.ucsc.edu) + * + * Released under the MIT license, see LICENSE.cactus + */ + +#include +#include +#include + +#include "sg2vghandle.h" + +using namespace std; +using namespace handlegraph; + +SG2VGHandle::SG2VGHandle() : _graph(0), _sg(0), _bases(0), _paths(0) +{ +} + +SG2VGHandle::~SG2VGHandle() +{ + +} + +void SG2VGHandle::convert(const SideGraph* sg, + const vector& bases, + const vector& paths, + MutablePathMutableHandleGraph* graph) +{ + _graph = graph; + _sg = sg; + _bases = &bases; + _paths = &paths; + + // add every node to handle doc + for (int i = 0; i < _sg->getNumSequences(); ++i) + { + addNode(_sg->getSequence(i)); + } + + // add every edge to handle doc + const SideGraph::JoinSet* joinSet = _sg->getJoinSet(); + for (SideGraph::JoinSet::const_iterator i = joinSet->begin(); + i != joinSet->end(); ++i) + { + addEdge(*i); + } + + // add every path to handle doc + for (int i = 0; i < paths.size(); ++i) + { + addPath(paths[i].first, paths[i].second); + } +} + + +void SG2VGHandle::addNode(const SGSequence* seq) +{ + _graph->create_handle(_bases->at(seq->getID()), + // node id's are 1-based in VG! 
+ seq->getID() + 1); +} + +void SG2VGHandle::addEdge(const SGJoin* join) +{ + handle_t node1 = _graph->get_handle(join->getSide1().getBase().getSeqID() + 1, + join->getSide1().getForward() == true); + handle_t node2 = _graph->get_handle(join->getSide2().getBase().getSeqID() + 1, + join->getSide2().getForward() == false); + _graph->create_edge(node1, node2); +} + +void SG2VGHandle::addPath(const string& name, const vector& path, + int rank) +{ + path_handle_t path_handle = _graph->create_path_handle(name); + + int inputPathLength = 0; + int outputPathLength = 0; + for (int i = 0; i < path.size(); ++i) + { + sg_int_t sgSeqID = path[i].getSide().getBase().getSeqID(); + + if (path[i].getLength() != _sg->getSequence(sgSeqID)->getLength()) + { + stringstream ss; + ss << "Sanity check fail for Mapping " << i << " of path " << name + << ": Segment size " << path[i].getLength() << " does not span " + << "all of node " << (sgSeqID + 1) << " which has length " + << _sg->getSequence(sgSeqID)->getLength(); + throw runtime_error(ss.str()); + } + inputPathLength += path[i].getLength(); + + handle_t handle = _graph->get_handle(sgSeqID + 1, + !path[i].getSide().getForward()); + step_handle_t step_handle = _graph->append_step(path_handle, handle); + + size_t step_len = _sg->getSequence(sgSeqID)->getLength(); + assert(step_len == _graph->get_length(handle)); + + outputPathLength += _sg->getSequence(sgSeqID)->getLength(); + } + if (inputPathLength != outputPathLength) + { + stringstream ss; + ss << "Sanity check fail for path " << name << ": input length (" + << inputPathLength << ") != output length (" << outputPathLength << ")"; + throw runtime_error(ss.str()); + } +} diff --git a/sg2vghandle.h b/sg2vghandle.h new file mode 100644 index 0000000..37bc56f --- /dev/null +++ b/sg2vghandle.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2016 by Glenn Hickey (hickey@soe.ucsc.edu) + * + * Released under the MIT license, see LICENSE.cactus + */ + +#ifndef _SG2VGHANDLE_H +#define _SG2VGHANDLE_H + 
+#include +#include +#include +#include "handlegraph/mutable_path_mutable_handle_graph.hpp" +#include "sidegraph.h" + +/** This class replaces the old sg2vgproto.h which itself replaced + +https://github.com/glennhickey/sg2vg/blob/master/sg2vgjson.h + +changing the JSON/protobuf output to any handle graph implementation in libbdsg + +It will require more memory than the old streaming implementation, but will +write something that's more useful and compatible. +*/ + +class SG2VGHandle +{ +public: + SG2VGHandle(); + ~SG2VGHandle(); + + /** write nodes and edges and paths*/ + void convert(const SideGraph* sg, + const std::vector& bases, + const std::vector& paths, + handlegraph::MutablePathMutableHandleGraph* graph); + +protected: + + handlegraph::MutablePathMutableHandleGraph* _graph; + + // add to handle + void addNode(const SGSequence* seq); + void addEdge(const SGJoin* join); + void addPath(const std::string& name, const std::vector& path, + int rank = 0); + + const SideGraph* _sg; + const std::vector* _bases; + const std::vector > >* _paths; +}; + + +#endif diff --git a/sg2vgproto.cpp b/sg2vgproto.cpp deleted file mode 100644 index 18da30d..0000000 --- a/sg2vgproto.cpp +++ /dev/null @@ -1,250 +0,0 @@ -/* - * Copyright (C) 2015 by Glenn Hickey (hickey@soe.ucsc.edu) - * - * Released under the MIT license, see LICENSE.cactus - */ - -#include -#include -#include - -#include "sg2vgproto.h" -#include "stream.hpp" - -using namespace std; -using namespace vg; - -SG2VGProto::SG2VGProto() : _os(0), _sg(0), _bases(0), _paths(0) -{ -} - -SG2VGProto::~SG2VGProto() -{ - -} - -void SG2VGProto::init(ostream* os) -{ - _os = os; - _graph = Graph(); -} - -void SG2VGProto::writeGraph(const SideGraph* sg, - const vector& bases, - const vector& paths) -{ - _sg = sg; - _bases = &bases; - _paths = &paths; - - // add every node to proto doc - for (int i = 0; i < _sg->getNumSequences(); ++i) - { - addNode(_sg->getSequence(i)); - } - - // add every edge to proto doc - const 
SideGraph::JoinSet* joinSet = _sg->getJoinSet(); - for (SideGraph::JoinSet::const_iterator i = joinSet->begin(); - i != joinSet->end(); ++i) - { - addEdge(*i); - } - - // add every path to proto doc - for (int i = 0; i < paths.size(); ++i) - { - addPath(paths[i].first, paths[i].second); - } - - function lambda = [this](uint64_t i) -> Graph { - return _graph; - }; - - vg::io::write(*_os, 1, lambda); - vg::io::finish(*_os); -} - -void SG2VGProto::writeChunkedGraph(const SideGraph* sg, - const std::vector& bases, - const std::vector& paths, - int sequencesPerChunk, - int joinsPerChunk, - int pathSegsPerChunk) -{ - _sg = sg; - _bases = &bases; - _paths = &paths; - - // we begin by counting up how many chunks we'll write, using - // the three difference chunking parameters - const SideGraph::JoinSet* joinSet = _sg->getJoinSet(); - int totalPathSegments = 0; - for (int i = 0; i < paths.size(); ++i) - { - totalPathSegments += paths[i].second.size(); - } - int seqChunks = (int)ceil((double)_sg->getNumSequences() / sequencesPerChunk); - int joinChunks = (int)ceil((double)joinSet->size() / joinsPerChunk); - int segmentChunks = (int)ceil((double)totalPathSegments / pathSegsPerChunk); - int totalChunks = seqChunks + joinChunks + segmentChunks; - - // need to keep track of where we're at in the input - int curSequence = 0; - SideGraph::JoinSet::const_iterator curJoin = joinSet->begin(); - int curPath = 0; - int curSegment = 0; - - // fill in the graph and write it to the stream - function lambda = [&](uint64_t i) -> Graph { - init(_os); - - // write a chunk's worth of nodes if we're still in the node chunks - if (i < seqChunks) - { - if (i == 0) - { - cerr << "Writing " << seqChunks << " chunks of up to " - << sequencesPerChunk << " nodes" << endl; - } - for (int count = 0; count < sequencesPerChunk && - curSequence < _sg->getNumSequences(); - ++count, ++curSequence) - { - addNode(_sg->getSequence(curSequence)); - } - } - - // write a chunk's worth of edges if we're still in 
the edge chunks - else if (i < joinChunks + seqChunks) - { - if (i == seqChunks) - { - cerr << "Writing " << joinChunks << " chunks of up to " - << joinsPerChunk << " edges" << endl; - } - for (int count = 0; count < joinsPerChunk && curJoin != joinSet->end(); - ++count, ++curJoin) - { - addEdge(*curJoin); - } - } - - // write a chunk's worth of path segments if we're done nodes and edges - else - { - assert(i < totalChunks); - if (i == joinChunks + seqChunks) - { - cerr << "Writing " << segmentChunks << " chunks of up to " - << pathSegsPerChunk << " path segments" << endl; - } - - // iterate paths - for (int count = 0; count < pathSegsPerChunk && curPath < paths.size();) - { - const SGNamedPath& path = paths[curPath]; - if (curSegment == 0 && pathSegsPerChunk - count >= path.second.size()) - { - // we're adding entire path - addPath(path.first, path.second); - count += path.second.size(); - curSegment += path.second.size(); - } - else - { - // we need to chop path - while (curSegment < path.second.size() && count < pathSegsPerChunk) - { - int pathChunkSize = min(pathSegsPerChunk, - (int)path.second.size() - curSegment); - vector::const_iterator a = - path.second.begin() + curSegment; - vector::const_iterator b = a + pathChunkSize; - addPath(path.first, vector(a, b), curSegment); - count += pathChunkSize; - curSegment += pathChunkSize; - } - } - // finished writing path, reset to segment zero on next path - if (curSegment >= path.second.size()) - { - curSegment = 0; - ++curPath; - } - } - } - return _graph; - }; - - vg::io::write(*_os, totalChunks, lambda); - vg::io::finish(*_os); -} - - -void SG2VGProto::addNode(const SGSequence* seq) -{ - Node* node = _graph.add_node(); - // node id's are 1-based in VG! - node->set_id(seq->getID() + 1); - node->set_sequence(_bases->at(seq->getID())); -} - -void SG2VGProto::addEdge(const SGJoin* join) -{ - Edge* edge = _graph.add_edge(); - // node id's are 1-based in VG! 
- edge->set_from(join->getSide1().getBase().getSeqID() + 1); - edge->set_to(join->getSide2().getBase().getSeqID() + 1); - edge->set_from_start(join->getSide1().getForward() == true); - edge->set_to_end(join->getSide2().getForward() == false); -} - -void SG2VGProto::addPath(const string& name, const vector& path, - int rank) -{ - Path* vgPath = _graph.add_path(); - vgPath->set_name(name); - - int inputPathLength = 0; - int outputPathLength = 0; - for (int i = 0; i < path.size(); ++i) - { - sg_int_t sgSeqID = path[i].getSide().getBase().getSeqID(); - - if (path[i].getLength() != _sg->getSequence(sgSeqID)->getLength()) - { - stringstream ss; - ss << "Sanity check fail for Mapping " << i << " of path " << name - << ": Segment size " << path[i].getLength() << " does not span " - << "all of node " << (sgSeqID + 1) << " which has length " - << _sg->getSequence(sgSeqID)->getLength(); - throw runtime_error(ss.str()); - } - inputPathLength += path[i].getLength(); - - Mapping* mapping = vgPath->add_mapping(); - mapping->set_rank(rank + i + 1); - Position* position = mapping->mutable_position(); - // node id's are 1-based in VG! - position->set_node_id(sgSeqID + 1); - - Edit* edit = mapping->add_edit(); - edit->set_from_length(path[i].getLength()); - edit->set_to_length(path[i].getLength()); - - // Offsets are along the strand of the node that is being visited. - // We always use the whole node. 
- position->set_offset(0); - position->set_is_reverse(!path[i].getSide().getForward()); - - outputPathLength += _sg->getSequence(sgSeqID)->getLength(); - } - if (inputPathLength != outputPathLength) - { - stringstream ss; - ss << "Sanity check fail for path " << name << ": input length (" - << inputPathLength << ") != output length (" << outputPathLength << ")"; - throw runtime_error(ss.str()); - } -} diff --git a/sg2vgproto.h b/sg2vgproto.h deleted file mode 100644 index 50ccade..0000000 --- a/sg2vgproto.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (C) 2016 by Glenn Hickey (hickey@soe.ucsc.edu) - * - * Released under the MIT license, see LICENSE.cactus - */ - -#ifndef _SG2VGPROTO_H -#define _SG2VGPROTO_H - -#include -#include -#include - -#include "vg.pb.h" - -#include "sidegraph.h" - -/** This class replaces - -https://github.com/glennhickey/sg2vg/blob/master/sg2vgjson.h - -changing the JSON output to protobuf. We write protobuf -directly instead of going through the VG class to save memory -and preserve some of the old (naive) chunking logic from sg2vgjson. - -We are writing a SideGraph object, but one that was created with -side2seq -- ie all joins are to ends of sequences, so can be translated -directly to vg sequence graph... -*/ - -class SG2VGProto -{ -public: - SG2VGProto(); - ~SG2VGProto(); - - /** init output stream and proto document */ - void init(std::ostream* os); - - /** write nodes and edges and paths*/ - void writeGraph(const SideGraph* sg, - const std::vector& bases, - const std::vector& paths); - - /** write a graph chunk by chunk. 
chunks are not subgraphs, they are just - * bags of nodes, then bags of edges, then bags of paths (streamed out in - * that order) */ - void writeChunkedGraph(const SideGraph* sg, - const std::vector& bases, - const std::vector& paths, - int sequencesPerChunk = 5000, - int joinsPerChunk = 100000, - int pathSegsPerChunk = 10000); - -protected: - - vg::Graph _graph; - - // add to proto - void addNode(const SGSequence* seq); - void addEdge(const SGJoin* join); - void addPath(const std::string& name, const std::vector& path, - int rank = 0); - - std::ostream* _os; - const SideGraph* _sg; - const std::vector* _bases; - const std::vector > >* _paths; -}; - - -#endif diff --git a/tests/bash-tap/Changes b/tests/bash-tap/Changes new file mode 100644 index 0000000..ebdf998 --- /dev/null +++ b/tests/bash-tap/Changes @@ -0,0 +1,12 @@ +* bash-tap 1.0.2 (2013-05-19-00:59) + * Make bash-tap-mock work with -e. + Contributed by Daniel Nephin (@dnephin). + +* bash-tap 1.0.1 (2012-07-14-20:59) + * Clearer diagnostics for like/unlike. + * Correct syntax for bash 3.2 regexp matching in like/unlike. + +* bash-tap 1.0.0 (2012-06-20-15:31) + * TAP-compliant testing for bash. + * Function and command mocks for bash. + * In-process output capture helpers for testing. diff --git a/tests/bash-tap/README.mkdn b/tests/bash-tap/README.mkdn new file mode 100644 index 0000000..80b33b5 --- /dev/null +++ b/tests/bash-tap/README.mkdn @@ -0,0 +1,234 @@ +Bash-TAP +======== + +Bash-TAP allows you to perform TAP-compliant tests within bash +using a similar test syntax to Perl's Test::More and Test::Builder, +suitable to run with `prove` or any other TAP-consuming test harness. + +For more information about TAP (the Test Anything Protocol) visit: +http://testanything.org/ + +Installation and Usage +---------------------- + +1. Install the bash-tap files somewhere convenient for you. 
+ The default location of `../../bash-tap` relative to your + test files is the easiest zero-conf way, but you can set + the `$BASH_TAP_ROOT` environment variable if you want to + install elsewhere. +2. If you're writing tests then copy `bash-tap-bootstrap` + into your tests dir and source it inside your tests with: + +```bash +. $(dirname $0)/bash-tap-bootstrap +``` + +3. Run your tests with `prove my_test_dir` or your favourite + TAP-consuming test harness, or run them manually as a + script if you just want to see the raw TAP output. + +Example test file +----------------- + +Here's example test file `01_read_rows_from_key_value_lines.t` from +https://github.com/illusori/bash-snippets + +```bash +#!/bin/bash + +. $(dirname $0)/bash-tap-bootstrap +. $(dirname $0)/../read_rows_from_key_value_lines + +columns_per_row=6 +max_rows_per_rowset=3 +total_rowsets=2 + +plan tests $(((columns_per_row * max_rows_per_rowset * total_rowsets) + total_rowsets)) + +# Test data, resultset 1 +results1="artist Assemblage 23 +track Naked (God Module RMX) +album Addendum +year 2001 +rating 80 +tracktime 5:22 +artist Ayria +track Sapphire +album Debris +year +rating 100 +tracktime 6:14 +artist Apoptygma Berzerk +track Kathy's Song +album Welcome To Earth \"Extra bit for testing\" +year +rating 100 +tracktime 6:35" + +# Test data, resultset 2 +results2="artist Colony 5 +track The Bottle +album Lifeline +year +rating 80 +tracktime 4:34" + +output=$(_read_rows_from_key_value_lines "track" "$results1" 2>&1) +is "$output" "" "Read of rowset 1 should produce no output" +# Since $() runs in a subshell, we need to run it "for real" now +_read_rows_from_key_value_lines "track" "$results1" &>/dev/null + +# Track 1 +is "${track_artist[0]}" "Assemblage 23" "rowset 1 track 1 artist" +is "${track_track[0]}" "Naked (God Module RMX)" "rowset 1 track 1 track" +is "${track_album[0]}" "Addendum" "rowset 1 track 1 album" +is "${track_year[0]}" "2001" "rowset 1 track 1 year" +is "${track_rating[0]}" "80" 
"rowset 1 track 1 rating" +is "${track_tracktime[0]}" "5:22" "rowset 1 track 1 tracktime" + +# Track 2 +is "${track_artist[1]}" "Ayria" "rowset 1 track 2 artist" +is "${track_track[1]}" "Sapphire" "rowset 1 track 2 track" +is "${track_album[1]}" "Debris" "rowset 1 track 2 album" +is "${track_year[1]}" "" "rowset 1 track 2 year" +is "${track_rating[1]}" "100" "rowset 1 track 2 rating" +is "${track_tracktime[1]}" "6:14" "rowset 1 track 2 tracktime" + +# Track 3 +is "${track_artist[2]}" "Apoptygma Berzerk" "rowset 1 track 3 artist" +is "${track_track[2]}" "Kathy's Song" "rowset 1 track 3 track" +is "${track_album[2]}" "Welcome To Earth \"Extra bit for testing\"" "rowset 1 track 3 album" +is "${track_year[2]}" "" "rowset 1 track 3 year" +is "${track_rating[2]}" "100" "rowset 1 track 3 rating" +is "${track_tracktime[2]}" "6:35" "rowset 1 track 3 tracktime" + +output=$(_read_rows_from_key_value_lines "track" "$results2" 2>&1) +is "$output" "" "Read of rowset 2 should produce no output" +# Since $() runs in a subshell, we need to run it "for real now +_read_rows_from_key_value_lines "track" "$results2" &>/dev/null + +# Track 1 +is "${track_artist[0]}" "Colony 5" "rowset 2 track 1 artist" +is "${track_track[0]}" "The Bottle" "rowset 2 track 1 track" +is "${track_album[0]}" "Lifeline" "rowset 2 track 1 album" +is "${track_year[0]}" "" "rowset 2 track 1 year" +is "${track_rating[0]}" "80" "rowset 2 track 1 rating" +is "${track_tracktime[0]}" "4:34" "rowset 2 track 1 tracktime" + +# Track 2 +is "${track_artist[1]}" "" "rowset 2 track 2 artist" +is "${track_track[1]}" "" "rowset 2 track 2 track" +is "${track_album[1]}" "" "rowset 2 track 2 album" +is "${track_year[1]}" "" "rowset 2 track 2 year" +is "${track_rating[1]}" "" "rowset 2 track 2 rating" +is "${track_tracktime[1]}" "" "rowset 2 track 2 tracktime" + +# Track 3 +is "${track_artist[2]}" "" "rowset 2 track 3 artist" +is "${track_track[2]}" "" "rowset 2 track 3 track" +is "${track_album[2]}" "" "rowset 2 track 3 album" 
+is "${track_year[2]}" "" "rowset 2 track 3 year" +is "${track_rating[2]}" "" "rowset 2 track 3 rating" +is "${track_tracktime[2]}" "" "rowset 2 track 3 tracktime" +``` + +Running this gives output: + +``` +$ prove ~/projects/bash-snippets/t +/Users/illusori/projects/bash-snippets/t/01_read_rows_from_key_value_lines.t .. ok +All tests successful. +Files=1, Tests=38, 0 wallclock secs ( 0.04 usr 0.00 sys + 0.04 cusr 0.02 csys = 0.10 CPU) +Result: PASS +``` + +Or the verbose output: + +``` +$ prove -v ~/projects/bash-snippets/t +/Users/illusori/projects/bash-snippets/t/01_read_rows_from_key_value_lines.t .. +1..38 +ok 1 - Read of rowset 1 should produce no output +ok 2 - rowset 1 track 1 artist +ok 3 - rowset 1 track 1 track +ok 4 - rowset 1 track 1 album +ok 5 - rowset 1 track 1 year +ok 6 - rowset 1 track 1 rating +ok 7 - rowset 1 track 1 tracktime +ok 8 - rowset 1 track 2 artist +ok 9 - rowset 1 track 2 track +ok 10 - rowset 1 track 2 album +ok 11 - rowset 1 track 2 year +ok 12 - rowset 1 track 2 rating +ok 13 - rowset 1 track 2 tracktime +ok 14 - rowset 1 track 3 artist +ok 15 - rowset 1 track 3 track +ok 16 - rowset 1 track 3 album +ok 17 - rowset 1 track 3 year +ok 18 - rowset 1 track 3 rating +ok 19 - rowset 1 track 3 tracktime +ok 20 - Read of rowset 2 should produce no output +ok 21 - rowset 2 track 1 artist +ok 22 - rowset 2 track 1 track +ok 23 - rowset 2 track 1 album +ok 24 - rowset 2 track 1 year +ok 25 - rowset 2 track 1 rating +ok 26 - rowset 2 track 1 tracktime +ok 27 - rowset 2 track 2 artist +ok 28 - rowset 2 track 2 track +ok 29 - rowset 2 track 2 album +ok 30 - rowset 2 track 2 year +ok 31 - rowset 2 track 2 rating +ok 32 - rowset 2 track 2 tracktime +ok 33 - rowset 2 track 3 artist +ok 34 - rowset 2 track 3 track +ok 35 - rowset 2 track 3 album +ok 36 - rowset 2 track 3 year +ok 37 - rowset 2 track 3 rating +ok 38 - rowset 2 track 3 tracktime +ok +All tests successful. 
+Files=1, Tests=38, 0 wallclock secs ( 0.04 usr 0.01 sys + 0.04 cusr 0.02 csys = 0.11 CPU) +Result: PASS +``` + +Mocking with bash-tap-mock +-------------------------- + +Also included in `bash-tap` is a simple function mocking framework +`bash-tap-mock`, it lets you mock commands and functions with +`mock_command` and `restore_mocked_command`. + +If you particularly care to only mock functions rather than commands +(a good safeguard against typos), use `mock_function` and +`restore_mocked_function`, which have some extended error checking +ensuring the function you're mocking exists in the first place. + +An example from https://github.com/illusori/bash-itunes is clearer: + +```bash +#!/bin/bash + +. $(dirname $0)/bash-tap-bootstrap +. "$BASH_TAP_ROOT/bash-tap-mock" +. $(dirname $0)/../itunes + +plan tests 4 + +sent_command='' +function mock_osascript() { + sent_command="$*" + restore_mocked_function "_osascript" +} +mock_function "_osascript" "mock_osascript" + +start_output_capture +_dispatch "stop" +finish_output_capture stdout stderr + +like "$sent_command" 'stop' "sent command should contain 'stop'" +like "$sent_command" 'tell application "iTunes"' "sent command should contain 'tell application \"iTunes\"'" + +is "$stdout" "Stopping iTunes." "stdout should tell user what happened" +is "$stderr" "" "stderr should be empty" +``` diff --git a/tests/bash-tap/bash-tap b/tests/bash-tap/bash-tap new file mode 100755 index 0000000..e71fe9b --- /dev/null +++ b/tests/bash-tap/bash-tap @@ -0,0 +1,369 @@ +#!/bin/bash + +bash_tap_version='1.0.2' + +# Our state. + +_bt_plan='' +_bt_expected_tests=0 +_bt_plan_output=0 +_bt_current_test=0 +_bt_tap_output='' +_bt_has_output_plan=0 +_bt_done_testing=0 +_bt_output_capture=0 + +# Our test results so far +unset _bt_test_ok +unset _bt_test_actual_ok +unset _bt_test_name +unset _bt_test_type +unset _bt_test_reason + +# Cleanup stuff. +declare -a _bt_on_exit_cmds +trap "_bt_on_exit" EXIT + +# Planning functions. 
+ +function _bt_output_plan() { + local num_tests="$1" + local directive="$2" + local reason="$3" + + if [ "$_bt_has_output_plan" = 1 ]; then + _caller_error "The plan was already output" + fi + + _bt_clear_out + _bt_out "1..$num_tests" + if [ -n "$directive" ]; then + _bt_out " # $directive" + fi + if [ -n "$reason" ]; then + _bt_out " $reason" + fi + _bt_print_out + _bt_has_output_plan=1 +} + +function plan() { + local plan="$1" + + case "$plan" in + no_plan) no_plan ;; + skip_all) skip_all "$2" ;; + tests) expected_tests "$2" ;; + *) _bt_die "Unknown or missing plan: '$plan'" ;; + esac +} + +function expected_tests() { + local num="$1" + + if [ -z "$num" ]; then + echo $_bt_expected_tests + else + if [ -n "$_bt_plan" ]; then + _bt_caller_error "Plan is already defined" + fi + # TODO: validate + _bt_plan="$num" + _bt_expected_tests="$num" + _bt_output_plan "$_bt_expected_tests" + fi +} + +function no_plan() { + if [ -n "$_bt_plan" ]; then + _bt_caller_error "Plan is already defined" + fi + _bt_plan="no plan" +} + +function done_testing() { + local num_tests="$1" + + if [ -z "$num_tests" ]; then + num_tests="$_bt_current_test" + fi + + if [ "$_bt_done_testing" = 1 ]; then + _bt_caller_error "done_testing was already called" + fi + + if [ "$_bt_expected_tests" != 0 -a "$num_tests" != "$_bt_expected_tests" ]; then + ok 0 "planned to run $_bt_expected_tests but done_testing expects $num_tests" + else + _bt_expected_tests="$num_tests" + fi + + if [ "$_bt_has_output_plan" = 0 ]; then + _bt_plan="done testing" + _bt_output_plan "$num_tests" + fi +} + +function has_plan() { + test -n "$_bt_plan" +} + +function skip_all() { + local reason="${*:?}" + + _bt_output_plan 0 SKIP "$reason" +} + +# Test functions. 
+ +function ok() { + local result="$1" + local name="$2" + + _bt_current_test=$((_bt_current_test + 1)) + + # TODO: validate $name + if [ -z "$name" ]; then + name='unnamed test' + fi + name="${name//#/\\#}" + + _bt_clear_out + if [ "$result" = 0 ]; then + _bt_out "not ok" + if [ -n "$TODO" ]; then + _bt_test_ok[$_bt_current_test]=1 + else + _bt_test_ok[$_bt_current_test]=0 + fi + _bt_test_actual_ok[$_bt_current_test]=0 + else + _bt_out "ok" + _bt_test_ok[$_bt_current_test]=1 + _bt_test_actual_ok[$_bt_current_test]="$result" + fi + + _bt_out " $_bt_current_test - $name" + _bt_test_name[$_bt_current_test]="$name" + + if [ -n "$TODO" ]; then + _bt_out " # TODO $TODO" + _bt_test_reason[$_bt_current_test]="$TODO" + _bt_test_type[$_bt_current_test]="todo" + else + _bt_test_reason[$_bt_current_test]='' + _bt_test_type[$_bt_current_test]='' + fi + + _bt_print_out +} + +function _is_diag() { + local result="$1" + local expected="$2" + + diag " got: '$result'" + diag " expected: '$expected'" +} + +function is() { + local result="$1" + local expected="$2" + local name="$3" + + if [ "$result" = "$expected" ]; then + ok 1 "$name" + else + ok 0 "$name" + _is_diag "$result" "$expected" + fi +} + +function _isnt_diag() { + local result="$1" + local expected="$2" + + diag " got: '$result'" + diag " expected: anything else" +} + +function isnt() { + local result="$1" + local expected="$2" + local name="$3" + + if [ "$result" != "$expected" ]; then + ok 1 "$name" + else + ok 0 "$name" + _isnt_diag "$result" "$expected" + fi +} + +function like() { + local result="$1" + local pattern="$2" + local name="$3" + + # NOTE: leave $pattern unquoted, see http://stackoverflow.com/a/218217/870000 + if [[ "$result" =~ $pattern ]]; then + ok 1 "$name" + else + ok 0 "$name" + diag " got: '$result'" + diag " expected: match for '$pattern'" + fi +} + +function unlike() { + local result="$1" + local pattern="$2" + local name="$3" + + # NOTE: leave $pattern unquoted, see 
http://stackoverflow.com/a/218217/870000 + if [[ ! "$result" =~ $pattern ]]; then + ok 1 "$name" + else + ok 0 "$name" + diag " got: '$result'" + diag " expected: no match for '$pattern'" + fi +} + +function cmp_ok() { + echo TODO +} + +# Other helper functions + +function BAIL_OUT() { + echo TODO +} + +function skip() { + echo TODO +} + +function todo_skip() { + echo TODO +} + +function todo_start() { + echo TODO +} + +function todo_end() { + echo TODO +} + +# Output + +function diag() { + local message="$1" + + if [ -n "$message" ]; then + _bt_escaped_echo "# $message" + fi +} + +# Util functions for output capture within current shell + +function start_output_capture() { + if [ $_bt_output_capture = 1 ]; then + finish_output_capture + _bt_caller_error "Can't start output capture while already active" + fi + local stdout_tmpfile="/tmp/bash-itunes-test-out.$$" + local stderr_tmpfile="/tmp/bash-itunes-test-err.$$" + _bt_add_on_exit_cmd "rm -f '$stdout_tmpfile' '$stderr_tmpfile'" + _bt_output_capture=1 + exec 3>&1 >$stdout_tmpfile 4>&2 2>$stderr_tmpfile +} + +function finish_output_capture() { + local capture_stdout_varname="$1" + local capture_stderr_varname="$2" + if [ $_bt_output_capture != 1 ]; then + _bt_caller_error "Can't finish output capture when it wasn't started" + fi + exec 1>&3 3>&- 2>&4 4>&- + _bt_output_capture=0 + if [ -n "$capture_stdout_varname" ]; then + local stdout_tmpfile="/tmp/bash-itunes-test-out.$$" + eval "$capture_stdout_varname=\$(< $stdout_tmpfile)" + fi + if [ -n "$capture_stderr_varname" ]; then + local stderr_tmpfile="/tmp/bash-itunes-test-err.$$" + eval "$capture_stderr_varname=\$(< $stderr_tmpfile)" + fi +} + +# Internals + +function _bt_stdout() { + echo "$@" +} + +function _bt_stderr() { + echo "$@" >&2 +} + +function _bt_die() { + _bt_stderr "$@" + exit 255 +} + +# Report an error from the POV of the first calling point outside this file +function _bt_caller_error() { + local message="$*" + + local thisfile="${BASH_SOURCE[0]}" + 
 local file="$thisfile" + local frame_num=2 + until [ "$file" != "$thisfile" ]; do + frame=$(caller "$frame_num") + IFS=' ' read line func file <<<"$frame" + done + + _bt_die "Error: $message, on line $line of $file" +} + +# Echo the supplied message with lines after the +# first escaped as TAP comments. +function _bt_escaped_echo() { + local message="$*" + + local output='' + while IFS= read -r line; do + output="$output\n# $line" + done <<<"$message" + echo -e "${output:4}" +} + +function _bt_clear_out() { + _bt_tap_output="" +} + +function _bt_out() { + _bt_tap_output="$_bt_tap_output$*" +} + +function _bt_print_out() { + _bt_escaped_echo "$_bt_tap_output" +} + +# Cleanup stuff +function _bt_add_on_exit_cmd() { + _bt_on_exit_cmds[${#_bt_on_exit_cmds[*]}]="$*" +} + +function _bt_on_exit() { + if [ $_bt_output_capture = 1 ]; then + finish_output_capture + fi + for exit_cmd in "${_bt_on_exit_cmds[@]}"; do + diag "cleanup: $exit_cmd" + eval "$exit_cmd" + done + # TODO: check that we've output a plan/results +} diff --git a/tests/bash-tap/bash-tap-bootstrap b/tests/bash-tap/bash-tap-bootstrap new file mode 100755 index 0000000..23074de --- /dev/null +++ b/tests/bash-tap/bash-tap-bootstrap @@ -0,0 +1,28 @@ +#!/bin/bash +# +# Bash TAP Bootstrap: +# Copy this file into your project tests dir and source it +# from each test file with: +# . $(dirname $0)/bash-tap-bootstrap +# It takes care of finding bash-tap or outputting a usage message. +# + +bash_tap_bootstrap_version='1.0.2' + +if [ "${BASH_SOURCE[0]}" = "$0" ]; then + # Being run directly, probably by test harness running entire dir. + echo "1..0 # SKIP bash-tap-bootstrap isn't a test file" + exit 0 +fi + +if [ -z "$BASH_TAP_ROOT" ]; then + # TODO: search likely locations. + BASH_TAP_ROOT="$(dirname ${BASH_SOURCE[0]})/../../bash-tap" +fi + +if [ -f "$BASH_TAP_ROOT/bash-tap" ]; then + . "$BASH_TAP_ROOT/bash-tap" +else + echo "Bail out! Unable to find bash-tap. 
Install from https://github.com/illusori/bash-tap or set \$BASH_TAP_ROOT if you have it installed somewhere unusual." + exit 255 +fi diff --git a/tests/bash-tap/bash-tap-mock b/tests/bash-tap/bash-tap-mock new file mode 100755 index 0000000..1800198 --- /dev/null +++ b/tests/bash-tap/bash-tap-mock @@ -0,0 +1,106 @@ +#!/bin/bash +# +# While not directly TAP-specific, being able to mock stuff +# in tests is pretty useful. +# +# If you're using bash-tap-bootstrap, then just source this +# file in your tests from the bash-tap directory found by +# the bootstrap by including this line after you've sourced +# bash-tap-bootstrap: +# +# . "$BASH_TAP_ROOT/bash-tap-mock" +# +# If you're not using bash-tap-bootstrap then copy this file +# to your test directory and source it with: +# +# . $(dirname $0)/bash-tap-mock +# +# It's important to note that if you're capturing the arguments +# passed to your mock function in a variable, and want that +# variable to be accessible to your tests, you must ensure that +# the mocked function is executed in the current shell and not +# a subshell. In particular, this means you cannot use $() or +# `` to capture output of the function at the same time, as these +# invoke a subshell - the mock will happen, but any variables you +# set within your mock will only exist within the subshell. +# If you wish to capture output at the same time, you need to +# make use of the start_output_capture and finish_output_capture +# helper functions in bash-tap, or manually use file-descriptor +# redirects yourself to achieve the same effect. + +bash_tap_mock_version='1.0.2' + +if [ "${BASH_SOURCE[0]}" = "$0" ]; then + # Being run directly, probably by test harness running entire dir. 
+ echo "1..0 # SKIP bash-tap-mock isn't a test file" + exit 0 +fi + +function mock_function() { + local original_name="$1" + local mock_name="$2" + local save_original_as="_btm_mocked_${original_name}" + + if [ -z $(declare -F "$save_original_as") ]; then + _btm_copy_function "$original_name" "$save_original_as" + fi + _btm_copy_function "$mock_name" "$original_name" +} + +function restore_mocked_function() { + local original_name="$1" + local save_original_as="_btm_mocked_${original_name}" + + if [ ! -z $(declare -F "$save_original_as") ]; then + _btm_copy_function "$save_original_as" "$original_name" + unset -f "$save_original_as" + else + _btm_caller_error "Can't find saved original function '$original_name' to restore" + fi +} + +function mock_command() { + local command_name="$1" + local mock_name="$2" + + if [ ! -z $(declare -F "$command_name") ]; then + # It's not actually a command, it's a function, mock that + mock_function "$command_name" "$mock_name" + else + _btm_copy_function "$mock_name" "$command_name" + fi +} + +function restore_mocked_command() { + local command_name="$1" + + local save_original_as="_btm_mocked_${command_name}" + if [ ! -z $(declare -F "$save_original_as") ]; then + # Was actually a function mock not a command mock. 
+ restore_mocked_function "$command_name" + else + unset -f "$command_name" >/dev/null + fi +} + +# Copied from http://stackoverflow.com/a/1203628/870000 +function _btm_copy_function() { + declare -F $1 >/dev/null || _btm_caller_error "Can't find function '$1' to copy" + eval "$(echo "${2}()"; declare -f ${1} | tail -n +2)" +} + +# Report an error from the POV of the first calling point outside this file +function _btm_caller_error() { + local message="$*" + + local thisfile="${BASH_SOURCE[0]}" + local file="$thisfile" + local frame_num=2 + until [ "$file" != "$thisfile" ]; do + frame=$(caller "$frame_num") + IFS=' ' read line func file <<<"$frame" + done + + echo "Error: $message, on line $line of $file" >&2 + exit 255 +} diff --git a/tests/small.t b/tests/small.t index 1bec6a2..41a1148 100644 --- a/tests/small.t +++ b/tests/small.t @@ -1,12 +1,12 @@ #!/usr/bin/env bash -BASH_TAP_ROOT=${VGDIR}/deps/bash-tap -. ${VGDIR}/deps/bash-tap/bash-tap-bootstrap +BASH_TAP_ROOT=./bash-tap +. ${BASH_TAP_ROOT}/bash-tap-bootstrap PATH=../bin:$PATH PATH=../deps/hal:$PATH -plan tests 5 +plan tests 4 maf2hal small/small.maf small.hal hal2vg small.hal > small.vg @@ -14,21 +14,17 @@ vg view -j small.vg | jq . > small.json is $(vg validate small.vg | wc -l) 0 "output vg validates" -is $(jq --argfile a small.json --argfile b small/truth.json -n '($a == $b)') true "output graph identical to manually verified truth graph" +# jq craziness from https://stackoverflow.com/questions/31930041/using-jq-or-alternative-command-line-tools-to-compare-json-files +is $(jq --argfile a small.json --argfile b small/truth.json -n 'def post_recurse(f): def r: (f | select(. != null) | r), .; r; def post_recurse: post_recurse(.[]?); ($a | (post_recurse | arrays) |= sort) as $a | ($b | (post_recurse | arrays) |= sort) as $b | $a == $b') true "output graph identical to manually verified truth graph" -hal2vg small.hal --protoChunk 500 > small_chunk500.vg -vg view -j small_chunk500.vg | jq . 
> small_chunk500.json - -is $(jq --argfile a small_chunk500.json --argfile b small/truth.json -n '($a == $b)') true "output graph using --protoChunk 500 identical to manually verified truth graph" - -rm -f small.vg small.json small_chunk500.vg small_chunk500.json +rm -f small.vg small.json hal2vg small.hal --refGenome cat > small_cat.vg vg view -j small_cat.vg | jq . > small_cat.json is $(vg validate small_cat.vg | wc -l) 0 "output cat-referenced vg validates" -is $(jq --argfile a small_cat.json --argfile b small/truth_cat.json -n '($a == $b)') true "output cat-referenced graph identical to manually verified truth graph" +is $(jq --argfile a small_cat.json --argfile b small/truth_cat.json -n 'def post_recurse(f): def r: (f | select(. != null) | r), .; r; def post_recurse: post_recurse(.[]?); ($a | (post_recurse | arrays) |= sort) as $a | ($b | (post_recurse | arrays) |= sort) as $b | $a == $b') true "output cat-referenced graph identical to manually verified truth graph" rm -f small_cat.vg small_cat.json