Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for indexed GAF files #456

Merged
merged 4 commits into from
Sep 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,12 @@ RUN apt-get -qq update && apt-get -qq install -y \
npm \
nano

RUN npm cache clean -f

RUN npm install -g n && n stable

# download vg binary
RUN wget --quiet --no-check-certificate https://github.com/vgteam/vg/releases/download/v1.48.0/vg \
RUN wget --quiet --no-check-certificate https://github.com/vgteam/vg/releases/download/v1.59.0/vg \
&& mv vg /bin/vg && chmod +x /bin/vg

WORKDIR /build
Expand All @@ -29,10 +33,6 @@ COPY docker/config.json /build/sequenceTubeMap/src/

WORKDIR /build/sequenceTubeMap

RUN npm cache clean -f

RUN npm install -g n && n stable

RUN npm install

RUN npx browserslist@latest --update-db
Expand Down
93 changes: 59 additions & 34 deletions docker/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,53 +4,67 @@
{
"name": "snp1kg-BRCA1",
"tracks": [
{"files": [{"type": "graph", "name": "snp1kg-BRCA1.vg.xg"}]},
{"files": [{"type": "read", "name": "NA12878-BRCA1.sorted.gam"}]}
{"trackFile": "exampleData/internal/snp1kg-BRCA1.vg.xg", "trackType": "graph", "trackColorSettings": {"mainPalette": "greys", "auxPalette": "ygreys"}},
{"trackFile": "exampleData/internal/NA12878-BRCA1.sorted.gam", "trackType": "read"}
],
"region": "17:1-100",
"bedFile": "snp1kg-BRCA1.bed",
"dataType": "built-in"
},
{
"name": "vg \"small\" example",
"tracks": [
{"files": [{"type": "graph", "name": "x.vg.xg"}]},
{"files": [{"type": "haplotype", "name": "x.vg.gbwt"}]}
],
"bedFile": "exampleData/internal/snp1kg-BRCA1.bed",
"dataType": "built-in",
"region": "x:1-100"
},
{
"name": "cactus",
"tracks": [
{"files": [{"type": "graph", "name": "cactus.vg.xg"}]},
{"files": [{"type": "read", "name": "cactus-NA12879.sorted.gam"}]}
],
"bedFile": "cactus.bed",
"region": "ref:1-100",
"dataType": "built-in"
"simplify": false,
"removeSequences": false
},
{
"name": "cactus multiple reads",
"name": "Lancet example",
"tracks": [
{"files": [{"type": "graph", "name": "cactus.vg.xg"}]},
{"files": [{"type": "read", "name": "cactus0_10.sorted.gam"}]},
{"files": [{"type": "read", "name": "cactus10_20.sorted.gam"}]}
{
"trackFile": null,
"trackType": "graph",
"trackColorSettings": {
"mainPalette": "#000000",
"auxPalette": "greys",
"colorReadsByMappingQuality": false
}
},
{
"trackFile": null,
"trackType": "read",
"trackColorSettings": {
"mainPalette": "blues",
"auxPalette": "blues"
}
},
{
"trackFile": null,
"trackType": "read",
"trackColorSettings": {
"mainPalette": "reds",
"auxPalette": "reds"
}
}
],
"bedFile": "cactus.bed",
"region": "ref:1-100",
"bedFile": "https://public.gi.ucsc.edu/~anovak/vg-data/lancet_2023-11-07/index.bed",
"region": "chr1:7290357-7290857",
"dataType": "built-in"
}
],
"vgPath": "",
"dataPath": "/data",
"internalDataPath": "./exampleData/internal",
"internalDataPath": "exampleData/internal/",
"tempDirPath": "temp",
"fetchTimeout": 15,
"maxFileSizeBytes": 1000000000,

"defaultHaplotypeColorPalette" : {
"mainPalette": "greys",
"defaultGraphColorPalette" : {
"mainPalette": "#000000",
"auxPalette": "greys",
"colorReadsByMappingQuality": false
},

"defaultHaplotypeColorPalette" : {
"mainPalette": "plainColors",
"auxPalette": "lightColors",
"colorReadsByMappingQuality": false
},

"defaultReadColorPalette" : {
"mainPalette": "blues",
Expand All @@ -61,10 +75,21 @@
"defaultTrackProps" : {
"trackType": "graph",
"trackColorSettings": {
"mainPalette": "blues",
"auxPalette": "reds",
"mainPalette": "#000000",
"auxPalette": "greys",
"colorReadsByMappingQuality": false
}
}
},

"fileTypeToExtensions": {
"graph": ".xg,.vg,.hg,.gbz,.pg,.db",
"haplotype": ".gbwt,.gbz",
"read": ".gam,.gaf.gz"
},

"MAXUPLOADSIZE": 5242880,
"pickerTypeOptions": ["mounted", "upload"],
"fileExpirationTime": 86400


}
Binary file added exampleData/cactus-NA12879.gaf.gz
Binary file not shown.
Binary file added exampleData/cactus-NA12879.gaf.gz.tbi
Binary file not shown.
2 changes: 1 addition & 1 deletion src/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@
"fileTypeToExtensions": {
"graph": ".xg,.vg,.hg,.gbz,.pg,.db",
"haplotype": ".gbwt,.gbz",
"read": ".gam"
"read": ".gam,.gaf.gz"
},

"MAXUPLOADSIZE": 5242880,
Expand Down
93 changes: 79 additions & 14 deletions src/server.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -618,16 +618,35 @@ async function getChunkedData(req, res, next) {
}

// push all gam files
let anyGam = false;
let anyGaf = false;
for (const gamFile of gamFiles) {
if (!gamFile.endsWith(".gam")) {
throw new BadRequestError("GAM file doesn't end in .gam: " + gamFile);
if (!gamFile.endsWith(".gam") && !gamFile.endsWith(".gaf.gz")) {
throw new BadRequestError("GAM/GAF file doesn't end in .gam or .gaf.gz: " + gamFile);
}
if (!isAllowedPath(gamFile)) {
throw new BadRequestError("GAM file path not allowed: " + gamFile);
throw new BadRequestError("GAM/GAF file path not allowed: " + gamFile);
}
// Use a GAM index
console.log("pushing gam file", gamFile);
vgChunkParams.push("-a", gamFile, "-g");
if (gamFile.endsWith(".gam")) {
// Use a GAM index
console.log("pushing gam file", gamFile);
anyGam = true;
}
if (gamFile.endsWith(".gaf.gz")) {
// Use a GAF with index
console.log("pushing gaf file", gamFile);
anyGaf = true;
}
vgChunkParams.push("-a", gamFile);
}
if (anyGam && anyGaf){
throw new BadRequestError("Reads must be either GAM files or GAF files, not mix both.");
}
if (anyGaf){
vgChunkParams.push("-F", "-g");
}
if (anyGam){
vgChunkParams.push("-g");
}

// to search by node ID use "node" for the sequence name, e.g. 'node:1-10'
Expand Down Expand Up @@ -1176,11 +1195,54 @@ function processGamFile(req, res, next, gamFile, gamFileNumber) {
try {
if (!isAllowedPath(gamFile)) {
// This is probably under SCRATCH_DATA_PATH
throw new BadRequestError("Path to GAM file not allowed: " + req.gamFile);
throw new BadRequestError("Path to GAM/GAF file not allowed: " + req.gamFile);
}

const vgViewChild = spawn(`${VG_PATH}vg`, ["view", "-j", "-a", gamFile]);
let vgViewParams = ["view", "-j", "-a"];
let vgConvertParams = ["convert"];

if (gamFile.endsWith(".gaf")) {
// if input is GAF, vg convert will be piped into vg view
vgViewParams.push("-");
// vg convert needs the graph to convert GAF to GAM
const graphFile = getFirstFileOfType(req.body.tracks, fileTypes.GRAPH);
vgConvertParams.push("-F", gamFile, graphFile);
}
if (gamFile.endsWith(".gam")) {
// if input is GAM, no conversion is needed: vg view reads the file directly
vgViewParams.push(gamFile);
}

const vgViewChild = spawn(`${VG_PATH}vg`, vgViewParams);

if (gamFile.endsWith(".gaf")) {
// if input was a GAF, run vg convert and pipe stdout to vg view
const vgConvertChild = spawn(`${VG_PATH}vg`, vgConvertParams);

vgConvertChild.stdout.on("data", function (data) {
vgViewChild.stdin.write(data);
});

vgConvertChild.stderr.on("data", (data) => {
console.log(`vg convert err data: ${data}`);
req.error += data;
});

vgConvertChild.on("close", (code) => {
console.log(`vg convert exited with code ${code}`);
vgViewChild.stdin.end();
if (code !== 0) {
console.log("Error from " + VG_PATH + "vg " + vgConvertParams.join(" "));
// Execution failed
if (!sentResponse) {
sentResponse = true;
return next(new VgExecutionError("vg convert failed"));
}
}
});

}

vgViewChild.stderr.on("data", (data) => {
console.log(`err data: ${data}`);
});
Expand Down Expand Up @@ -1214,24 +1276,24 @@ function processGamFile(req, res, next, gamFile, gamFileNumber) {
function processGamFiles(req, res, next) {
try {
console.time("processing gam files");
// Find gam files
// Find gam/gaf files
let gamFiles = [];
fs.readdirSync(req.chunkDir).forEach((file) => {
console.log(file);
if (file.endsWith(".gam")) {
if (file.endsWith(".gam") || file.endsWith(".gaf")) {
gamFiles.push(req.chunkDir + "/" + file);
}
});

// Parse a GAM chunk name and get the GAM number from it
// Names are like:
// Names are like, with either .gam or .gaf suffixes:
// */chunk_*.gam for 0
// */chunk-1_*.gam for 1, 2, 3, etc.
let gamNameToNumber = (gamName) => {
const pattern = /.*\/chunk(-([0-9])+)?_.*\.gam/;
let matches = gamName.match(pattern);
const pattern = /.*\/chunk(-([0-9])+)?_.*\.ga[mf]/
let matches = gamName.match(pattern)
if (!matches) {
throw new InternalServerError("Bad GAM name " + gamName);
throw new InternalServerError("Bad GAM/GAF name " + gamName)
}
if (matches[2] !== undefined) {
// We have a number
Expand Down Expand Up @@ -1503,6 +1565,9 @@ api.get("/getFilenames", (req, res) => {
if (file.endsWith(".sorted.gam")) {
result.files.push({ trackFile: clientPath, trackType: "read" });
}
if (file.endsWith(".gaf.gz")) {
result.files.push({"trackFile": file, "trackType": "read"});
}
if (file.endsWith(".bed")) {
result.bedFiles.push(clientPath);
}
Expand Down
Loading