Skip to content

Commit

Permalink
add chainnet tool from ucsc/kent (#6284)
Browse files Browse the repository at this point in the history
  • Loading branch information
martenson authored Aug 30, 2024
1 parent 21578f9 commit 09ce055
Show file tree
Hide file tree
Showing 13 changed files with 447 additions and 0 deletions.
9 changes: 9 additions & 0 deletions tools/ucsc_tools/ucsc_chainnet/.shed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Tool Shed metadata for the ucsc_chainnet repository.
name: ucsc_chainnet
owner: iuc
description: Make alignment nets out of chains
long_description: |
  chainNet makes alignment nets out of alignment chains. It writes two net
  files, one for the target sequence and one for the query sequence.
homepage_url: http://hgdownload.cse.ucsc.edu/admin/exe/
# Must point at this tool's directory, which is tools/ucsc_tools (underscore).
remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/master/tools/ucsc_tools/ucsc_chainnet
type: unrestricted
categories:
- Sequence Analysis
146 changes: 146 additions & 0 deletions tools/ucsc_tools/ucsc_chainnet/chainnet.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
<tool id="ucsc_chainnet" name="chainNet" version="@TOOL_VERSION@+galaxy0" profile="21.05" license="MIT">
    <!-- Galaxy wrapper around the UCSC chainNet command line tool. -->
    <description>make alignment nets out of alignment chains</description>
    <macros>
        <!-- Single source for the version used by the tool version, the package requirement and version_command. -->
        <token name="@TOOL_VERSION@">469</token>
    </macros>
    <xrefs>
        <xref type="bio.tools">UCSC_Genome_Browser_Utilities</xref>
    </xrefs>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">ucsc-chainnet</requirement>
    </requirements>
    <!-- The version is reported by echoing the token rather than by querying the binary. -->
    <version_command><![CDATA[ echo "@TOOL_VERSION@" ]]></version_command>
    <command detect_errors="exit_code"><![CDATA[
        ## Positional arguments, in order: input chain, target index, query index,
        ## target net output, query net output. Optional flags follow.
        chainNet
            '$in_chain'

            ## Target index: dataset from the history or a path from the fasta_indexes data table.
            #if str( $target_reference_index_source.target_reference_index_source_selector ) == "history":
                '${target_reference_index_source.in_tar_ref_index}'
            #else:
                '$target_reference_index_source.tar_ref_index_file.fields.path'
            #end if

            ## Query index: same choice as for the target.
            #if str( $query_reference_index_source.query_reference_index_source_selector ) == "history":
                '${query_reference_index_source.in_que_ref_index}'
            #else:
                '$query_reference_index_source.que_ref_index_file.fields.path'
            #end if

            '$targetNet'
            '$queryNet'

            ## Optional integers are only passed when the user supplied a value.
            #if str($minSpace):
                -minSpace=$minSpace
            #end if
            #if str($minFill):
                -minFill=$minFill
            #end if
            #if str($minScore):
                -minScore=$minScore
            #end if
            ## Expands to "-inclHap" when checked and to an empty string otherwise.
            $inclHap
            #if str($verbose):
                -verbose=$verbose
            #end if
    ]]></command>
    <inputs>
        <param name="in_chain" format="chain" type="data" label="Chain dataset" help="Select chain dataset" />

        <!-- Target index: either a built-in entry of the fasta_indexes data table or a history dataset. -->
        <conditional name="target_reference_index_source">
            <param name="target_reference_index_source_selector" type="select" label="Choose the source for the target reference genome index">
                <option value="cached">Built-in</option>
                <option value="history">From history</option>
            </param>
            <when value="cached">
                <param name="tar_ref_index_file" type="select" label="Using reference genome index">
                    <options from_data_table="fasta_indexes" />
                    <!-- Fail at form validation instead of at run time when the data table is empty. -->
                    <validator type="no_options" message="No built-in reference genome index is available" />
                </param>
            </when>
            <when value="history">
                <param name="in_tar_ref_index" format="tabular" type="data" label="Dataset with chrom sizes for target sequence. Typically a '.fai' file." />
            </when>
        </conditional>

        <!-- Query index: same structure as the target conditional above. -->
        <conditional name="query_reference_index_source">
            <param name="query_reference_index_source_selector" type="select" label="Choose the source for the query reference genome index">
                <option value="cached">Built-in</option>
                <option value="history">From history</option>
            </param>
            <when value="cached">
                <param name="que_ref_index_file" type="select" label="Using reference genome index">
                    <options from_data_table="fasta_indexes" />
                    <!-- Fail at form validation instead of at run time when the data table is empty. -->
                    <validator type="no_options" message="No built-in reference genome index is available" />
                </param>
            </when>
            <when value="history">
                <param name="in_que_ref_index" format="tabular" type="data" label="Dataset with chrom sizes for query sequence. Typically a '.fai' file." />
            </when>
        </conditional>

        <!-- Optional settings; when left empty the flag is omitted from the command line. -->
        <param argument="-minSpace" type="integer" optional="true" min="0" label="Minimum gap size to fill" help="default 25" />
        <param argument="-minFill" type="integer" optional="true" min="0" label="Minimum fill to record" help="default half of minSpace" />
        <param argument="-minScore" type="integer" optional="true" label="Minimum chain score to consider" help="default 2000" />
        <param argument="-inclHap" type="boolean" optional="true" truevalue="-inclHap" falsevalue="" label="Include query sequences name in the form *_hap*|*_alt*. Normally these are excluded from nets as being haplotype pseudochromosomes." />
        <param argument="-verbose" type="integer" optional="true" label="Verbosity" help="default 1" />

    </inputs>
    <outputs>
        <data name="targetNet" format="ucsc.net" label="${tool.name} on ${on_string}: target.net"/>
        <data name="queryNet" format="ucsc.net" label="${tool.name} on ${on_string}: query.net" />
    </outputs>
    <tests>
        <!-- Both indexes taken from the history. -->
        <test expect_num_outputs="2">
            <param name="in_chain" value="input.chain" />
            <conditional name="target_reference_index_source">
                <param name="target_reference_index_source_selector" value="history" />
                <param name="in_tar_ref_index" value="hg38.chr20.chunk.fasta.fai"/>
            </conditional>
            <conditional name="query_reference_index_source">
                <param name="query_reference_index_source_selector" value="history" />
                <param name="in_que_ref_index" value="mm39.chr2.chunk.fasta.fai"/>
            </conditional>
            <output name="targetNet" file="target.net"/>
            <output name="queryNet" file="query.net"/>
        </test>
        <!-- Both indexes taken from the fasta_indexes data table. -->
        <test expect_num_outputs="2">
            <param name="in_chain" value="input.chain" />
            <conditional name="target_reference_index_source">
                <param name="target_reference_index_source_selector" value="cached" />
                <param name="tar_ref_index_file" value="hg38.chr20.chunk"/>
            </conditional>
            <conditional name="query_reference_index_source">
                <param name="query_reference_index_source_selector" value="cached" />
                <param name="que_ref_index_file" value="mm39.chr2.chunk"/>
            </conditional>
            <output name="targetNet" file="target.net"/>
            <output name="queryNet" file="query.net"/>
        </test>
        <!-- Mixed index sources with a non-default minScore. -->
        <test expect_num_outputs="2">
            <param name="in_chain" value="input.chain" />
            <param name="minScore" value="1200" />
            <conditional name="target_reference_index_source">
                <param name="target_reference_index_source_selector" value="cached" />
                <param name="tar_ref_index_file" value="hg38.chr20.chunk"/>
            </conditional>
            <conditional name="query_reference_index_source">
                <param name="query_reference_index_source_selector" value="history" />
                <param name="in_que_ref_index" value="mm39.chr2.chunk.fasta.fai"/>
            </conditional>
            <output name="targetNet" file="target.1200.net"/>
            <output name="queryNet" file="query.1200.net"/>
        </test>
    </tests>
    <!-- The help body is reStructuredText and must stay at column 0; blank lines separate the heading, paragraphs and link targets. -->
    <help><![CDATA[
**What it does**

`chainNet`_ is a tool that makes alignment nets out of `alignment chains`_.

It outputs two files in the `ucsc.net`_ format -- one for the target sequence and one for the query sequence.

For implementation details see ChainNet's `source code`_.

.. _chainNet: http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/FOOTER.txt
.. _ucsc.net: https://genome.ucsc.edu/goldenPath/help/net.html
.. _alignment chains: https://genome.ucsc.edu/goldenPath/help/chain.html
.. _source code: https://github.com/ucscGenomeBrowser/kent/blob/master/src/hg/mouseStuff/chainNet/chainNet.c
    ]]></help>
    <citations>
        <citation type="doi">10.1093/bib/bbs038</citation>
    </citations>
</tool>
2 changes: 2 additions & 0 deletions tools/ucsc_tools/ucsc_chainnet/test-data/fasta_indexes.loc
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
hg38.chr20.chunk hg38 hg38.chr20.chunk ${__HERE__}/hg38.chr20.chunk.fasta.fai
mm39.chr2.chunk mm39 mm39.chr2.chunk ${__HERE__}/mm39.chr2.chunk.fasta.fai
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
hg38.chr20:10004760-11615810 1611050 30 1611050 1611051
159 changes: 159 additions & 0 deletions tools/ucsc_tools/ucsc_chainnet/test-data/input.chain
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
##matrix=axtChain 16 91,-114,-31,-123,-114,100,-125,-31,-31,-125,100,-114,-123,-31,-114,91
##gapPenalties=axtChain O=400 E=30
chain 64329 hg38.chr20:10004760-11615810 1611050 + 879848 882025 mm39.chr2:136351215-137822786 1471571 - 648557 650527 2
24 0 1
52 12 0
46 0 6
24 1 0
63 0 1
6 0 9
78 14 0
25 0 4
14 4 0
4 19 0
12 9 0
43 77 0
19 2 0
37 2 0
61 1 0
16 0 4
74 0 7
51 1 0
30 17 0
18 1 0
28 0 2
125 0 13
8 0 14
8 0 3
88 5 0
35 0 1
5 0 1
6 0 7
21 0 1
27 4 0
64 10 0
46 2 0
42 1 0
54 0 1
12 1 0
115 3 0
10 0 1
27 1 0
20 1 0
42 21 0
20 2 0
50 11 0
8 27 0
45 1 0
26 11 0
5 3 0
28 2 0
34 1 0
24 0 2
39 1 0
29 3 0
68 17 0
4 0 3
29

chain 13332 hg38.chr20:10004760-11615810 1611050 + 266744 267005 mm39.chr2:136351215-137822786 1471571 - 1226958 1227276 3
17 0 14
111 2 0
42 3 0
39 0 47
31 0 1
16

chain 9256 hg38.chr20:10004760-11615810 1611050 + 288278 288554 mm39.chr2:136351215-137822786 1471571 + 260289 260601 4
34 8 0
38 0 2
9 0 17
11 0 24
15 0 1
161

chain 8091 hg38.chr20:10004760-11615810 1611050 + 288114 288275 mm39.chr2:136351215-137822786 1471571 + 260754 260915 5
161

chain 6961 hg38.chr20:10004760-11615810 1611050 + 84589 84726 mm39.chr2:136351215-137822786 1471571 + 87751 87887 6
83 1 0
53

chain 6916 hg38.chr20:10004760-11615810 1611050 + 87669 87836 mm39.chr2:136351215-137822786 1471571 + 87101 87269 7
58 0 1
109

chain 5978 hg38.chr20:10004760-11615810 1611050 + 600994 601124 mm39.chr2:136351215-137822786 1471571 + 546215 546345 8
130

chain 5958 hg38.chr20:10004760-11615810 1611050 + 288132 288220 mm39.chr2:136351215-137822786 1471571 + 258580 258668 9
88

chain 5873 hg38.chr20:10004760-11615810 1611050 + 796556 796658 mm39.chr2:136351215-137822786 1471571 + 722616 722717 10
31 1 0
70

chain 5421 hg38.chr20:10004760-11615810 1611050 + 890884 891032 mm39.chr2:136351215-137822786 1471571 - 646945 647095 11
25 0 2
123

chain 4864 hg38.chr20:10004760-11615810 1611050 + 1425778 1425898 mm39.chr2:136351215-137822786 1471571 + 1342678 1342798 12
120

chain 4568 hg38.chr20:10004760-11615810 1611050 + 49202 49378 mm39.chr2:136351215-137822786 1471571 + 42266 42438 13
99 4 0
73

chain 3681 hg38.chr20:10004760-11615810 1611050 + 640387 640507 mm39.chr2:136351215-137822786 1471571 + 579921 580041 14
120

chain 2152 hg38.chr20:10004760-11615810 1611050 + 148667 148736 mm39.chr2:136351215-137822786 1471571 + 158262 158331 15
10 1 0
38 0 1
20

chain 1838 hg38.chr20:10004760-11615810 1611050 + 469454 469477 mm39.chr2:136351215-137822786 1471571 + 409807 409830 16
23

chain 1399 hg38.chr20:10004760-11615810 1611050 + 796671 796752 mm39.chr2:136351215-137822786 1471571 + 722717 722799 17
30 5 0
8 3 0
27 0 9
8

chain 1336 hg38.chr20:10004760-11615810 1611050 + 195854 195904 mm39.chr2:136351215-137822786 1471571 + 189780 189830 18
50

chain 1280 hg38.chr20:10004760-11615810 1611050 + 398052 398107 mm39.chr2:136351215-137822786 1471571 + 364812 364861 19
11 1 0
7 0 1
15 6 0
15

chain 1248 hg38.chr20:10004760-11615810 1611050 + 195952 195990 mm39.chr2:136351215-137822786 1471571 + 189050 189087 20
22 2 0
5 0 1
9

chain 1208 hg38.chr20:10004760-11615810 1611050 + 1121156 1121254 mm39.chr2:136351215-137822786 1471571 + 1062631 1062720 21
9 0 1
12 7 0
12 4 0
13 0 1
41

chain 1201 hg38.chr20:10004760-11615810 1611050 + 580524 580629 mm39.chr2:136351215-137822786 1471571 + 516292 516362 22
14 3 0
16 19 0
19 13 0
21

chain 1170 hg38.chr20:10004760-11615810 1611050 + 806859 806881 mm39.chr2:136351215-137822786 1471571 + 737566 737588 23
22

chain 1030 hg38.chr20:10004760-11615810 1611050 + 1532147 1532161 mm39.chr2:136351215-137822786 1471571 + 1423235 1423249 24
14

chain 1012 hg38.chr20:10004760-11615810 1611050 + 361832 361870 mm39.chr2:136351215-137822786 1471571 + 345076 345108 25
20 6 0
12
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
mm39.chr2:136351215-137822786 1471571 31 1471571 1471572
25 changes: 25 additions & 0 deletions tools/ucsc_tools/ucsc_chainnet/test-data/query.1200.net
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
##matrix=axtChain 16 91,-114,-31,-123,-114,100,-125,-31,-31,-125,100,-114,-123,-31,-114,91
##gapPenalties=axtChain O=400 E=30
net mm39.chr2:136351215-137822786 1471571
fill 42266 172 hg38.chr20:10004760-11615810 + 49202 176 id 13 score 4568 ali 172
fill 87101 168 hg38.chr20:10004760-11615810 + 87669 167 id 7 score 6916 ali 167
fill 87751 136 hg38.chr20:10004760-11615810 + 84589 137 id 6 score 6961 ali 136
fill 158262 69 hg38.chr20:10004760-11615810 + 148667 69 id 15 score 2152 ali 68
fill 189050 37 hg38.chr20:10004760-11615810 + 195952 38 id 20 score 1248 ali 36
fill 189780 50 hg38.chr20:10004760-11615810 + 195854 50 id 18 score 1336 ali 50
fill 244295 318 hg38.chr20:10004760-11615810 - 266744 261 id 3 score 13332 ali 256
gap 244343 47 hg38.chr20:10004760-11615810 - 266919 70
fill 258580 88 hg38.chr20:10004760-11615810 + 288132 88 id 9 score 5958 ali 88
fill 260289 312 hg38.chr20:10004760-11615810 + 288278 276 id 4 score 9256 ali 268
fill 260754 161 hg38.chr20:10004760-11615810 + 288114 161 id 5 score 8091 ali 161
fill 364812 49 hg38.chr20:10004760-11615810 + 398052 55 id 19 score 1280 ali 48
fill 409807 23 hg38.chr20:10004760-11615810 + 469454 23 id 16 score 1838 ali 23
fill 516292 70 hg38.chr20:10004760-11615810 + 580524 105 id 22 score 1201 ali 70
fill 546215 130 hg38.chr20:10004760-11615810 + 600994 130 id 8 score 5978 ali 130
fill 579921 120 hg38.chr20:10004760-11615810 + 640387 120 id 14 score 3681 ali 120
fill 722616 101 hg38.chr20:10004760-11615810 + 796556 102 id 10 score 5873 ali 101
fill 722717 82 hg38.chr20:10004760-11615810 + 796671 81 id 17 score 1399 ali 73
fill 821044 1970 hg38.chr20:10004760-11615810 - 879848 2177 id 2 score 64329 ali 1889
fill 824476 150 hg38.chr20:10004760-11615810 - 890884 148 id 11 score 5421 ali 148
fill 1062631 89 hg38.chr20:10004760-11615810 + 1121156 98 id 21 score 1208 ali 87
fill 1342678 120 hg38.chr20:10004760-11615810 + 1425778 120 id 12 score 4864 ali 120
18 changes: 18 additions & 0 deletions tools/ucsc_tools/ucsc_chainnet/test-data/query.net
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
##matrix=axtChain 16 91,-114,-31,-123,-114,100,-125,-31,-31,-125,100,-114,-123,-31,-114,91
##gapPenalties=axtChain O=400 E=30
net mm39.chr2:136351215-137822786 1471571
fill 42266 172 hg38.chr20:10004760-11615810 + 49202 176 id 13 score 4568 ali 172
fill 87101 168 hg38.chr20:10004760-11615810 + 87669 167 id 7 score 6916 ali 167
fill 87751 136 hg38.chr20:10004760-11615810 + 84589 137 id 6 score 6961 ali 136
fill 158262 69 hg38.chr20:10004760-11615810 + 148667 69 id 15 score 2152 ali 68
fill 244295 318 hg38.chr20:10004760-11615810 - 266744 261 id 3 score 13332 ali 256
gap 244343 47 hg38.chr20:10004760-11615810 - 266919 70
fill 258580 88 hg38.chr20:10004760-11615810 + 288132 88 id 9 score 5958 ali 88
fill 260289 312 hg38.chr20:10004760-11615810 + 288278 276 id 4 score 9256 ali 268
fill 260754 161 hg38.chr20:10004760-11615810 + 288114 161 id 5 score 8091 ali 161
fill 546215 130 hg38.chr20:10004760-11615810 + 600994 130 id 8 score 5978 ali 130
fill 579921 120 hg38.chr20:10004760-11615810 + 640387 120 id 14 score 3681 ali 120
fill 722616 101 hg38.chr20:10004760-11615810 + 796556 102 id 10 score 5873 ali 101
fill 821044 1970 hg38.chr20:10004760-11615810 - 879848 2177 id 2 score 64329 ali 1889
fill 824476 150 hg38.chr20:10004760-11615810 - 890884 148 id 11 score 5421 ali 148
fill 1342678 120 hg38.chr20:10004760-11615810 + 1425778 120 id 12 score 4864 ali 120
25 changes: 25 additions & 0 deletions tools/ucsc_tools/ucsc_chainnet/test-data/target.1200.net
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
##matrix=axtChain 16 91,-114,-31,-123,-114,100,-125,-31,-31,-125,100,-114,-123,-31,-114,91
##gapPenalties=axtChain O=400 E=30
net hg38.chr20:10004760-11615810 1611050
fill 49202 176 mm39.chr2:136351215-137822786 + 42266 172 id 13 score 4568 ali 172
fill 84589 137 mm39.chr2:136351215-137822786 + 87751 136 id 6 score 6961 ali 136
fill 87669 167 mm39.chr2:136351215-137822786 + 87101 168 id 7 score 6916 ali 167
fill 148667 69 mm39.chr2:136351215-137822786 + 158262 69 id 15 score 2152 ali 68
fill 195854 50 mm39.chr2:136351215-137822786 + 189780 50 id 18 score 1336 ali 50
fill 195952 38 mm39.chr2:136351215-137822786 + 189050 37 id 20 score 1248 ali 36
fill 266744 261 mm39.chr2:136351215-137822786 - 244295 318 id 3 score 13332 ali 256
fill 288114 161 mm39.chr2:136351215-137822786 + 260754 161 id 5 score 8091 ali 161
fill 288278 276 mm39.chr2:136351215-137822786 + 260289 312 id 4 score 9256 ali 268
fill 398052 55 mm39.chr2:136351215-137822786 + 364812 49 id 19 score 1280 ali 48
fill 469454 23 mm39.chr2:136351215-137822786 + 409807 23 id 16 score 1838 ali 23
fill 580524 105 mm39.chr2:136351215-137822786 + 516292 70 id 22 score 1201 ali 70
fill 600994 130 mm39.chr2:136351215-137822786 + 546215 130 id 8 score 5978 ali 130
fill 640387 120 mm39.chr2:136351215-137822786 + 579921 120 id 14 score 3681 ali 120
fill 796556 102 mm39.chr2:136351215-137822786 + 722616 101 id 10 score 5873 ali 101
fill 796671 81 mm39.chr2:136351215-137822786 + 722717 82 id 17 score 1399 ali 73
fill 879848 2177 mm39.chr2:136351215-137822786 - 821044 1970 id 2 score 64329 ali 1889
gap 880298 77 mm39.chr2:136351215-137822786 - 822602 0
gap 881628 27 mm39.chr2:136351215-137822786 - 821380 0
fill 890884 148 mm39.chr2:136351215-137822786 - 824476 150 id 11 score 5421 ali 148
fill 1121156 98 mm39.chr2:136351215-137822786 + 1062631 89 id 21 score 1208 ali 87
fill 1425778 120 mm39.chr2:136351215-137822786 + 1342678 120 id 12 score 4864 ali 120
Loading

0 comments on commit 09ce055

Please sign in to comment.