Skip to content

Commit

Permalink
Merge pull request #6648 from pavanvidem/adata-filter-to-scanpy
Browse files Browse the repository at this point in the history
Move filtering functions from Anndata to Scanpy tool
  • Loading branch information
bgruening authored Jan 9, 2025
2 parents 5de5039 + a4da0ac commit 637a6eb
Showing 1 changed file with 181 additions and 19 deletions.
200 changes: 181 additions & 19 deletions tools/scanpy/filter.xml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<tool id="scanpy_filter" name="Scanpy filter" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
<tool id="scanpy_filter" name="Scanpy filter" version="@TOOL_VERSION@+galaxy3" profile="@PROFILE@">
<description>mark and subsample</description>
<macros>
<import>macros.xml</import>
Expand Down Expand Up @@ -49,6 +49,58 @@ sc.pp.filter_genes(
@CMD_ANNDATA_WRITE_OUTPUTS@
#else if $method.method == 'filter_any'
## Generic filter: build a selector named 'filtered' (a boolean mask derived
## from one obs/var column, or a list of index labels) and subset adata with it.
    #if $method.filter.filter == 'key'
        ## Start from the requested annotation column of the chosen axis.
        #if $method.var_obs == 'var'
filtered = adata.var['$method.filter.key']
        #else if $method.var_obs == 'obs'
filtered = adata.obs['$method.filter.key']
        #end if
        #if $method.filter.filter_key.type == 'number'
            ## One branch per option of the numeric "Filter" select.
            #if $method.filter.filter_key.filter == 'equal'
filtered = filtered == $method.filter.filter_key.value
            ## This branch previously tested 'equal' a second time, so it was
            ## unreachable and 'not_equal' left 'filtered' as the raw column.
            #else if $method.filter.filter_key.filter == 'not_equal'
filtered = filtered != $method.filter.filter_key.value
            #else if $method.filter.filter_key.filter == 'less'
filtered = filtered < $method.filter.filter_key.value
            #else if $method.filter.filter_key.filter == 'less_or_equal'
filtered = filtered <= $method.filter.filter_key.value
            #else if $method.filter.filter_key.filter == 'greater'
filtered = filtered > $method.filter.filter_key.value
            #else if $method.filter.filter_key.filter == 'greater_or_equal'
filtered = filtered >= $method.filter.filter_key.value
            #end if
        #else if $method.filter.filter_key.type == 'text'
            ## The text select only offers 'equal' and 'not_equal'; the value is
            ## compared as a quoted string.
            #if $method.filter.filter_key.filter == 'equal'
filtered = filtered == '$method.filter.filter_key.value'
            #else
filtered = filtered != '$method.filter.filter_key.value'
            #end if
        #else if $method.filter.filter_key.type == 'boolean'
            ## The boolean parameter renders as True or False; entries equal to it are kept.
filtered = filtered == $method.filter.filter_key.value
        #end if
    #else if $method.filter.filter == 'index'
        #if str($method.filter.index.format) == 'file'
## Index labels come from a file, one per line, with surrounding whitespace stripped.
with open('$method.filter.index.file', 'r') as filter_f:
    filters = [str(x.strip()) for x in filter_f.readlines()]
filtered = filters
        #else
            ## Index labels come from the comma-separated text field; the list is
            ## built while the template is rendered and written into the script.
            #set $filters = [str(x.strip()) for x in $method.filter.index.text.split(',')]
filtered = $filters
        #end if
    #end if
## Echo the selector to stdout.
print(filtered)
    ## Apply the selector along the chosen axis: columns for var, rows for obs.
    #if $method.var_obs == 'var'
adata = adata[:,filtered]
    #else if $method.var_obs == 'obs'
adata = adata[filtered, :]
    #end if
@CMD_ANNDATA_WRITE_OUTPUTS@
#else if $method.method == 'tl.filter_rank_genes_groups'
sc.tl.filter_rank_genes_groups(
adata,
Expand Down Expand Up @@ -218,6 +270,7 @@ sc.pp.scrublet(
<param argument="method" type="select" label="Method used for filtering">
<option value="pp.filter_cells">Filter cell outliers based on counts and numbers of genes expressed, using 'pp.filter_cells'</option>
<option value="pp.filter_genes">Filter genes based on number of cells or counts, using 'pp.filter_genes'</option>
<option value="filter_any">Filter on any column of observations or variables</option>
<option value="tl.filter_rank_genes_groups">Filters out genes based on fold change and fraction of genes expressing the gene within and outside the groupby categories, using 'tl.filter_rank_genes_groups'</option>
<option value="pp.highly_variable_genes">Annotate (and filter) highly variable genes, using 'pp.highly_variable_genes'</option>
<option value="pp.subsample">Subsample to a fraction of the number of observations, using 'pp.subsample'</option>
Expand All @@ -234,16 +287,16 @@ sc.pp.scrublet(
<option value="max_genes">Maximum number of genes expressed</option>
</param>
<when value="min_counts">
<param argument="min_counts" type="integer" min="0" value="" label="Minimum number of counts required for a cell to pass filtering"/>
<param argument="min_counts" type="integer" min="0" value="0" label="Minimum number of counts required for a cell to pass filtering"/>
</when>
<when value="max_counts">
<param argument="max_counts" type="integer" min="0" value="" label="Maximum number of counts required for a cell to pass filtering"/>
<param argument="max_counts" type="integer" min="0" value="100000000" label="Maximum number of counts required for a cell to pass filtering"/>
</when>
<when value="min_genes">
<param argument="min_genes" type="integer" min="0" value="" label="Minimum number of genes expressed required for a cell to pass filtering"/>
<param argument="min_genes" type="integer" min="0" value="0" label="Minimum number of genes expressed required for a cell to pass filtering"/>
</when>
<when value="max_genes">
<param argument="max_genes" type="integer" min="0" value="" label="Maximum number of genes expressed required for a cell to pass filtering"/>
<param argument="max_genes" type="integer" min="0" value="100000000" label="Maximum number of genes expressed required for a cell to pass filtering"/>
</when>
</conditional>
</when>
Expand All @@ -256,16 +309,72 @@ sc.pp.scrublet(
<option value="max_cells">Maximum number of cells expressed</option>
</param>
<when value="min_counts">
<param argument="min_counts" type="integer" min="0" value="" label="Minimum number of counts required for a gene to pass filtering"/>
<param argument="min_counts" type="integer" min="0" value="" optional="true" label="Minimum number of counts required for a gene to pass filtering"/>
</when>
<when value="max_counts">
<param argument="max_counts" type="integer" min="0" value="" label="Maximum number of counts required for a gene to pass filtering"/>
<param argument="max_counts" type="integer" min="0" value="" optional="true" label="Maximum number of counts required for a gene to pass filtering"/>
</when>
<when value="min_cells">
<param argument="min_cells" type="integer" min="0" value="" label="Minimum number of cells expressed required for a gene to pass filtering"/>
<param argument="min_cells" type="integer" min="0" value="" optional="true" label="Minimum number of cells expressed required for a gene to pass filtering"/>
</when>
<when value="max_cells">
<param argument="max_cells" type="integer" min="0" value="" label="Maximum number of cells expressed required for a gene to pass filtering"/>
<param argument="max_cells" type="integer" min="0" value="" optional="true" label="Maximum number of cells expressed required for a gene to pass filtering"/>
</when>
</conditional>
</when>
<when value="filter_any">
    <!-- Axis to subset: variables (var) or observations (obs). -->
    <param name="var_obs" type="select" label="What to filter?">
        <option value="var">Variables (var)</option>
        <option value="obs">Observations (obs)</option>
    </param>
    <!-- Selection mode: compare the values of one column, or list index labels to keep. -->
    <conditional name="filter">
        <param name="filter" type="select" label="Type of filtering?">
            <option value="key">By key (column) values</option>
            <option value="index">By index (row)</option>
        </param>
        <when value="key">
            <param name="key" type="text" value="n_genes" label="Key to filter"/>
            <!-- The value type decides which comparison operators are offered. -->
            <conditional name="filter_key">
                <param name="type" type="select" label="Type of value to filter">
                    <option value="number">Number</option>
                    <option value="text">Text</option>
                    <option value="boolean">Boolean</option>
                </param>
                <when value="number">
                    <param name="filter" type="select" label="Filter">
                        <option value="equal">equal to</option>
                        <option value="not_equal">not equal to</option>
                        <option value="less">less than</option>
                        <option value="less_or_equal">less than or equal to</option>
                        <option value="greater">greater than</option>
                        <option value="greater_or_equal">greater than or equal to</option>
                    </param>
                    <param name="value" type="float" value="2500" label="Value"/>
                </when>
                <when value="text">
                    <param name="filter" type="select" label="Filter">
                        <option value="equal">equal to</option>
                        <option value="not_equal">not equal to</option>
                    </param>
                    <param name="value" type="text" value="2500" label="Value"/>
                </when>
                <when value="boolean">
                    <!-- No operator select: the chosen truth value itself is what is kept. -->
                    <param name="value" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Value to keep"/>
                </when>
            </conditional>
        </when>
        <when value="index">
            <!-- Index labels are typed as comma-separated text or read from a dataset. -->
            <conditional name="index">
                <param name="format" type="select" label="Format for the filter by index">
                    <option value="file">File</option>
                    <option value="text" selected="true">Text</option>
                </param>
                <when value="text">
                    <param name="text" type="text" value="" label="List of index to keep" help="Indexes separated by a comma"/>
                </when>
                <when value="file">
                    <param name="file" type="data" format="txt" label="File with the list of index to keep" help="One index per line"/>
                </when>
            </conditional>
        </when>
    </conditional>
</when>
Expand Down Expand Up @@ -478,7 +587,60 @@ sc.pp.scrublet(
</output>
</test>

<!-- test 4 -->
<test expect_num_outputs="1">
    <!-- test 4: filter_any on variables, keeping two index labels given as comma-separated text -->
    <param name="adata" value="krumsiek11.h5ad"/>
    <conditional name="method">
        <param name="method" value="filter_any"/>
        <param name="var_obs" value="var"/>
        <conditional name="filter">
            <param name="filter" value="index"/>
            <conditional name="index">
                <param name="format" value="text"/>
                <param name="text" value="Gata2,EKLF"/>
            </conditional>
        </conditional>
    </conditional>
    <!-- stdout must report the filtered dimensions -->
    <assert_stdout>
        <has_text_matching expression="640 × 2"/>
    </assert_stdout>
    <!-- these keys must still be present in the written h5ad -->
    <output name="anndata_out" ftype="h5ad">
        <assert_contents>
            <has_h5_keys keys="obs/cell_type"/>
            <has_h5_keys keys="uns/highlights"/>
            <has_h5_keys keys="uns/iroot"/>
        </assert_contents>
    </output>
</test>
<test expect_num_outputs="1">
    <!-- test 5: filter_any on observations, keeping rows whose cell_type column equals 'progenitor' -->
    <param name="adata" value="krumsiek11.h5ad"/>
    <conditional name="method">
        <param name="method" value="filter_any"/>
        <param name="var_obs" value="obs"/>
        <conditional name="filter">
            <param name="filter" value="key"/>
            <param name="key" value="cell_type"/>
            <conditional name="filter_key">
                <param name="type" value="text"/>
                <param name="filter" value="equal"/>
                <param name="value" value="progenitor"/>
            </conditional>
        </conditional>
    </conditional>
    <!-- stdout must report the filtered dimensions -->
    <assert_stdout>
        <has_text_matching expression="320 × 11"/>
    </assert_stdout>
    <!-- these keys must still be present in the written h5ad -->
    <output name="anndata_out" ftype="h5ad">
        <assert_contents>
            <has_h5_keys keys="obs/cell_type"/>
            <has_h5_keys keys="uns/highlights"/>
            <has_h5_keys keys="uns/iroot"/>
        </assert_contents>
    </output>
</test>

<!-- test 6 -->
<!-- Fails to write to anndata after tl.filter_rank_genes_groups
Issue has been reported here: https://github.com/scverse/anndata/issues/726
The current fix is: del adata.uns['rank_genes_groups_filtered'] -->
Expand Down Expand Up @@ -511,7 +673,7 @@ sc.pp.scrublet(
</output>
</test>

<!-- test 5 -->
<!-- test 7 -->
<test expect_num_outputs="2">
<param name="adata" value="blobs.h5ad"/>
<conditional name="method">
Expand Down Expand Up @@ -539,7 +701,7 @@ sc.pp.scrublet(
</output>
</test>

<!-- test 6 -->
<!-- test 8 -->
<test expect_num_outputs="2">
<param name="adata" value="krumsiek11.h5ad"/>
<conditional name="method">
Expand Down Expand Up @@ -570,7 +732,7 @@ sc.pp.scrublet(
</output>
</test>

<!-- test 7 -->
<!-- test 9 -->
<test expect_num_outputs="2">
<param name="adata" value="krumsiek11.h5ad"/>
<conditional name="method">
Expand Down Expand Up @@ -600,8 +762,8 @@ sc.pp.scrublet(
</output>
</test>

<!-- test 8 -->
<test expect_num_outputs="2">
<!-- test 10 -->
<test expect_num_outputs="2">
<param name="adata" value="krumsiek11.h5ad"/>
<conditional name="method">
<param name="method" value="pp.subsample"/>
Expand Down Expand Up @@ -630,7 +792,7 @@ sc.pp.scrublet(
</output>
</test>

<!-- test 9 -->
<!-- test 11 -->
<test expect_num_outputs="2">
<param name="adata" value="random-randint.h5ad"/>
<conditional name="method">
Expand All @@ -657,7 +819,7 @@ sc.pp.scrublet(
</output>
</test>

<!-- test 10 -->
<!-- test 12 -->
<test expect_num_outputs="2">
<param name="adata" value="random-randint.h5ad"/>
<conditional name="method">
Expand Down Expand Up @@ -686,7 +848,7 @@ sc.pp.scrublet(
</output>
</test>

<!-- test 10 -->
<!-- test 13 -->
<test expect_num_outputs="2">
<param name="adata" value="cosg.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_1.h5ad"/>
<conditional name="method">
Expand Down Expand Up @@ -716,7 +878,7 @@ sc.pp.scrublet(
</output>
</test>

<!-- test 11 -->
<!-- test 14 -->
<test expect_num_outputs="2">
<param name="adata" value="krumsiek11.h5ad"/>
<conditional name="method">
Expand Down

0 comments on commit 637a6eb

Please sign in to comment.