Skip to content

Commit

Permalink
Merge branch 'official-master' into issue-345
Browse files Browse the repository at this point in the history
  • Loading branch information
padix-key committed Aug 27, 2021
2 parents 3ba2ac5 + e147d65 commit e8b9ea1
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 23 deletions.
3 changes: 2 additions & 1 deletion doc/tutorial/src/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@
# :class:`FieldQuery`.
# A complete list of the available fields and their supported operators
# is documented
# `on this page <https://search.rcsb.org/search-attributes.html>`_.
# `on this page <https://search.rcsb.org/structure-search-attributes.html>`_
# and `on that page <https://search.rcsb.org/chemical-search-attributes.html>`_.

# Query for 'lacA' gene
query1 = rcsb.FieldQuery(
Expand Down
61 changes: 43 additions & 18 deletions src/biotite/database/rcsb/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,13 @@ def get_content(self):
"""
pass

def __and__(self, query):
return CompositeQuery([self, query], "and")

def __or__(self, query):
return CompositeQuery([self, query], "or")



class SingleQuery(Query, metaclass=abc.ABCMeta):
"""
Expand All @@ -68,12 +75,6 @@ def get_content(self):
"node_id": self._node_id,
"parameters": {}
}

def __and__(self, query):
return CompositeQuery([self, query], "and")

def __or__(self, query):
return CompositeQuery([self, query], "or")


class CompositeQuery(Query):
Expand Down Expand Up @@ -102,6 +103,16 @@ def __init__(self, queries, operator):
self._operator = operator

def get_content(self):
"""
A dictionary representation of the query.
This dictionary is the content of the ``'query'`` key in the
JSON query.
Returns
-------
content : dict
The dictionary representation of the query.
"""
content = {
"type": "group",
"logical_operator": self._operator,
Expand Down Expand Up @@ -138,7 +149,7 @@ def __init__(self, term):
def get_content(self):
content = super().get_content()
content["type"] = "terminal"
content["service"] = "text"
content["service"] = "full_text"
content["parameters"]["value"] = f'"{self._term}"'
return content

Expand All @@ -160,6 +171,12 @@ class FieldQuery(SingleQuery):
----------
field : str
The field to search in.
molecular_definition : bool, optional
If set to true, this query searches in fields
associated with
`molecular definitions <https://search.rcsb.org/chemical-search-attributes.html>`_.
If false (default), this query searches in fields
associated with `PDB structures <https://search.rcsb.org/structure-search-attributes.html>`_.
exact_match : str, optional
Operator for returning results whose field exactly matches the
given value.
Expand All @@ -183,19 +200,22 @@ class FieldQuery(SingleQuery):
-----
A complete list of the available fields and their supported operators
is documented at
`<https://search.rcsb.org/search-attributes.html>`_.
`<https://search.rcsb.org/structure-search-attributes.html>`_
and
`<https://search.rcsb.org/chemical-search-attributes.html>`_.
Examples
--------
>>> query = FieldQuery("reflns.d_resolution_high", less_or_equal=0.6)
>>> print(sorted(search(query)))
['1EJG', '1I0T', '2GLT', '3NIR', '3P4J', '4JLJ', '5D8V', '5NW3']
['1EJG', '1I0T', '2GLT', '3NIR', '3P4J', '4JLJ', '5D8V', '5NW3', '7ATG']
"""
def __init__(self, field, **kwargs):
def __init__(self, field, molecular_definition=False, **kwargs):
super().__init__()
self._negation = False
self._field = field
self._mol_definition = molecular_definition

if len(kwargs) > 1:
raise TypeError("Only one operator must be given")
Expand Down Expand Up @@ -234,7 +254,10 @@ def __init__(self, field, **kwargs):
def get_content(self):
content = super().get_content()
content["type"] = "terminal"
content["service"] = "text"
if self._mol_definition:
content["service"] = "text_chem"
else:
content["service"] = "text"
content["parameters"]["attribute"] = self._field
content["parameters"]["operator"] = self._operator
content["parameters"]["negation"] = self._negation
Expand Down Expand Up @@ -440,10 +463,10 @@ def count(query, return_type="entry"):
>>> query = FieldQuery("reflns.d_resolution_high", less_or_equal=0.6)
>>> print(count(query))
8
9
>>> ids = search(query)
>>> print(sorted(ids))
['1EJG', '1I0T', '2GLT', '3NIR', '3P4J', '4JLJ', '5D8V', '5NW3']
['1EJG', '1I0T', '2GLT', '3NIR', '3P4J', '4JLJ', '5D8V', '5NW3', '7ATG']
"""
if return_type not in [
"entry", "polymer_instance", "assembly",
Expand Down Expand Up @@ -510,7 +533,9 @@ def search(query, return_type="entry", range=None, sort_by=None):
If specified, the returned PDB IDs are sorted by the values
of the given field name in descending order.
A complete list of the available fields is documented at
`<https://search.rcsb.org/search-attributes.html>`_.
`<https://search.rcsb.org/structure-search-attributes.html>`_
and
`<https://search.rcsb.org/chemical-search-attributes.html>`_.
Returns
-------
Expand All @@ -523,15 +548,15 @@ def search(query, return_type="entry", range=None, sort_by=None):
>>> query = FieldQuery("reflns.d_resolution_high", less_or_equal=0.6)
>>> print(sorted(search(query)))
['1EJG', '1I0T', '2GLT', '3NIR', '3P4J', '4JLJ', '5D8V', '5NW3']
['1EJG', '1I0T', '2GLT', '3NIR', '3P4J', '4JLJ', '5D8V', '5NW3', '7ATG']
>>> print(search(query, sort_by="rcsb_accession_info.initial_release_date"))
['5NW3', '5D8V', '4JLJ', '3P4J', '3NIR', '1I0T', '1EJG', '2GLT']
['7ATG', '5NW3', '5D8V', '4JLJ', '3P4J', '3NIR', '1I0T', '1EJG', '2GLT']
>>> print(search(
... query, range=(1,4), sort_by="rcsb_accession_info.initial_release_date"
... ))
['5D8V', '4JLJ', '3P4J']
['5NW3', '5D8V', '4JLJ']
>>> print(sorted(search(query, return_type="polymer_instance")))
['1EJG.A', '1I0T.A', '1I0T.B', '2GLT.A', '3NIR.A', '3P4J.A', '3P4J.B', '4JLJ.A', '4JLJ.B', '5D8V.A', '5NW3.A']
['1EJG.A', '1I0T.A', '1I0T.B', '2GLT.A', '3NIR.A', '3P4J.A', '3P4J.B', '4JLJ.A', '4JLJ.B', '5D8V.A', '5NW3.A', '7ATG.A', '7ATG.B']
"""
if return_type not in [
"entry", "polymer_instance", "assembly",
Expand Down
21 changes: 17 additions & 4 deletions tests/database/test_rcsb.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,49 +67,62 @@ def test_search_basic():


@pytest.mark.parametrize(
"field, params, ref_ids",
"field, molecular_definition, params, ref_ids",
[
(
"pdbx_serial_crystallography_sample_delivery_injection.preparation",
False,
{},
["6IG7", "6IG6", "7JRI"]
),
(
"audit_author.name",
False,
{"is_in": ["Neidigh, J.W."]},
["1JRJ", "1L2Y", "2O3P", "2O63", "2O64", "2O65"]
),
(
"rcsb_entity_source_organism.rcsb_gene_name.value",
False,
{"exact_match": "lacA"},
["5JUV", "1KQA", "1KRV", "1KRU", "1KRR", "1TG7", "1XC6", "3U7V",
"4IUG", "4LFK", "4LFL", "4LFM", "4LFN", "5IFP", "5IFT", "5IHR",
"4DUW", "5MGD", "5MGC"]
),
(
"struct.title",
False,
{"contains_words": "tc5b"},
["1L2Y"]
),
(
"reflns.d_resolution_high",
False,
{"less_or_equal": 0.6},
["1EJG", "1I0T", "3NIR", "3P4J", "5D8V", "5NW3", "4JLJ", "2GLT"]
["1EJG", "1I0T", "3NIR", "3P4J", "5D8V", "5NW3", "4JLJ", "2GLT",
"7ATG"]
),
(
"rcsb_entry_info.deposited_model_count",
False,
{"range_closed": (60, 61)},
["1BBO", "1GB1", "1O5P", "1XU6", "2LUM", "2NO8"]
),
(
"rcsb_id",
True,
{"exact_match": "AIN"},
["1OXR", "1TGM", "3IAZ", "3GCL", "6MQF", "2QQT", "4NSB"]
),
]
)
@pytest.mark.skipif(
cannot_connect_to(RCSB_URL),
reason="RCSB PDB is not available"
)
def test_search_field(field, params, ref_ids):
def test_search_field(field, molecular_definition, params, ref_ids):
query = rcsb.FieldQuery(
field, **params
field, molecular_definition, **params
)
test_ids = rcsb.search(query)
test_count = rcsb.count(query)
Expand Down

0 comments on commit e8b9ea1

Please sign in to comment.