From 38aaa401aaa837c6e596d2c10c38cad04249acbc Mon Sep 17 00:00:00 2001 From: Patrick Kunzmann Date: Thu, 26 Aug 2021 13:20:47 +0200 Subject: [PATCH 1/3] Allow combination of more than two queries --- src/biotite/database/rcsb/query.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/biotite/database/rcsb/query.py b/src/biotite/database/rcsb/query.py index 928ac4c49..58ebc036a 100644 --- a/src/biotite/database/rcsb/query.py +++ b/src/biotite/database/rcsb/query.py @@ -44,6 +44,13 @@ def get_content(self): """ pass + def __and__(self, query): + return CompositeQuery([self, query], "and") + + def __or__(self, query): + return CompositeQuery([self, query], "or") + + class SingleQuery(Query, metaclass=abc.ABCMeta): """ @@ -68,12 +75,6 @@ def get_content(self): "node_id": self._node_id, "parameters": {} } - - def __and__(self, query): - return CompositeQuery([self, query], "and") - - def __or__(self, query): - return CompositeQuery([self, query], "or") class CompositeQuery(Query): From 51d4f66eba1731f0a8f89f34d51a98d83d17179e Mon Sep 17 00:00:00 2001 From: Patrick Kunzmann Date: Thu, 26 Aug 2021 17:34:04 +0200 Subject: [PATCH 2/3] Fix #347 --- doc/tutorial/src/database.py | 3 ++- src/biotite/database/rcsb/query.py | 34 +++++++++++++++++++++++++----- tests/database/test_rcsb.py | 21 ++++++++++++++---- 3 files changed, 48 insertions(+), 10 deletions(-) diff --git a/doc/tutorial/src/database.py b/doc/tutorial/src/database.py index 3ffdaf379..e46188edd 100644 --- a/doc/tutorial/src/database.py +++ b/doc/tutorial/src/database.py @@ -81,7 +81,8 @@ # :class:`FieldQuery`. # A complete list of the available fields and its supported operators # is documented -# `on this page `_. +# `on this page `_ +# and `on that page `_.
# Query for 'lacA' gene query1 = rcsb.FieldQuery( diff --git a/src/biotite/database/rcsb/query.py b/src/biotite/database/rcsb/query.py index 58ebc036a..fad7f5c7c 100644 --- a/src/biotite/database/rcsb/query.py +++ b/src/biotite/database/rcsb/query.py @@ -103,6 +103,16 @@ def __init__(self, queries, operator): self._operator = operator def get_content(self): + """ + A dictionary representation of the query. + This dictionary is the content of the ``'query'`` key in the + JSON query. + + Returns + ------- + content : dict + The dictionary representation of the query. + """ content = { "type": "group", "logical_operator": self._operator, @@ -139,7 +149,7 @@ def __init__(self, term): def get_content(self): content = super().get_content() content["type"] = "terminal" - content["service"] = "text" + content["service"] = "full_text" content["parameters"]["value"] = f'"{self._term}"' return content @@ -161,6 +171,12 @@ class FieldQuery(SingleQuery): ---------- field : str The field to search in. + molecular_definition : bool, optional + If set true, this query searches in fields + associated with + `molecular definitions `_. + If false (default), this query searches in fields + associated with `PDB structures `_. exact_match : str, optional Operator for returning results whose field exactly matches the given value. @@ -184,7 +200,9 @@ class FieldQuery(SingleQuery): ----- A complete list of the available fields and its supported operators is documented at - ``_. + ``_ + and + ``_. 
Examples -------- @@ -193,10 +211,11 @@ class FieldQuery(SingleQuery): >>> print(sorted(search(query))) ['1EJG', '1I0T', '2GLT', '3NIR', '3P4J', '4JLJ', '5D8V', '5NW3'] """ - def __init__(self, field, **kwargs): + def __init__(self, field, molecular_definition=False, **kwargs): super().__init__() self._negation = False self._field = field + self._mol_definition = molecular_definition if len(kwargs) > 1: raise TypeError("Only one operator must be given") @@ -235,7 +254,10 @@ def __init__(self, field, **kwargs): def get_content(self): content = super().get_content() content["type"] = "terminal" - content["service"] = "text" + if self._mol_definition: + content["service"] = "text_chem" + else: + content["service"] = "text" content["parameters"]["attribute"] = self._field content["parameters"]["operator"] = self._operator content["parameters"]["negation"] = self._negation @@ -511,7 +533,9 @@ def search(query, return_type="entry", range=None, sort_by=None): If specified, the returned PDB IDs are sorted by the values of the given field name in descending order. A complete list of the available fields is documented at - ``_. + ``_ + and + ``_.
Returns ------- diff --git a/tests/database/test_rcsb.py b/tests/database/test_rcsb.py index 8de76d2fb..702b8213f 100644 --- a/tests/database/test_rcsb.py +++ b/tests/database/test_rcsb.py @@ -67,20 +67,23 @@ def test_search_basic(): @pytest.mark.parametrize( - "field, params, ref_ids", + "field, molecular_definition, params, ref_ids", [ ( "pdbx_serial_crystallography_sample_delivery_injection.preparation", + False, {}, ["6IG7", "6IG6", "7JRI"] ), ( "audit_author.name", + False, {"is_in": ["Neidigh, J.W."]}, ["1JRJ", "1L2Y", "2O3P", "2O63", "2O64", "2O65"] ), ( "rcsb_entity_source_organism.rcsb_gene_name.value", + False, {"exact_match": "lacA"}, ["5JUV", "1KQA", "1KRV", "1KRU", "1KRR", "1TG7", "1XC6", "3U7V", "4IUG", "4LFK", "4LFL", "4LFM", "4LFN", "5IFP", "5IFT", "5IHR", @@ -88,28 +91,38 @@ def test_search_basic(): ), ( "struct.title", + False, {"contains_words": "tc5b"}, ["1L2Y"] ), ( "reflns.d_resolution_high", + False, {"less_or_equal": 0.6}, - ["1EJG", "1I0T", "3NIR", "3P4J", "5D8V", "5NW3", "4JLJ", "2GLT"] + ["1EJG", "1I0T", "3NIR", "3P4J", "5D8V", "5NW3", "4JLJ", "2GLT", + "7ATG"] ), ( "rcsb_entry_info.deposited_model_count", + False, {"range_closed": (60, 61)}, ["1BBO", "1GB1", "1O5P", "1XU6", "2LUM", "2NO8"] ), + ( + "rcsb_id", + True, + {"exact_match": "AIN"}, + ["1OXR", "1TGM", "3IAZ", "3GCL", "6MQF", "2QQT", "4NSB"] + ), ] ) @pytest.mark.skipif( cannot_connect_to(RCSB_URL), reason="RCSB PDB is not available" ) -def test_search_field(field, params, ref_ids): +def test_search_field(field, molecular_definition, params, ref_ids): query = rcsb.FieldQuery( - field, **params + field, molecular_definition, **params ) test_ids = rcsb.search(query) test_count = rcsb.count(query) From 14ec09dc3ebc204e5fb83803ea993c7296a1e819 Mon Sep 17 00:00:00 2001 From: Patrick Kunzmann Date: Fri, 27 Aug 2021 09:43:14 +0200 Subject: [PATCH 3/3] Update doctest --- src/biotite/database/rcsb/query.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git 
a/src/biotite/database/rcsb/query.py b/src/biotite/database/rcsb/query.py index fad7f5c7c..081ab7218 100644 --- a/src/biotite/database/rcsb/query.py +++ b/src/biotite/database/rcsb/query.py @@ -209,7 +209,7 @@ class FieldQuery(SingleQuery): >>> query = FieldQuery("reflns.d_resolution_high", less_or_equal=0.6) >>> print(sorted(search(query))) - ['1EJG', '1I0T', '2GLT', '3NIR', '3P4J', '4JLJ', '5D8V', '5NW3'] + ['1EJG', '1I0T', '2GLT', '3NIR', '3P4J', '4JLJ', '5D8V', '5NW3', '7ATG'] """ def __init__(self, field, molecular_definition=False, **kwargs): super().__init__() @@ -463,10 +463,10 @@ def count(query, return_type="entry"): >>> query = FieldQuery("reflns.d_resolution_high", less_or_equal=0.6) >>> print(count(query)) - 8 + 9 >>> ids = search(query) >>> print(sorted(ids)) - ['1EJG', '1I0T', '2GLT', '3NIR', '3P4J', '4JLJ', '5D8V', '5NW3'] + ['1EJG', '1I0T', '2GLT', '3NIR', '3P4J', '4JLJ', '5D8V', '5NW3', '7ATG'] """ if return_type not in [ "entry", "polymer_instance", "assembly", @@ -548,15 +548,15 @@ def search(query, return_type="entry", range=None, sort_by=None): >>> query = FieldQuery("reflns.d_resolution_high", less_or_equal=0.6) >>> print(sorted(search(query))) - ['1EJG', '1I0T', '2GLT', '3NIR', '3P4J', '4JLJ', '5D8V', '5NW3'] + ['1EJG', '1I0T', '2GLT', '3NIR', '3P4J', '4JLJ', '5D8V', '5NW3', '7ATG'] >>> print(search(query, sort_by="rcsb_accession_info.initial_release_date")) - ['5NW3', '5D8V', '4JLJ', '3P4J', '3NIR', '1I0T', '1EJG', '2GLT'] + ['7ATG', '5NW3', '5D8V', '4JLJ', '3P4J', '3NIR', '1I0T', '1EJG', '2GLT'] >>> print(search( ... query, range=(1,4), sort_by="rcsb_accession_info.initial_release_date" ... 
)) - ['5D8V', '4JLJ', '3P4J'] + ['5NW3', '5D8V', '4JLJ'] >>> print(sorted(search(query, return_type="polymer_instance"))) - ['1EJG.A', '1I0T.A', '1I0T.B', '2GLT.A', '3NIR.A', '3P4J.A', '3P4J.B', '4JLJ.A', '4JLJ.B', '5D8V.A', '5NW3.A'] + ['1EJG.A', '1I0T.A', '1I0T.B', '2GLT.A', '3NIR.A', '3P4J.A', '3P4J.B', '4JLJ.A', '4JLJ.B', '5D8V.A', '5NW3.A', '7ATG.A', '7ATG.B'] """ if return_type not in [ "entry", "polymer_instance", "assembly",