From ad1beacff1e4aff6925ef047fd1c1b884cee883e Mon Sep 17 00:00:00 2001 From: Ivana Truong Date: Wed, 13 Nov 2024 14:44:54 -0500 Subject: [PATCH 01/12] add support for sequence coordinates api: alignments --- rcsbapi/config.py | 5 +- rcsbapi/const.py | 6 + rcsbapi/sequence/__init__.py | 13 + rcsbapi/sequence/query.py | 150 ++++++ rcsbapi/sequence/schema.py | 966 +++++++++++++++++++++++++++++++++++ 5 files changed, 1139 insertions(+), 1 deletion(-) create mode 100644 rcsbapi/sequence/__init__.py create mode 100644 rcsbapi/sequence/query.py create mode 100644 rcsbapi/sequence/schema.py diff --git a/rcsbapi/config.py b/rcsbapi/config.py index 380ae96..2dccfad 100644 --- a/rcsbapi/config.py +++ b/rcsbapi/config.py @@ -19,9 +19,12 @@ class Config: DATA_API_TIMEOUT: int = 60 - SEARCH_API_REQUESTS_PER_SECOND: int = 10 SUPPRESS_AUTOCOMPLETE_WARNING: bool = False + SEARCH_API_REQUESTS_PER_SECOND: int = 10 + + SEQ_API_TIMEOUT: int = 60 + def __setattr__(self, name, value): """Verify attribute exists when a user tries to set a configuration parameter, and ensure proper typing. Raises an error if user accidentally tries to create a new, unused attribute (e.g., due to a typo or misspelling), diff --git a/rcsbapi/const.py b/rcsbapi/const.py index 0218edf..db1ed87 100644 --- a/rcsbapi/const.py +++ b/rcsbapi/const.py @@ -97,4 +97,10 @@ class Const: }) +@dataclass(frozen=True) +class SeqConst: + API_ENDPOINT: str = "https://sequence-coordinates.rcsb.org/graphql" + + +seq_const = SeqConst() const = Const() diff --git a/rcsbapi/sequence/__init__.py b/rcsbapi/sequence/__init__.py new file mode 100644 index 0000000..690a759 --- /dev/null +++ b/rcsbapi/sequence/__init__.py @@ -0,0 +1,13 @@ +"""RCSB PDB Sequence Coordinates API""" +from .schema import CoordSchema + +COORD_SCHEMA = CoordSchema() + +from .query import alignments, annotations, AnnotationFilterInput # noqa:E402 (ignore that import is not at top) + +__all__ = [ + "CoordSchema", + "alignments", + "annotations", + "AnnotationFilterInput", +] diff --git a/rcsbapi/sequence/query.py b/rcsbapi/sequence/query.py new file mode 100644 index 0000000..1caae2e --- /dev/null +++ b/rcsbapi/sequence/query.py @@ -0,0 +1,150 @@ +from typing import Dict, Literal, List, Any, Optional +from types import MappingProxyType +from abc import ABC, abstractmethod +from dataclasses import dataclass, fields, is_dataclass +import requests + +from rcsbapi.const import seq_const +from rcsbapi.config import config +from rcsbapi.sequence import COORD_SCHEMA + +# pylint: disable=useless-parent-delegation +# This should be dynamically populated at some point +SequenceReference = Literal["NCBI_GENOME", "NCBI_PROTEIN", "PDB_ENTITY", "PDB_INSTANCE", "UNIPROT"] +FieldName = Literal["TARGET_ID", "TYPE"] +OperationType = Literal["CONTAINS", "EQUALS"] +AnnotationReference = Literal["PDB_ENTITY", "PDB_INSTANCE", "PDB_INTERFACE", "UNIPROT"] + + +@dataclass(frozen=True) +class Query(ABC): + """Base class for all query types""" + + @abstractmethod + def to_dict(self) -> Dict: + """Get dictionary represented query and attributes, skips values of None""" + request_dict: Dict = {} + for field in fields(self): + field_name = field.name + field_value = getattr(self, field_name) + field_name = field_name.replace("_", "") + if field_value: + if is_dataclass(field_value): + field_value = field_value.to_dict() + request_dict[field_name] = field_value + return request_dict + + @abstractmethod + def exec(self) -> Dict: + """execute query and return JSON response""" + + def _parse_gql_error(self, 
response_json: Dict[str, Any]): + if "error" in response_json.keys(): + raise requests.HTTPError( + f"Status code {response_json["status"]} {response_json["error"]}:\n" + f" Run .get_editor_link() to get a link to GraphiQL editor with query" + ) + + def get_editor_link(self): # TODO + pass + + +@dataclass(frozen=True) +class alignments(Query): + """ + sequence alignments + from_ (SequenceReference): From which query sequence database + to (SequenceReference): To which query sequence database + queryId (str): Database sequence identifier + return_data_list (List[str]): requested data fields + range (Optional, List[]) + """ + from_: SequenceReference # python keyword:( Is this the best way? + to: SequenceReference + queryId: str + return_data_list: List[str] + range: Optional[List[int]] = None + suppress_autocomplete_warning: bool = False + _query: MappingProxyType = MappingProxyType({}) + + def to_dict(self) -> Dict: + return super().to_dict() + + def __post_init__(self): + query = COORD_SCHEMA.construct_query( + query_type="alignments", + query_args=self.to_dict(), + return_data_list=self.return_data_list, + suppress_autocomplete_warning=self.suppress_autocomplete_warning, + ) + object.__setattr__( + self, + "_query", + query, + ) + + def exec(self) -> Dict: + response_json = requests.post( + json=dict(self._query), + url=seq_const.API_ENDPOINT, + timeout=config.DATA_API_TIMEOUT + ).json() + self._parse_gql_error(response_json) + return response_json + + +@dataclass(frozen=True) +class annotations(Query): + queryId: str + sources: List[AnnotationReference] + reference: SequenceReference + return_data_list: List[str] + filters: Optional["AnnotationFilterInput"] = None + range: Optional[List[int]] = None + suppress_autocomplete_warning: bool = False + _query: MappingProxyType = MappingProxyType({}) + + def to_dict(self) -> Dict: + return super().to_dict() + + def __post_init__(self): + query = COORD_SCHEMA.construct_query( + query_type="annotations", + query_args=self.to_dict(), + return_data_list=self.return_data_list, + suppress_autocomplete_warning=self.suppress_autocomplete_warning, + ) + object.__setattr__( + self, + "_query", + query, + ) + print(query) + + def exec(self) -> Dict: + response_json = requests.post( + json=dict(self._query), + url=seq_const.API_ENDPOINT, + timeout=config.DATA_API_TIMEOUT + ).json() + self._parse_gql_error(response_json) + return response_json + + +@dataclass(frozen=True) +class AnnotationFilterInput: + field: FieldName + operation: OperationType + source: AnnotationReference + values: List[str] + + def to_dict(self) -> Dict: + return { + "field": self.field, + "operation": self.operation, + "source": self.source, + "values": self.values, + } + + def to_string(self) -> Dict: + pass diff --git a/rcsbapi/sequence/schema.py b/rcsbapi/sequence/schema.py new file mode 100644 index 0000000..5e9b469 --- /dev/null +++ b/rcsbapi/sequence/schema.py @@ -0,0 +1,966 @@ +import logging +import json +from typing import List, Dict, Union, Any, Optional +import os +import requests +from graphql import build_client_schema +import rustworkx as rx + +from rcsbapi.const import seq_const +from rcsbapi.config import config + +use_networkx: bool = False +# Below section and parts of code involving networkx are commented out +# May implement graph construction through networkx at a later point +# try: +# import rustworkx as rx + +# logging.info("Using rustworkx") +# except ImportError: +# use_networkx = True + +logger = logging.getLogger(__name__) + + +class FieldNode: + """ + 
Node representing GraphQL field + name (str): field name + description (str): field description + redundant (bool): whether field name is redundant in schema + kind (str): "LIST", "SCALAR, or "OBJECT" + of_kind (str): If "LIST", whether list of "SCALAR" or "OBJECT" + type (str): GraphQL schema type (ex: CoreEntry) + index (int): graph index + """ + + def __init__(self, kind: str, node_type: str, name: str, description: str): + """Initialize FieldNodes + + Args: + kind (str): GraphQL kind, can be "OBJECT", "SCALAR", "LIST" + node_type (str): If applicable, the GraphQL type returned by the field + name (str): Name of field + description (str): Description of field + """ + self.name: str = name + self.description: str = description + self.redundant: bool = False + self.kind: str = kind + self.of_kind: str = "" + self.type: str = node_type + self.index: Optional[int] = None + + def __str__(self) -> str: + return f"Field Object name: {self.name}, Kind: {self.kind}, Type: {self.type}, Index if set: {self.index}, Description: {self.description}" + + def set_index(self, index: int): + """set index that is associated with the FieldNode + + Args: + index (int): index of node in schema_graph + """ + self.index = index + + def set_of_kind(self, of_kind: str): + """Only applicable if kind is LIST. Describes the GraphQL kind of the list (OBJECT, SCALAR) + + Args: + of_kind (str): GraphQL kind of the list returned by a node (a LIST can be "of_kind" OBJECT) + """ + self.of_kind = of_kind + + +class TypeNode: + """ + Class for nodes representing GraphQL Types in the schema graph. + """ + + def __init__(self, name: str): + """Initialize TypeNodes + + Args: + name (str): name of GraphQL type (ex: CoreEntry) + """ + self.name = name + self.index: Optional[int] = None + self.field_list: List[FieldNode] = [] + + def set_index(self, index: int): + """set index that is associated with the TypeNode + + Args: + index (int): index of node in schema_graph + """ + self.index = index + + def set_field_list(self, field_list: List[FieldNode]): + """List of FieldNodes associated with the GraphQL type + + Args: + field_list (Union[None, List[FieldNode]]): list of FieldNodes + """ + self.field_list = field_list + + +class CoordSchema: + """ + GraphQL schema defining available fields, types, and how they are connected. + """ + + def __init__(self) -> None: + """ + GraphQL schema defining available fields, types, and how they are connected. + """ + self.pdb_url: str = seq_const.API_ENDPOINT + self.timeout: int = config.DATA_API_TIMEOUT # TODO: change? + self.schema: Dict = self.fetch_schema() + """JSON resulting from full introspection of the GraphQL schema""" + + self._use_networkx: bool = use_networkx + # if use_networkx: + # self._schema_graph = nx.DiGraph() + # """NetworkX graph representing the GraphQL schema""" + # else: + # self._schema_graph = rx.PyDiGraph() + # """rustworkx graph representing the GraphQL schema""" + + self._type_to_idx_dict: Dict[str, int] = {} + self._field_to_idx_dict: Dict[str, List[int]] = {} + """Dict where keys are field names and values are lists of indices. + Indices of redundant fields are appended to the list under the field name. 
(ex: {id: [[43, 116, 317...]})""" + self._root_introspection = self._request_root_types() + """Request root types of the GraphQL schema and their required arguments""" + self._client_schema = build_client_schema(self.schema["data"]) + """GraphQLSchema object from graphql package, used for query validation""" + self._type_fields_dict: Dict[str, Dict] = self._construct_type_dict() + """Dict where keys are type names and the values are their associated fields""" + self._field_names_list = self._construct_name_list() + """list of all field names""" + self._root_dict: Dict[str, List[Dict[str, str]]] = self._construct_root_dict() + self._schema_graph: rx.PyDiGraph = rx.PyDiGraph() + self._schema_graph = self._recurse_build_schema(self._schema_graph, "Query") + self._root_to_idx: Dict[str, int] = self._make_root_to_idx() + self._field_names_list = self._construct_name_list() + """Dict where keys are field names and values are indices. Redundant field names are represented as . (ex: {entry.id: 1452})""" + + def _request_root_types(self) -> Dict: + """Make an introspection query to get information about schema's root types + + Returns: + Dict: JSON response of introspection request + """ + root_query = {"query": """query IntrospectionQuery{ __schema{ queryType{ fields{ name args + { name description type{ ofType{ name kind ofType{ inputFields {name type { ofType { kind ofType { ofType { kind name } } } } } kind name ofType{ name kind + } } } } } } } } }"""} + response = requests.post(headers={"Content-Type": "application/json"}, json=root_query, url=self.pdb_url, timeout=self.timeout) + return response.json() + + def _construct_root_dict(self) -> Dict[str, List[Dict[str, str]]]: + """Build a dictionary to organize information about schema root types. + + Returns: + Dict[str, List[Dict]]: Dict where keys are the type names. + Values are lists of dictionaries with information about arguments. + + ex: {"entry": [{'name': 'entry_id', 'description': '', 'kind': 'SCALAR', 'type': 'String'}]} + """ + response = self._root_introspection + root_dict: Dict[str, List[Dict[str, str]]] = {} + root_fields_list = response["data"]["__schema"]["queryType"]["fields"] + for name_arg_dict in root_fields_list: + root_name = name_arg_dict["name"] + arg_dict_list = name_arg_dict["args"] + for arg_dict in arg_dict_list: + arg_name = arg_dict["name"] + arg_description = arg_dict["description"] + arg_kind = arg_dict["type"]["ofType"]["kind"] + arg_of_kind = "" + arg_of_kind_name = "" + if arg_kind == 'LIST': + arg_of_kind = arg_dict["type"]["ofType"]["ofType"]["kind"] + arg_of_kind_name = arg_dict["type"]["ofType"]["ofType"]["name"] + arg_type = self._find_type_name(arg_dict["type"]["ofType"]) + if root_name not in root_dict: + root_dict[root_name] = [] + root_dict[root_name].append({ + "name": arg_name, + "description": arg_description, + "type": arg_type, + "kind": arg_kind, + "of_kind": arg_of_kind, + "of_kind_name": arg_of_kind_name + }) + return root_dict + + def fetch_schema(self) -> Dict: + """Make an introspection query to get full Data API query. 
+ Can also be found in resources folder as "data_api_schema.json" + + Returns: + Dict: JSON response of introspection request + """ + query = { + "query": """query IntrospectionQuery { __schema + { queryType { name } types { kind name description fields(includeDeprecated: true) + { name description args { name description type { kind name ofType { kind name ofType + { kind name ofType { kind name ofType { kind name ofType { kind name ofType { kind name ofType + { kind name } } } } } } } } defaultValue } type { kind name ofType { kind name ofType { kind name + ofType { kind name ofType { kind name ofType { kind name ofType { kind name ofType { kind name } } } } } } } } + isDeprecated deprecationReason } inputFields { name description type { kind name ofType + { kind name ofType { kind name ofType { kind name ofType { kind name ofType { kind name ofType + { kind name ofType { kind name } } } } } } } } defaultValue } interfaces { kind name ofType + { kind name ofType { kind name ofType { kind name ofType { kind name ofType { kind name ofType { kind name ofType + { kind name } } } } } } } } enumValues(includeDeprecated: true) { name description isDeprecated deprecationReason } + possibleTypes { kind name ofType { kind name ofType { kind name ofType { kind name ofType { kind name ofType + { kind name ofType { kind name ofType { kind name } } } } } } } } } directives { name description locations args + { name description type { kind name ofType { kind name ofType { kind name ofType { kind name ofType + { kind name ofType { kind name ofType { kind name ofType { kind name } } } } } } } } defaultValue } } }}""" + } + schema_response = requests.post(headers={"Content-Type": "application/json"}, json=query, url=self.pdb_url, timeout=self.timeout) + if schema_response.status_code == 200: + return schema_response.json() + logger.info("Loading data schema from file") + current_dir = os.path.dirname(os.path.abspath(__file__)) + json_file_path = os.path.join(current_dir, "../", "resources", "data_api_schema.json") + with open(json_file_path, "r", encoding="utf-8") as schema_file: + return json.load(schema_file) + + def _construct_type_dict(self) -> Dict[str, Dict[str, Dict[str, str]]]: + """Construct dictionary of GraphQL types and their associated fields. + + Args: + schema (Dict): GraphQL schema + + Returns: + Dict[str, Dict[str, Dict[str, str]]]: Dict where keys are GraphQL types and values are lists of field names + """ + all_types_dict: Dict = self.schema["data"]["__schema"]["types"] + type_fields_dict = {} + for each_type_dict in all_types_dict: + type_name = str(each_type_dict["name"]) + fields = each_type_dict["fields"] + field_dict = {} + if fields is not None: + for field in fields: + field_dict[str(field["name"])] = dict(field["type"]) + type_fields_dict[type_name] = field_dict + return type_fields_dict + + def _construct_name_list(self) -> List[str]: + """construct a list of all field names in the schema. + Used to determine whether a redundant field and if a field is known. 
+ + Returns: + List[str]: list of all fields + """ + field_names_list = [] + for type_name, field_dict in self._type_fields_dict.items(): + if "__" in type_name: + continue + for field_name in field_dict.keys(): + field_names_list.append(field_name) + return field_names_list + + def make_type_subgraph(self, type_name: str) -> TypeNode: + """Make a subgraph of only one type and its associated fields + + Args: + type_name (str): name of the type for which to construct subgraph + + Returns: + TypeNode: returns TypeNode constructed from type_name + """ + field_name_list = self._type_fields_dict[type_name].keys() + field_node_list = [] + type_node = self._make_type_node(type_name) + for field_name in field_name_list: + parent_type_name = type_name + field_node = self._make_field_node(parent_type_name, field_name) + field_node_list.append(field_node) + type_node.set_field_list(field_node_list) + return type_node + + def _recurse_build_schema(self, schema_graph: rx.PyDiGraph, type_name: str) -> rx.PyDiGraph: + """Build the API schema by iterating through the fields of the given type + and building subgraphs for each one recursively until a scalar (leaf) is reached + + Args: + schema_graph (rx.PyDiGraph): graph object to build into + type_name (str): name of type whose fields will be iterated through + + Returns: + rx.PyDiGraph: returns complete schema graph object + """ + type_node = self.make_type_subgraph(type_name) + for field_node in type_node.field_list: + assert isinstance(field_node.index, int) # for mypy + if field_node.kind == "SCALAR" or field_node.of_kind == "SCALAR": + continue + else: + type_name = field_node.type + if type_name in self._type_to_idx_dict: + type_index = self._type_to_idx_dict[type_name] + if use_networkx: + schema_graph.add_edge(field_node.index, type_index, 1) + else: + schema_graph.add_edge(field_node.index, type_index, 1) + else: + self._recurse_build_schema(schema_graph, type_name) + type_index = self._type_to_idx_dict[type_name] + # if self._use_networkx: + # schema_graph.add_edge(field_node.index, type_index, 1) + if self._use_networkx is False: + schema_graph.add_edge(field_node.index, type_index, 1) + return schema_graph + + # def _apply_weights(self, root_type_list: List[str], weight: int) -> None: + # """applies weight to all edges from a root TypeNode to FieldNodes + + # Args: + # root_type_list (List[str]): list of root fields to apply weights to + # ex: "CoreEntry", "CoreAssembly" + # weight (int): integer weight to apply to edges from specified type(s) + # """ + # for root_type in root_type_list: + # node_idx = self._type_to_idx_dict[root_type] + # if use_networkx is False: + # assert isinstance(self._schema_graph, rx.PyDiGraph) + # out_edge_list = self._schema_graph.incident_edges(node_idx) + # for edge_idx in out_edge_list: + # self._schema_graph.update_edge_by_index(edge_idx, weight) + # # else: + # # out_edge_list = self._schema_graph.edges(node_idx) + # # nx.set_edge_attributes( + # # self._schema_graph, + # # {edge_tuple: {"weight": weight} for edge_tuple in out_edge_list} + # # ) + + def _make_type_node(self, type_name: str) -> TypeNode: + type_node = TypeNode(type_name) + # if self._use_networkx: + # index = len(self._schema_graph.nodes) + # self._schema_graph.add_node(index, type_node=type_node) + # if self._use_networkx is False: + index = self._schema_graph.add_node(type_node) + self._type_to_idx_dict[type_name] = index + type_node.set_index(index) + return type_node + + def _find_kind(self, field_dict: Dict) -> str: + if field_dict["name"] 
is not None: + return field_dict["kind"] + return self._find_kind(field_dict["ofType"]) + + def _find_type_name(self, field_dict: Dict) -> str: + if field_dict: + if field_dict["name"] is not None: + return field_dict["name"] + return self._find_type_name(field_dict["ofType"]) + return "" + + def _find_description(self, type_name: str, field_name: str) -> str: + for type_dict in self.schema["data"]["__schema"]["types"]: + if type_dict["name"] == type_name: + for field in type_dict["fields"]: + if field["name"] == field_name: + return field["description"] + return "" + + def _make_field_node(self, parent_type: str, field_name: str) -> FieldNode: + kind = self._type_fields_dict[parent_type][field_name]["kind"] + field_type_dict: Dict = self._type_fields_dict[parent_type][field_name] + return_type = self._find_type_name(field_type_dict) + description = self._find_description(parent_type, field_name) + field_node = FieldNode(kind, return_type, field_name, description) + assert field_node.type is not None + if kind == "LIST" or kind == "NON_NULL": + of_kind = self._find_kind(field_type_dict) + field_node.set_of_kind(of_kind) + parent_type_index = self._type_to_idx_dict[parent_type] + # if self._use_networkx: + # index = len(self._schema_graph.nodes + # self._schema_graph.add_node(index, field_node=field_node) + # self._schema_graph.add_edge(parent_type_index, index, weight=1) + # if self._use_networkx is False: + if field_node.kind == "SCALAR" or field_node.of_kind == "SCALAR": + index = self._schema_graph.add_child(parent_type_index, field_node, 1) + else: + index = self._schema_graph.add_child(parent_type_index, field_node, 1) + if self._field_names_list.count(field_name) > 1: + field_node.redundant = True + field_node.set_index(index) + + assert isinstance(field_node.index, int) # for mypy + if field_name not in self._field_to_idx_dict: + self._field_to_idx_dict[field_name] = [field_node.index] + else: + self._field_to_idx_dict[field_name].append(field_node.index) + + return field_node + + def _make_root_to_idx(self) -> Dict[str, int]: + root_to_idx: Dict[str, int] = {} + # Assumes 0 is the index for root Query node. 
+ # Remains true as long as graph building starts from there + for root_node in self._schema_graph.successors(0): + root_to_idx[root_node.name] = root_node.index + return root_to_idx + + def get_input_id_dict(self, input_type: str) -> Dict[str, str]: + if input_type not in self._root_dict.keys(): + raise ValueError("Not a valid input_type, no available input_id dictionary") + root_dict_entry = self._root_dict[input_type] + input_dict = {} + for arg in root_dict_entry: + name = arg["name"] + description = arg["description"] + if (len(root_dict_entry) == 1) and root_dict_entry[0]["name"] == "entry_id": + description = "ID" + input_dict[name] = description + return input_dict + + def _recurse_fields(self, fields: Dict[Any, Any], field_map: Dict[Any, Any]) -> str: + query_str = "" + for target_idx, idx_path in fields.items(): + mapped_path = field_map.get(target_idx, [target_idx]) + mapped_path = mapped_path[: mapped_path.index(target_idx) + 1] # Only take the path up to the field itself + for idx, subfield in enumerate(mapped_path): + query_str += " " + self._idx_to_name(subfield) + if idx < len(mapped_path) - 1 or (isinstance(idx_path, list) and idx_path): + query_str += "{ " + else: + query_str += " " + if isinstance(idx_path, list): + if idx_path: # Only recurse if the list is not empty + for item in idx_path: + if isinstance(item, dict): + query_str += self._recurse_fields(item, field_map) + else: + query_str += " " + self._idx_to_name(item) + else: + query_str += " " + idx_path + for idx, subfield in enumerate(mapped_path): + if idx < len(mapped_path) - 1 or (isinstance(idx_path, list) and idx_path): + query_str += " " + "} " + return query_str + + def _get_descendant_fields(self, node_idx: int, field_name: str, visited=None) -> List[Union[int, Dict]]: + if visited is None: + visited = set() + + result: List[Union[int, Dict]] = [] + children_idx = list(self._schema_graph.neighbors(node_idx)) + + for idx in children_idx: + if idx in visited: + raise ValueError(f"{field_name} in return_data_list is too general, unable to autocomplete query.\n" "Please request a more specific field.") + + visited.add(idx) + child_data = self._schema_graph[idx] + assert isinstance(child_data.index, int) # for mypy + + if isinstance(child_data, FieldNode): + child_descendants = self._get_descendant_fields(idx, field_name, visited) + # If further subfields append as dictionary. 
ex: {field index: [subfield1, subfield2, ...]} + if child_descendants: + result.append({child_data.index: child_descendants}) + # If scalar, append index + else: + result.append(child_data.index) + elif isinstance(child_data, TypeNode): + type_descendants = self._get_descendant_fields(idx, field_name, visited) + # If further subfields, append the list of descendants (indices and index dicts) + if type_descendants: + result.extend(type_descendants) + # Skips appending if no further subfields (ENUMS) + return result + + def find_field_names(self, search_string: str) -> List[str]: + """find field names that fully or partially match the search string + + Args: + search_string (str): string to search field names for + + Raises: + ValueError: thrown when a type other than string is passed in for search_string + ValueError: thrown when no fields match search_string + + Returns: + List[str]: list of matching field names + """ + if not isinstance(search_string, str): + raise ValueError(f"Please input a string instead of {type(search_string)}") + field_names = [key for key in self._field_to_idx_dict if search_string.lower() in key.lower()] + if not field_names: + raise ValueError(f"No fields found matching '{search_string}'") + return field_names + + def construct_query( + self, + query_type: str, + query_args: Union[Dict[str, Dict], Dict[str, str]], + return_data_list: List[str], + suppress_autocomplete_warning=False + ) -> Dict: + unknown_return_list: List[str] = [] + for field in return_data_list: + if "." in field: + separate_fields = field.split(".") + for sep_field in separate_fields: + if sep_field not in self._field_names_list: + unknown_return_list.append(sep_field) + else: + if field not in self._field_names_list: + unknown_return_list.append(field) + if unknown_return_list: + raise ValueError(f"Unknown item in return_data_list: {unknown_return_list}") + # if use_networkx: + # query = self._construct_query_networkx( + # input_type=input_type, + # input_ids=input_ids, + # return_data_list=return_data_list, + # suppress_autocomplete_warning=suppress_autocomplete_warning + # ) + # else: + # query = self._construct_query_rustworkx( + # input_type=input_type, + # input_ids=input_ids, + # return_data_list=return_data_list, + # add_rcsb_id=add_rcsb_id, + # suppress_autocomplete_warning=suppress_autocomplete_warning + # ) + query = self._construct_query_rustworkx( + query_type=query_type, + query_args=query_args, + return_data_list=return_data_list, + suppress_autocomplete_warning=suppress_autocomplete_warning + ) + return query + + # def _construct_query_networkx( + # self, + # input_type: str, + # input_ids: Union[Dict[str, str], List[str]], + # return_data_list: List[str], + # add_rcsb_id: bool, + # suppress_autocomplete_warning: bool + # ): # Incomplete function + # query = "" + # return query + + def _construct_query_rustworkx( + self, + query_type: str, + query_args: Union[Dict[str, Dict], Dict[str, str]], + return_data_list: List[str], + suppress_autocomplete_warning: bool = False, + ) -> Dict: + """Construct a GraphQL query as JSON using a rustworkx graph. + + Args: + input_ids (Union[List[str], Dict[str, str], Dict[str, List[str]]]): identifying information for the specific entry, chemical component, etc to query + input_type (str): specifies where you are starting your query. These are specific fields like "entry" or "polymer_entity_instance". 
+ return_data_list (List[str]): requested data, can be field name(s) or dot-separated field names + ex: "cluster_id" or "exptl.method" + + Raises: + ValueError: input_ids dictionary keys don't match the input_type given + ValueError: input_ids dictionary keys missing + ValueError: input_ids dictionary value should be a string, but another type was passed in + ValueError: field in return_data_list exists, but is a redundant name and needs to be further specified + ValueError: path in return_data_list exists, but is a redundant and needs to be further specified + + Returns: + str: query in GraphQL syntax + """ + arg_list = self._root_dict[query_type] + # arg_name_list = [id["name"] for id in arg_list] # might need to revert back to this + + # # Check formatting of input_ids + # input_dict: Union[Dict[str, str], Dict[str, List[str]]] = {} + + # if isinstance(input_ids, Dict): + # input_dict = input_ids + # if not all(key in arg_name_list for key in input_dict.keys()): + # raise ValueError(f"Input IDs keys do not match: {input_dict.keys()} vs {arg_name_list}") + # missing_keys = [key_arg for key_arg in arg_name_list if key_arg not in input_dict] + # if len(missing_keys) > 0: + # raise ValueError( + # f"Missing input_id dictionary keys: {missing_keys}. Find input_id keys and descriptions by running:\n" + # f" from rcsbapi.data import Schema\n" + # f" schema = Schema()\n" + # f' schema.get_input_id_dict("{input_type}")' + # ) + # attr_kind = {attr["name"]: attr["kind"] for attr in attr_list} + # for key, value in input_dict.items(): + # if attr_kind[key] == "SCALAR": + # if not isinstance(value, str): + # raise ValueError(f"Input ID for {key} should be a single string") + # elif attr_kind[key] == "LIST": + # if not isinstance(value, list): + # raise ValueError(f"Input ID for {key} should be a list of strings") + # if not all(isinstance(item, str) for item in value): + # raise ValueError(f"Input ID for {key} should be a list of strings") + + start_node_index = self._root_to_idx[query_type] + + return_data_paths: Dict[int, List[List[int]]] = {} + complete_path: int = 0 + + for field in return_data_list: + # Generate list of all possible paths to the final requested field. Try to find matching sequence to user input. + path_list = field.split(".") + possible_paths = self.find_paths(query_type, path_list[-1]) + matching_paths: List[str] = [] + for path in possible_paths: + possible_path_list = path.split(".") + possible_path_list.insert(0, str(query_type)) + + # If there is an exact path match, + # the path is fully specified and other possible_paths can be removed and loop can stop. + # Iterate complete path, so warning can be raised if autocompletion is used + path_list_with_input = [query_type] + path_list + if (possible_path_list == path_list) or (possible_path_list == path_list_with_input): + matching_paths = [".".join(possible_path_list)] + complete_path += 1 + break + # Else, check for matching path segments. 
+ else: + for i in range(len(possible_path_list)): + if possible_path_list[i: i + len(path_list)] == path_list: + matching_paths.append(".".join(possible_path_list)) + + idx_paths: List[List[int]] = [] + if len(matching_paths) > 0: + for path in matching_paths: + idx_paths.extend(self._parse_dot_path(path)) + + # remove paths not beginning with input_type + full_idx_paths: List[List[int]] = list(idx_paths) + input_type_idx = self._root_to_idx[query_type] + for path in idx_paths: + if path[0] != input_type_idx: + full_idx_paths.remove(path) + idx_paths = full_idx_paths + + if len(idx_paths) > 1: + # Print error message that doesn't include input_type at beginning + # But keep input_type in matching_paths for query construction reasons + path_choice_msg = " " + "\n ".join([".".join(path.split(".")[1:]) for path in matching_paths[:10]]) + if len(matching_paths) > 10: + len_path = 10 + else: + len_path = len(matching_paths) + + if len(matching_paths) > 10: + raise ValueError( + f'Given path "{field}" not specific enough. Use one or more of these paths in return_data_list argument:\n\n' + f"{len_path} of {len(matching_paths)} possible paths:\n" + f"{path_choice_msg}" + f"\n ...\n\n" + f"For all paths run:\n" + f" from rcsbapi.data import Schema\n" + f" schema = Schema()\n" + f' schema.find_paths("{query_type}", "{path_list[-1]}")' + ) + + raise ValueError( + f'Given path "{field}" not specific enough. Use one or more of these paths in return_data_list argument:\n\n' + f"{len_path} of {len(matching_paths)} possible paths:\n" + f"{path_choice_msg}" + ) + + # If path isn't in possible_paths_list, try using the graph to validate the path. Allows for queries with loops and paths that have repeated nodes. + if len(idx_paths) == 0: + possible_dot_paths: List[List[int]] = self._parse_dot_path(field) # Throws an error if path is invalid + shortest_full_paths: List[List[int]] = self._compare_paths(start_node_index, possible_dot_paths) + assert len(shortest_full_paths) != 0 + if len(shortest_full_paths) > 1: + shortest_name_paths = [".".join([self._idx_to_name(idx) for idx in path[1:] if isinstance(self._schema_graph[idx], FieldNode)]) for path in shortest_full_paths] + shortest_name_paths.sort() + path_choice_msg = "" + for name_path in shortest_name_paths: + path_choice_msg += " " + name_path + "\n" + raise ValueError( + "Given path not specific enough. Use one or more of these paths in return_data_list argument:\n\n" + f"{path_choice_msg}\n" + "Please note that this list may not be complete. " + "If looking for a different path, you can search the interactive editor's documentation explorer: https://data.rcsb.org/graphql/index.html" + ) + idx_paths = shortest_full_paths + final_idx: int = idx_paths[0][-1] + return_data_paths[final_idx] = idx_paths + + if (complete_path != len(return_data_list)) and (suppress_autocomplete_warning is False): + info_list = [] + for path in return_data_paths.values(): + assert len(path) == 1 + info_list.append(".".join(self._idx_path_to_name_path(path[0][1:]))) + + path_msg = "".join(f'\n "{item}",' for item in info_list) + logger.warning( + "\n" + "Some paths are being autocompleted based on the current API. 
If this code is meant for long-term use, use the set of fully-specified paths below:\n" + " [" + "%s\n" + " ]", path_msg + ) + + for return_data in return_data_list: + if any(not value for value in return_data_paths.values()): + raise ValueError(f'You can\'t access "{return_data}" from input type {query_type}') + + final_fields = {} + for target_idx in return_data_paths.keys(): + final_fields[target_idx] = self._get_descendant_fields(node_idx=target_idx, field_name=self._schema_graph[target_idx].name) + + field_names: Dict[Any, Any] = {} + paths: Dict[Any, Any] = {} + + for target_idx, paths_list in return_data_paths.items(): + node_data = self._schema_graph[target_idx] + if isinstance(node_data, FieldNode): + field_names[target_idx] = [] + paths[target_idx] = [] + for each_path in paths_list: + skip_first = True + path = [node_idx for node_idx in each_path if isinstance(self._schema_graph[node_idx], FieldNode)][1:] + paths[target_idx].append(path) + for node_idx in each_path: + node_data = self._schema_graph[node_idx] + if isinstance(node_data, FieldNode): + if skip_first: + skip_first = False + continue + field_names[target_idx].append(node_idx) + + query = "query { " + query_type + "(" + + num_arg_added = 0 + for arg_dict in arg_list: + arg_name = arg_dict["name"] + # If arg not in query_args, assume it's an optional (checking done earlier) + if arg_name not in query_args: + continue + query += self.format_args(arg_dict, query_args[arg_name]) + num_arg_added += 1 + if num_arg_added < (len(query_args) - 1): + query += ", " + + query += ") { " + query += self._recurse_fields(final_fields, field_names) + query += " } }" + json_query = {"query": f"{query}"} + return json_query + + def format_args(self, arg_dict: Dict[str, str], input_value: str) -> str: + """Add double quotes or omit quotes around a single GraphQL argument + + Args: + arg_dict (Dict[str, str]): dictionary with information about the argument + input_value (str): input value of the argument + + Returns: + str: returns input value formatted with quotes, no quotes, or as a list + """ + format_arg = "" + if arg_dict["type"] == "String": + # If arg type is string, add double quotes around value + format_arg += f'{arg_dict["name"]}: "{input_value}"' + elif arg_dict["kind"] == "LIST": + if ["of_kind_name"] == "String": + # Add double quotes around each item + format_arg += f'{arg_dict["name"]}: {str(input_value).replace("'", '"')}' + else: + # Remove single quotes + format_arg += f'{arg_dict["name"]}: {str(input_value).replace("'", "")}' + else: + format_arg += f'{arg_dict["name"]}: {input_value}' + return format_arg + + def _find_idx_path(self, dot_path: List[str], idx_list: List[int], node_idx: int) -> List[int]: + """function that recursively finds a list of indices that matches a list of field names. + + Args: + dot_path (List[str]): list of field names to find index matches for + idx_list (List[int]): list of matching indices, appended to as matches are found during recursion + node_idx (int): index to be searched for a child node matching the next field name + + Returns: + List[int]: a list of indices matching the given dot_path. If no path is found, an empty list is returned. 
+ """ + if len(dot_path) == 0: + idx_list.append(node_idx) + return idx_list + if (self._schema_graph[node_idx].kind == "SCALAR") or (self._schema_graph[node_idx].of_kind == "SCALAR"): + return self._find_idx_path(dot_path[1:], idx_list, node_idx) + else: + type_node = list(self._schema_graph.successor_indices(node_idx))[0] + field_nodes = self._schema_graph.successor_indices(type_node) + for field_idx in field_nodes: + if self._schema_graph[field_idx].name == dot_path[0]: + idx_list.append(node_idx) + return self._find_idx_path(dot_path[1:], idx_list, field_idx) + else: + continue + return [] + + def _parse_dot_path(self, dot_path: str) -> List[List[int]]: + """Parse dot-separated field names into lists of matching node indices + ex: "prd.chem_comp.id" --> [[57, 81, 116], [610, 81, 116], [858, 81, 116]] + + Args: + dot_path (str): dot-separated field names given in return_data_list + ex: "exptl.method" or "prd.chem_comp.id" + + Raises: + ValueError: thrown if no path matches dot_path + + Returns: + List[List[int]]: list of paths where each path is a list of FieldNode indices matching the given dot_path + """ + path_list = dot_path.split(".") + node_matches: List[int] = self._field_to_idx_dict[path_list[0]] + idx_path_list: List[List[int]] = [] + for node_idx in node_matches: + found_path: List[int] = [] + found_path = self._find_idx_path(path_list[1:], found_path, node_idx) + if len(found_path) == len(path_list): + idx_path_list.append(found_path) + if len(idx_path_list) == 0: + raise ValueError(f"return_data_list path is not valid: {dot_path}") + + return idx_path_list + + def _compare_paths(self, start_node_index: int, dot_paths: List[List[int]]) -> List[List[int]]: + """Compare length of paths from the starting node to dot notation paths, returning the shortest paths + + Args: + start_node_index (int): the index of query's input_type + ex: input_type entry --> 20 + dot_paths (List[List[int]]): a list of paths where each path is a list of node indices matching a dot notation string + + Raises: + ValueError: thrown when there is no path from the input_type node to the return data nodes. + + Returns: + List[List[int]]: list of shortest paths from the input_type node index to the index of the final field given in dot notation. + ex: input_type "entry" and "exptl.method" would return a list of shortest path(s) with indices from "entry" to "method". + """ + all_paths: List[List[int]] = [] + + for path in dot_paths: + first_path_idx = path[0] + if start_node_index == first_path_idx: + unique_paths_list: List[List[int]] = [path] + else: + paths = rx.digraph_all_shortest_paths(self._schema_graph, start_node_index, first_path_idx, weight_fn=lambda edge: edge) + unique_paths = {tuple(path) for path in paths} + unique_paths_list = [list(unique_path) for unique_path in unique_paths] + if len(unique_paths_list) == 0: + unique_paths_list = [] + else: + for unique_path in unique_paths_list: + unique_path += path[1:] + all_paths.extend(unique_paths_list) + if len(all_paths) == 0: + raise ValueError(f"Can't access \"{'.'.join(self._idx_path_to_name_path(dot_paths[0]))}\" from given input_type {self._schema_graph[start_node_index].name}") + shortest_path_len = len(min(all_paths, key=len)) + shortest_paths = [path for path in all_paths if len(path) == shortest_path_len] + return shortest_paths + + def _weigh_assemblies(self, paths: List[List[int]], assembly_node_idxs: List[int]) -> List[List[int]]: + """remove paths containing "assemblies" if there are shorter or equal length paths available. 
+ Mimics weighing assembly edges in the rest of query construction. + + Args: + paths (List[List[int]]): list of paths where each path is a list of indices from a root node to a requested field. + assembly_node_idxs (List[int]): list of indices of nodes named "assemblies" (root node excluded) + + Returns: + List[List[int]]: List with weight applied (no "assemblies" path if there is an equivalent path present) + """ + remove_paths: set = set() + + for path in paths: + for assemblies_idx in assembly_node_idxs: + if assemblies_idx in path: + for compare_path in paths: + if compare_path == path: + continue + name_compare_path = self._idx_path_to_name_path(compare_path) + # If there are shorter or equal length paths without "assemblies", filter out + if ( + (len(compare_path) <= len(path)) + and ("assemblies" not in name_compare_path) + and (compare_path[-1] == path[-1]) + ): + remove_paths.add(tuple(path)) + + for path in remove_paths: + paths.remove(list(path)) + + return paths + + def _idx_to_name(self, idx: int) -> str: + """Given an index, return the associated node's name + + Args: + idx (int): index of a node + + Returns: + str: name of node + """ + return self._schema_graph[idx].name + + def _idx_path_to_name_path(self, idx_path: List[int]) -> List[str]: + """Take a path of graph indices and return a path of field names + + Args: + idx_path (List[int]): List of node indices (can be both TypeNodes and FieldNodes) + + Returns: + List[str]: List of field names, removing TypeNodes. + """ + name_path: List[str] = [] + for idx in idx_path: + if isinstance(self._schema_graph[idx], FieldNode): + name_path.append(self._schema_graph[idx].name) + return name_path + + def find_paths(self, input_type: str, return_data_name: str, descriptions: bool = False) -> Union[List[str], Dict]: + """Find path from input_type to any nodes matching return_data_name + + Args: + input_type (str): name of an input_type (ex: entry, polymer_entity_instance) + return_data_name (str): name of one field, can be a redundant name + description (bool, optional): whether to include descriptions for the final field of each path. Default is False. + + Returns: + Union[List[str], Dict] + List[str]: list of paths to nodes with names that match return_data_name + Dict: if description is True, a dictionary with paths as keys and descriptions as values is returned. 
+ """ + paths: List[List[int]] = [] + input_type_idx: int = self._root_to_idx[input_type] + for possible_idx in self._field_to_idx_dict[return_data_name]: + paths_to_idx = rx.all_simple_paths(self._schema_graph, input_type_idx, possible_idx) + paths.extend(paths_to_idx) + dot_paths: List[str] = [] + description_dict: Dict[str, str] = {} + for path in paths: + name_path = self._idx_path_to_name_path(path) + dot_path = ".".join(name_path[1:]) + dot_paths.append(dot_path) + if descriptions: + final_field_idx = path[-1] + description = self._schema_graph[final_field_idx].description + if description is None: + description = "" + description_dict[dot_path] = description.replace("\n", " ") + + if descriptions: + return description_dict + dot_paths.sort() + return dot_paths From 0e57b5d3b7bcc5024b5e19734cacda8cfc602bec Mon Sep 17 00:00:00 2001 From: Ivana Truong Date: Mon, 25 Nov 2024 16:31:09 -0600 Subject: [PATCH 02/12] support rest of query types, add tests, add local schema --- rcsbapi/const.py | 2 +- rcsbapi/sequence/__init__.py | 11 +- rcsbapi/sequence/query.py | 316 +- .../sequence/resources/seq_api_schema.json | 3767 +++++++++++++++++ rcsbapi/sequence/schema.py | 111 +- tests/test_data_query.py | 4 +- tests/test_data_schema.py | 4 +- tests/test_seq_query.py | 176 + 8 files changed, 4277 insertions(+), 114 deletions(-) create mode 100644 rcsbapi/sequence/resources/seq_api_schema.json create mode 100644 tests/test_seq_query.py diff --git a/rcsbapi/const.py b/rcsbapi/const.py index db1ed87..8ea8997 100644 --- a/rcsbapi/const.py +++ b/rcsbapi/const.py @@ -99,7 +99,7 @@ class Const: @dataclass(frozen=True) class SeqConst: - API_ENDPOINT: str = "https://sequence-coordinates.rcsb.org/graphql" + API_ENDPOINT: str = "https://sequence-coordinates.rcsb.org" seq_const = SeqConst() diff --git a/rcsbapi/sequence/__init__.py b/rcsbapi/sequence/__init__.py index 690a759..eb9127f 100644 --- a/rcsbapi/sequence/__init__.py +++ b/rcsbapi/sequence/__init__.py @@ -1,13 +1,16 @@ """RCSB PDB Sequence Coordinates API""" -from .schema import CoordSchema +from .schema import SeqSchema -COORD_SCHEMA = CoordSchema() +SEQ_SCHEMA = SeqSchema() -from .query import alignments, annotations, AnnotationFilterInput # noqa:E402 (ignore that import is not at top) +from .query import alignments, group_alignments, annotations, group_annotations, group_annotations_summary, AnnotationFilterInput # noqa:E402 __all__ = [ - "CoordSchema", + "SeqSchema", "alignments", "annotations", + "group_alignments", + "group_annotations", + "group_annotations_summary", "AnnotationFilterInput", ] diff --git a/rcsbapi/sequence/query.py b/rcsbapi/sequence/query.py index 1caae2e..b31fd03 100644 --- a/rcsbapi/sequence/query.py +++ b/rcsbapi/sequence/query.py @@ -1,19 +1,24 @@ -from typing import Dict, Literal, List, Any, Optional +from typing import Dict, List, Any, Optional +from enum import Enum from types import MappingProxyType from abc import ABC, abstractmethod -from dataclasses import dataclass, fields, is_dataclass +from dataclasses import dataclass, fields +import urllib.parse import requests from rcsbapi.const import seq_const from rcsbapi.config import config -from rcsbapi.sequence import COORD_SCHEMA +from rcsbapi.sequence import SEQ_SCHEMA + # pylint: disable=useless-parent-delegation -# This should be dynamically populated at some point -SequenceReference = Literal["NCBI_GENOME", "NCBI_PROTEIN", "PDB_ENTITY", "PDB_INSTANCE", "UNIPROT"] -FieldName = Literal["TARGET_ID", "TYPE"] -OperationType = Literal["CONTAINS", "EQUALS"] 
-AnnotationReference = Literal["PDB_ENTITY", "PDB_INSTANCE", "PDB_INTERFACE", "UNIPROT"] +# This should be dynamically populated at some point. +class EnumTypes(Enum): + SequenceReference = SEQ_SCHEMA.read_enum("SequenceReference") + FieldName = SEQ_SCHEMA.read_enum("FieldName") + OperationType = SEQ_SCHEMA.read_enum("OperationType") + AnnotationReference = SEQ_SCHEMA.read_enum("AnnotationReference") + GroupReference = SEQ_SCHEMA.read_enum("GroupReference") @dataclass(frozen=True) @@ -29,38 +34,87 @@ def to_dict(self) -> Dict: field_value = getattr(self, field_name) field_name = field_name.replace("_", "") if field_value: - if is_dataclass(field_value): - field_value = field_value.to_dict() + # Create an exception for AnnotationFilterInput + if ( + isinstance(field_value, list) + and all(isinstance(item, AnnotationFilterInput) for item in field_value) + ): + field_value = [filter.to_string() for filter in field_value] request_dict[field_name] = field_value return request_dict - @abstractmethod + def construct_query(self, query_type: str) -> Dict: + """type check based on the GraphQL schema, then construct the GraphQL query""" + # Assert attributes exists for mypy. + # Can't be defined in Query class because + # attributes without defaults must be defined before those with defaults. + # Inherited attributes are placed before non-inherited attributes. + # Possible workaround is making the attributes keyword-only, but I decided against it for now. + # Issue: https://github.com/python-attrs/attrs/issues/38 + assert hasattr(self, "return_data_list"), \ + f"{self.__class__.__name__} must define 'return_data_list' attribute." + assert hasattr(self, "suppress_autocomplete_warning"), \ + f"{self.__class__.__name__} must define 'suppress_autocomplete_warning' attribute." + + SEQ_SCHEMA.check_typing( + query_type=query_type, + enum_types=EnumTypes, + args=self.to_dict(), + ) + + query = SEQ_SCHEMA.construct_query( + query_type=query_type, + query_args=self.to_dict(), + return_data_list=self.return_data_list, + suppress_autocomplete_warning=self.suppress_autocomplete_warning, + ) + + return query + def exec(self) -> Dict: - """execute query and return JSON response""" + """execute given query and return JSON response""" + # Assert attribute exists for mypy + assert hasattr(self, "_query"), \ + f"{self.__class__.__name__} must define '_query' attribute." + + response_json = requests.post( + json=dict(self._query), + url=seq_const.API_ENDPOINT + "/graphql", + timeout=config.DATA_API_TIMEOUT + ).json() + self._parse_gql_error(response_json) + return response_json + + def get_editor_link(self): + """Get link to GraphiQL editor with given query populated""" + editor_base_link = str(seq_const.API_ENDPOINT) + "/graphiql" + "/index.html?query=" + return editor_base_link + urllib.parse.quote(str(self._query["query"])) def _parse_gql_error(self, response_json: Dict[str, Any]): - if "error" in response_json.keys(): + """Look through responses to see if there are errors. 
If so, throw an HTTP error, """ + if "errors" in response_json.keys(): + error = response_json["errors"][0] raise requests.HTTPError( - f"Status code {response_json["status"]} {response_json["error"]}:\n" + f"\n{error["message"]}\n" f" Run .get_editor_link() to get a link to GraphiQL editor with query" ) - def get_editor_link(self): # TODO - pass - @dataclass(frozen=True) class alignments(Query): """ - sequence alignments - from_ (SequenceReference): From which query sequence database - to (SequenceReference): To which query sequence database + Get sequence alignments + + from_ (str): From which query sequence database + to (str): To which query sequence database queryId (str): Database sequence identifier - return_data_list (List[str]): requested data fields - range (Optional, List[]) + return_data_list (List[str]): Requested data fields + range (Optional, List[]): Optional integer list to filter alignments to a particular region + suppress_autocomplete_warning (bool, optional): Suppress warning message about field path autocompletion. Defaults to False. + _query (MappingProxyType): Attribute for storing GraphQL query """ - from_: SequenceReference # python keyword:( Is this the best way? - to: SequenceReference + from_: str + to: str queryId: str return_data_list: List[str] range: Optional[List[int]] = None @@ -71,35 +125,29 @@ def to_dict(self) -> Dict: return super().to_dict() def __post_init__(self): - query = COORD_SCHEMA.construct_query( - query_type="alignments", - query_args=self.to_dict(), - return_data_list=self.return_data_list, - suppress_autocomplete_warning=self.suppress_autocomplete_warning, - ) - object.__setattr__( - self, - "_query", - query, - ) - - def exec(self) -> Dict: - response_json = requests.post( - json=dict(self._query), - url=seq_const.API_ENDPOINT, - timeout=config.DATA_API_TIMEOUT - ).json() - self._parse_gql_error(response_json) - return response_json + query = super().construct_query("alignments") + object.__setattr__(self, "_query", query) @dataclass(frozen=True) class annotations(Query): + """ + Get sequence annotations + + queryId (str): Database sequence identifier + sources (List[str]): List defining the annotation collections to be requested + reference (SequenceReference): Query sequence database + return_data_list (List[str]): Requested data fields + filters (list["AnnotationFilterInput"], optional): Optional annotation filter by type or target identifier + range: (List[int], optional): Optional integer list to filter annotations to a particular region + suppress_autocomplete_warning (bool, optional): Suppress warning message about field path autocompletion. Defaults to False. 
+ _query (MappingProxyType): Attribute for storing GraphQL query + """ queryId: str - sources: List[AnnotationReference] - reference: SequenceReference + sources: List[str] + reference: str return_data_list: List[str] - filters: Optional["AnnotationFilterInput"] = None + filters: Optional[list["AnnotationFilterInput"]] = None range: Optional[List[int]] = None suppress_autocomplete_warning: bool = False _query: MappingProxyType = MappingProxyType({}) @@ -108,43 +156,149 @@ def to_dict(self) -> Dict: return super().to_dict() def __post_init__(self): - query = COORD_SCHEMA.construct_query( - query_type="annotations", - query_args=self.to_dict(), - return_data_list=self.return_data_list, - suppress_autocomplete_warning=self.suppress_autocomplete_warning, - ) - object.__setattr__( - self, - "_query", - query, - ) - print(query) + query = super().construct_query("annotations") + object.__setattr__(self, "_query", query) - def exec(self) -> Dict: - response_json = requests.post( - json=dict(self._query), - url=seq_const.API_ENDPOINT, - timeout=config.DATA_API_TIMEOUT - ).json() - self._parse_gql_error(response_json) - return response_json + +@dataclass(frozen=True) +class group_alignments(Query): + """ + Get alignments for structures in groups + + queryId (str): Database sequence identifier for group + return_data_list (list[str]): Requested data fields + filter (list[str], optional): Optional string list of allowed identifiers for group members + suppress_autocomplete_warning (bool, optional): Suppress warning message about field path autocompletion. Defaults to False. + _query (MappingProxyType): Attribute for storing GraphQL query + """ + group: str + groupId: str + return_data_list: list[str] + filter: Optional[list[str]] = None + suppress_autocomplete_warning: bool = False + _query: MappingProxyType = MappingProxyType({}) + + def to_dict(self) -> Dict: + return super().to_dict() + + def __post_init__(self): + query = super().construct_query("group_alignments") + object.__setattr__(self, "_query", query) @dataclass(frozen=True) -class AnnotationFilterInput: - field: FieldName - operation: OperationType - source: AnnotationReference - values: List[str] +class group_annotations(Query): + """ + Get annotations for structures in groups + + group (GroupReference): Query sequence database + groupId (str): Database sequence identifier for group + sources (list[AnnotationReference]): List defining the annotation collections to be requested + return_data_list (list[str]): Requested data fields + filters (list[AnnotationFilterInput]): Optional annotation filter by type or target identifier + suppress_autocomplete_warning (bool, optional): Suppress warning message about field path autocompletion. Defaults to False. 
+ _query (MappingProxyType): Attribute for storing GraphQL query + """ + group: str + groupId: str + sources: List[str] + return_data_list: list[str] + filters: Optional[List["AnnotationFilterInput"]] = None + suppress_autocomplete_warning: bool = False + _query: MappingProxyType = MappingProxyType({}) + + def to_dict(self) -> Dict: + return super().to_dict() + + def __post_init__(self): + query = super().construct_query("group_annotations") + object.__setattr__(self, "_query", query) + + +@dataclass(frozen=True) +class group_annotations_summary(Query): + """ + Get a positional summary of group annotations + + group (GroupReference): Query sequence database + groupId (str): Database sequence identifier for group + sources (list[AnnotationReference]): List defining the annotation collections to be requested + return_data_list (list[str]): Request data fields + filters (list[AnnotationFilterInput], optional): Optional annotation filter by type or target identifier + suppress_autocomplete_warning (bool, optional): Suppress warning message about field path autocompletion. Defaults to False. + _query (MappingProxyType): Attribute for storing GraphQL query + """ + group: str + groupId: str + sources: List[str] + return_data_list: list[str] + filters: Optional[List["AnnotationFilterInput"]] = None + suppress_autocomplete_warning: bool = False + _query: MappingProxyType = MappingProxyType({}) def to_dict(self) -> Dict: - return { - "field": self.field, - "operation": self.operation, - "source": self.source, - "values": self.values, - } - - def to_string(self) -> Dict: - pass + return super().to_dict() + + def __post_init__(self): + query = super().construct_query("group_annotations_summary") + object.__setattr__(self, "_query", query) + + +class AnnotationFilterInput: + """ + filter used to select which annotations will be retrieved + """ + + def __init__( + self, + field: str, + operation: str, + values: List[str], + source: Optional[str] = None, + ): + """ + Args: + field (FieldName): Defines the field to be compared + operation (OperationType): Defines the comparison method + values (List[str]): List of allowed values + source (AnnotationReference, optional): Only features with the same annotation collections will be filtered + """ + self.field = field + self.operation = operation + self.values = values + self.source = source + + def to_string(self): + """Generate string to insert in GraphQL query based on GraphQL schema""" + + input_field_specs = [] + for arg_dict in SEQ_SCHEMA._root_dict["annotations"]: + if arg_dict["name"] == "filters": + input_field_specs = arg_dict["input_fields"] + assert len(input_field_specs) > 0, '"filters" key not found in arg_dict' + + args = set() + for input_field in input_field_specs: + field_name = input_field["name"] + if getattr(self, field_name) is None: + continue + if ( + (input_field["type"]["ofType"] is not None) + and (input_field["type"]["ofType"]["kind"] == "LIST") + ): + if input_field["type"]["ofType"]["ofType"]["ofType"]["name"] == "String": + # If type is string, add list with double quotes around each item + args.add(f"{field_name}: {str(getattr(self, field_name)).replace("'", '"')}") + else: + # If type isn't string, remove single quotes + args.add(f"{field_name}: {str(getattr(self, field_name)).replace("'", "")}") + elif ( + (input_field["type"]["kind"] == "ENUM") + or (input_field["type"]["ofType"]["kind"] == "ENUM") + ): + # If type is ENUM, remove single quotes + args.add(f"{field_name}: {str(getattr(self, field_name)).replace("'", "")}") + 
else: + raise NotImplementedError("Unsupported type in schema dictionary") + str_filter = str(args).replace("'", "") + return str_filter diff --git a/rcsbapi/sequence/resources/seq_api_schema.json b/rcsbapi/sequence/resources/seq_api_schema.json new file mode 100644 index 0000000..c59f397 --- /dev/null +++ b/rcsbapi/sequence/resources/seq_api_schema.json @@ -0,0 +1,3767 @@ +{ + "data": { + "__schema": { + "queryType": { + "name": "Query" + }, + "types": [ + { + "kind": "OBJECT", + "name": "AlignedRegions", + "description": null, + "fields": [ + { + "name": "exon_shift", + "description": "List of genomic indexes that are needed to complete the last nucleotide codon of a genome-protein sequence alignment", + "args": [], + "type": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "query_begin", + "description": "Query sequence start position\n \nExamples:\nnull, null", + "args": [], + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "query_end", + "description": "Query sequence end position\n \nExamples:\nnull, null", + "args": [], + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "target_begin", + "description": "Target sequence start position\n \nExamples:\nnull, null", + "args": [], + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "target_end", + "description": "Target sequence start position\n \nExamples:\nnull, null", + "args": [], + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + } + ], + "inputFields": null, + "interfaces": [], + "enumValues": null, + "possibleTypes": null + }, + { + "kind": "OBJECT", + "name": "AlignmentLogo", + "description": null, + "fields": [ + { + "name": "symbol", + "description": null, + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "value", + "description": null, + "args": [], + "type": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + } + ], + "inputFields": null, + "interfaces": [], + "enumValues": null, + "possibleTypes": null + }, + { + "kind": "INPUT_OBJECT", + "name": "AnnotationFilterInput", + "description": null, + "fields": null, + "inputFields": [ + { + "name": "field", + "description": null, + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "ENUM", + "name": "FieldName", + "ofType": null + } + }, + "defaultValue": null + }, + { + "name": "operation", + "description": null, + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "ENUM", + "name": "OperationType", + "ofType": null + } + }, + "defaultValue": null + }, + { + "name": "source", + "description": null, + "type": { + "kind": "ENUM", + "name": "AnnotationReference", + "ofType": null + }, + "defaultValue": null + }, + { + "name": "values", + "description": null, + "type": { + 
"kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "String", + "ofType": null + } + } + } + }, + "defaultValue": null + } + ], + "interfaces": null, + "enumValues": null, + "possibleTypes": null + }, + { + "kind": "ENUM", + "name": "AnnotationReference", + "description": null, + "fields": null, + "inputFields": null, + "interfaces": null, + "enumValues": [ + { + "name": "PDB_ENTITY", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "PDB_INSTANCE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "PDB_INTERFACE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "UNIPROT", + "description": null, + "isDeprecated": false, + "deprecationReason": null + } + ], + "possibleTypes": null + }, + { + "kind": "SCALAR", + "name": "Boolean", + "description": "Built-in Boolean", + "fields": null, + "inputFields": null, + "interfaces": null, + "enumValues": null, + "possibleTypes": null + }, + { + "kind": "OBJECT", + "name": "Coverage", + "description": null, + "fields": [ + { + "name": "query_coverage", + "description": "Fraction of the query sequence covered by the alignment\n \nExamples:\nnull, null", + "args": [], + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "query_length", + "description": "Length of the full query sequence\n \nExamples:\nnull, null", + "args": [], + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "target_coverage", + "description": "Fraction of the target sequence covered by the alignment\n \nExamples:\nnull, null", + "args": [], + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "target_length", + "description": "Length of the full target sequence\n \nExamples:\nnull, null", + "args": [], + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + } + ], + "inputFields": null, + "interfaces": [], + "enumValues": null, + "possibleTypes": null + }, + { + "kind": "OBJECT", + "name": "Features", + "description": null, + "fields": [ + { + "name": "additional_properties", + "description": "Attribute/Value list", + "args": [], + "type": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "OBJECT", + "name": "FeaturesAdditionalProperties", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "description", + "description": "Free-form text describing the feature\n \nExamples:\nSoftware generated binding site for ligand entity 2 component HEM instance C chain A", + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "feature_id", + "description": "Identifier of the feature\n \nExamples:\nHELX_P11, AC1", + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": 
false, + "deprecationReason": null + }, + { + "name": "feature_positions", + "description": "List of documents that describes the location of the feature", + "args": [], + "type": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "OBJECT", + "name": "FeaturesFeaturePositions", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "name", + "description": "Name associated to the feature\n \nExamples:\nligand HEM", + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "provenance_source", + "description": "Original database or software name used to obtain the feature\n \nExamples:\nPDB, UNIPROT", + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "type", + "description": "The connection type.\n \nExamples:\nASA_UNBOUND, BINDING_SITE, mutation, artifact, CATH, SCOP", + "args": [], + "type": { + "kind": "ENUM", + "name": "FeaturesType", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "value", + "description": "Numerical value associated with the feature\n \nExamples:\nnull, null", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + } + ], + "inputFields": null, + "interfaces": [], + "enumValues": null, + "possibleTypes": null + }, + { + "kind": "OBJECT", + "name": "FeaturesAdditionalProperties", + "description": null, + "fields": [ + { + "name": "property_name", + "description": "The additional property name.\n \nExamples:\nPARENT_COMP_ID, CATH_NAME, PARTNER_BOND_DISTANCE", + "args": [], + "type": { + "kind": "ENUM", + "name": "FeaturesAdditionalPropertiesPropertyName", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "property_value", + "description": null, + "args": [], + "type": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "ObjectScalar", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + } + ], + "inputFields": null, + "interfaces": [], + "enumValues": null, + "possibleTypes": null + }, + { + "kind": "ENUM", + "name": "FeaturesAdditionalPropertiesPropertyName", + "description": null, + "fields": null, + "inputFields": null, + "interfaces": null, + "enumValues": [ + { + "name": "CARD_MODEL_DESCRIPTION", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "CARD_MODEL_ORGANISM", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "CATH_DOMAIN_ID", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "CATH_NAME", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "CLINICAL_SIGNIFICANCE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "CONSEQUENCE_TYPE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "DISEASE_TYPE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "ECOD_DOMAIN_ID", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "ECOD_FAMILY_NAME", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + 
"name": "EVIDENCE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "LINK", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "MODELCIF_MODEL_ID", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "MUTATED_TYPE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "OMEGA_ANGLE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "PARENT_COMP_ID", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "PARTNER_ASYM_ID", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "PARTNER_BOND_DISTANCE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "PARTNER_COMP_ID", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "PREDICTED_IMPACT", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "SCOP_2_DOMAIN_ID", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "SCOP_2_FAMILY_ID", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "SCOP_2_FAMILY_NAME", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "SCOP_2_SUPERFAMILY_ID", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "SCOP_2_SUPERFAMILY_NAME", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "SCOP_DOMAIN_ID", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "SCOP_NAME", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "SCOP_SUN_ID", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "SHEET_SENSE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "STRAIN_ID", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "SUBTYPE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "TARGET_ID", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "WILD_TYPE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + } + ], + "possibleTypes": null + }, + { + "kind": "OBJECT", + "name": "FeaturesFeaturePositions", + "description": null, + "fields": [ + { + "name": "beg_ori_id", + "description": "Index at which this segment of the feature begins on the original provenance_source. When reference and source point to the same reference system this file will be null\n \nExamples:\nnull, null", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "beg_seq_id", + "description": "Index at which this segment of the feature begins\n \nExamples:\nnull, null", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "end_ori_id", + "description": "Index at which this segment of the feature ends on the original provenance_source. 
If the positional feature maps to a single residue this field will be null. When reference and source point to the same reference system this file will be null\n \nExamples:\nnull, null", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "end_seq_id", + "description": "Index at which this segment of the feature ends. If the positional feature maps to a single residue this field will be null\n \nExamples:\nnull, null", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "open_begin", + "description": "Flag that indicates the feature begins before the feature index begin\n \nExamples:\nnull, null", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Boolean", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "open_end", + "description": "Flag that indicates the feature end after the feature index end\n \nExamples:\nnull, null", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Boolean", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "range_id", + "description": "Fragment identifier that groups a set of ranges resulting from gaps\n \nExamples:\nrange-1, range-2", + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "value", + "description": "The value for the feature at this region\n \nExamples:\nnull, null, null", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "values", + "description": "The value(s) for the feature at this region", + "args": [], + "type": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "Float", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + } + ], + "inputFields": null, + "interfaces": [], + "enumValues": null, + "possibleTypes": null + }, + { + "kind": "ENUM", + "name": "FeaturesType", + "description": null, + "fields": null, + "inputFields": null, + "interfaces": null, + "enumValues": [ + { + "name": "ACTIVE_SITE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "ANGLE_OUTLIER", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "ARTIFACT", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "ASA", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "ASA_BOUND", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "ASA_UNBOUND", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "BEND", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "BINDING_SITE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "BOND_OUTLIER", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "CALCIUM_BINDING_REGION", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "CARD_MODEL", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": 
"CATH", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "CHAIN", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "CIS_PEPTIDE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "COMPOSITIONALLY_BIASED_REGION", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "COVALENT_BOND", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "COVALENT_MODIFICATION_OF_A_NUCLEOTIDE_BASE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "COVALENT_MODIFICATION_OF_A_NUCLEOTIDE_PHOSPHATE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "COVALENT_MODIFICATION_OF_A_NUCLEOTIDE_SUGAR", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "COVALENT_RESIDUE_MODIFICATION", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "CROSS_LINK", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "C_MANNOSYLATION_SITE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "DISORDER", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "DISORDER_BINDING", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "DISULFIDE_BRIDGE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "DNA_BINDING_REGION", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "DOMAIN", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "ECOD", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "GLYCOSYLATION_SITE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "HELIX_P", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "HELX_LH_PP_P", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "HELX_RH_3_T_P", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "HELX_RH_AL_P", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "HELX_RH_PI_P", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "HYDROGEN_BOND", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "HYDROPATHY", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "IMGT_ANTIBODY_DESCRIPTION", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "IMGT_ANTIBODY_DOMAIN_NAME", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "IMGT_ANTIBODY_GENE_ALLELE_NAME", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "IMGT_ANTIBODY_ORGANISM_NAME", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "IMGT_ANTIBODY_PROTEIN_NAME", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": 
"IMGT_ANTIBODY_RECEPTOR_DESCRIPTION", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "IMGT_ANTIBODY_RECEPTOR_TYPE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "INITIATOR_METHIONINE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "INTRAMEMBRANE_REGION", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "IONIC_INTERACTION", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "LIGAND_COVALENT_LINKAGE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "LIGAND_INTERACTION", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "LIGAND_METAL_COORDINATION_LINKAGE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "LIPID_MOIETY_BINDING_REGION", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "MA_QA_METRIC_LOCAL_TYPE_CONTACT_PROBABILITY", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "MA_QA_METRIC_LOCAL_TYPE_DISTANCE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "MA_QA_METRIC_LOCAL_TYPE_ENERGY", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "MA_QA_METRIC_LOCAL_TYPE_IPTM", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "MA_QA_METRIC_LOCAL_TYPE_NORMALIZED_SCORE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "MA_QA_METRIC_LOCAL_TYPE_OTHER", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "MA_QA_METRIC_LOCAL_TYPE_PAE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "MA_QA_METRIC_LOCAL_TYPE_PLDDT", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "MA_QA_METRIC_LOCAL_TYPE_PLDDT_0_1", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "MA_QA_METRIC_LOCAL_TYPE_PLDDT_ALL_ATOM", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "MA_QA_METRIC_LOCAL_TYPE_PLDDT_ALL_ATOM_0_1", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "MA_QA_METRIC_LOCAL_TYPE_PTM", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "MA_QA_METRIC_LOCAL_TYPE_ZSCORE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "MEMBRANE_SEGMENT", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "METAL_COORDINATION", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "METAL_ION_BINDING_SITE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "MISMATCHED_BASE_PAIRS", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "MODIFIED_MONOMER", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "MODIFIED_RESIDUE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "MOGUL_ANGLE_OUTLIER", + 
"description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "MOGUL_BOND_OUTLIER", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "MUTAGENESIS_SITE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "MUTATION", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "NON_CONSECUTIVE_RESIDUES", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "NON_STANDARD_AMINO_ACID", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "NON_TERMINAL_RESIDUE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "NUCLEOTIDE_PHOSPHATE_BINDING_REGION", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "N_GLYCOSYLATION_SITE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "O_GLYCOSYLATION_SITE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "PEPTIDE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "PFAM", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "PROPEPTIDE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "PROTEIN_BINDING", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "RAMACHANDRAN_OUTLIER", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "REGION_OF_INTEREST", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "REPEAT", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "ROTAMER_OUTLIER", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "RSCC_OUTLIER", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "RSRZ_OUTLIER", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "SABDAB_ANTIBODY_ANTIGEN_NAME", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "SABDAB_ANTIBODY_HEAVY_CHAIN_SUBCLASS", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "SABDAB_ANTIBODY_LIGHT_CHAIN_SUBCLASS", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "SABDAB_ANTIBODY_LIGHT_CHAIN_TYPE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "SABDAB_ANTIBODY_NAME", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "SABDAB_ANTIBODY_TARGET", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "SCOP", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "SCOP_2_B_SUPERFAMILY", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "SCOP_2_FAMILY", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "SCOP_2_SUPERFAMILY", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "SEQUENCE_CONFLICT", + "description": null, + 
"isDeprecated": false, + "deprecationReason": null + }, + { + "name": "SEQUENCE_VARIANT", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "SHEET", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "SHORT_SEQUENCE_MOTIF", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "SIGNAL_PEPTIDE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "SITE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "SPLICE_VARIANT", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "STEREO_OUTLIER", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "STRN", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "S_GLYCOSYLATION_SITE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "TOPOLOGICAL_DOMAIN", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "TRANSIT_PEPTIDE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "TRANSMEMBRANE_REGION", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "TURN_TY_1_P", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "UNASSIGNED_SEC_STRUCT", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "UNOBSERVED_ATOM_XYZ", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "UNOBSERVED_RESIDUE_XYZ", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "UNSURE_RESIDUE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "ZERO_OCCUPANCY_ATOM_XYZ", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "ZERO_OCCUPANCY_RESIDUE_XYZ", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "ZINC_FINGER_REGION", + "description": null, + "isDeprecated": false, + "deprecationReason": null + } + ], + "possibleTypes": null + }, + { + "kind": "ENUM", + "name": "FieldName", + "description": null, + "fields": null, + "inputFields": null, + "interfaces": null, + "enumValues": [ + { + "name": "TARGET_ID", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "TYPE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + } + ], + "possibleTypes": null + }, + { + "kind": "SCALAR", + "name": "Float", + "description": "Built-in Float", + "fields": null, + "inputFields": null, + "interfaces": null, + "enumValues": null, + "possibleTypes": null + }, + { + "kind": "ENUM", + "name": "GroupReference", + "description": null, + "fields": null, + "inputFields": null, + "interfaces": null, + "enumValues": [ + { + "name": "MATCHING_UNIPROT_ACCESSION", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "SEQUENCE_IDENTITY", + "description": null, + "isDeprecated": false, + "deprecationReason": null + } + ], + "possibleTypes": null + }, + { + "kind": "SCALAR", + "name": "Int", + "description": "Built-in Int", + "fields": null, + "inputFields": null, + "interfaces": null, + "enumValues": 
null, + "possibleTypes": null + }, + { + "kind": "SCALAR", + "name": "ObjectScalar", + "description": "Built-in scalar for dynamic values", + "fields": null, + "inputFields": null, + "interfaces": null, + "enumValues": null, + "possibleTypes": null + }, + { + "kind": "ENUM", + "name": "OperationType", + "description": null, + "fields": null, + "inputFields": null, + "interfaces": null, + "enumValues": [ + { + "name": "CONTAINS", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "EQUALS", + "description": null, + "isDeprecated": false, + "deprecationReason": null + } + ], + "possibleTypes": null + }, + { + "kind": "OBJECT", + "name": "Query", + "description": "Query root", + "fields": [ + { + "name": "alignments", + "description": "Get sequence alignments", + "args": [ + { + "name": "from", + "description": "Query sequence database", + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "ENUM", + "name": "SequenceReference", + "ofType": null + } + }, + "defaultValue": null + }, + { + "name": "queryId", + "description": "Database sequence identifier", + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "String", + "ofType": null + } + }, + "defaultValue": null + }, + { + "name": "range", + "description": "Optional integer list (2-tuple) to filter alignments to a particular region", + "type": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + } + } + }, + "defaultValue": null + }, + { + "name": "to", + "description": "Target Sequence database", + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "ENUM", + "name": "SequenceReference", + "ofType": null + } + }, + "defaultValue": null + } + ], + "type": { + "kind": "OBJECT", + "name": "SequenceAlignments", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "annotations", + "description": "Get sequence annotations", + "args": [ + { + "name": "filters", + "description": "Optional annotation filter by type or target identifier", + "type": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "INPUT_OBJECT", + "name": "AnnotationFilterInput", + "ofType": null + } + } + }, + "defaultValue": null + }, + { + "name": "queryId", + "description": "Database sequence identifier", + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "String", + "ofType": null + } + }, + "defaultValue": null + }, + { + "name": "range", + "description": "Optional integer list (2-tuple) to filter annotations to a particular region", + "type": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + } + } + }, + "defaultValue": null + }, + { + "name": "reference", + "description": "Query sequence database", + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "ENUM", + "name": "SequenceReference", + "ofType": null + } + }, + "defaultValue": null + }, + { + "name": "sources", + "description": "List defining the annotation collections to be requested", + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "ENUM", + "name": "AnnotationReference", + "ofType": null + } + } + }, + "defaultValue": null + } + ], + "type": { + 
"kind": "LIST", + "name": null, + "ofType": { + "kind": "OBJECT", + "name": "SequenceAnnotations", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "group_alignments", + "description": "Get group alignments", + "args": [ + { + "name": "filter", + "description": "Optional string list of allowed group member identifiers", + "type": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "String", + "ofType": null + } + } + }, + "defaultValue": null + }, + { + "name": "group", + "description": "Target Sequence database", + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "ENUM", + "name": "GroupReference", + "ofType": null + } + }, + "defaultValue": null + }, + { + "name": "groupId", + "description": "Database group identifier", + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "String", + "ofType": null + } + }, + "defaultValue": null + } + ], + "type": { + "kind": "OBJECT", + "name": "SequenceAlignments", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "group_annotations", + "description": "Get group annotations", + "args": [ + { + "name": "filters", + "description": "Optional annotation filter by type or target identifier", + "type": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "INPUT_OBJECT", + "name": "AnnotationFilterInput", + "ofType": null + } + } + }, + "defaultValue": null + }, + { + "name": "group", + "description": "Query sequence database", + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "ENUM", + "name": "GroupReference", + "ofType": null + } + }, + "defaultValue": null + }, + { + "name": "groupId", + "description": "Database sequence identifier", + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "String", + "ofType": null + } + }, + "defaultValue": null + }, + { + "name": "sources", + "description": "List defining the annotation collections to be requested", + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "ENUM", + "name": "AnnotationReference", + "ofType": null + } + } + }, + "defaultValue": null + } + ], + "type": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "OBJECT", + "name": "SequenceAnnotations", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "group_annotations_summary", + "description": "Get a positional summary of group annotations", + "args": [ + { + "name": "filters", + "description": "Optional annotation filter by type or target identifier", + "type": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "INPUT_OBJECT", + "name": "AnnotationFilterInput", + "ofType": null + } + } + }, + "defaultValue": null + }, + { + "name": "group", + "description": "Query sequence database", + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "ENUM", + "name": "GroupReference", + "ofType": null + } + }, + "defaultValue": null + }, + { + "name": "groupId", + "description": "Database sequence identifier", + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "String", + "ofType": null + } + }, + "defaultValue": null + }, + { + "name": "sources", + 
"description": "List defining the annotation collections to be requested", + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "ENUM", + "name": "AnnotationReference", + "ofType": null + } + } + }, + "defaultValue": null + } + ], + "type": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "OBJECT", + "name": "SequenceAnnotations", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + } + ], + "inputFields": null, + "interfaces": [], + "enumValues": null, + "possibleTypes": null + }, + { + "kind": "OBJECT", + "name": "SequenceAlignments", + "description": null, + "fields": [ + { + "name": "alignment_length", + "description": null, + "args": [], + "type": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "alignment_logo", + "description": null, + "args": [], + "type": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "OBJECT", + "name": "AlignmentLogo", + "ofType": null + } + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "query_sequence", + "description": null, + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "target_alignments", + "description": "Multiple sequence alignment of group members.", + "args": [ + { + "name": "first", + "description": null, + "type": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + }, + "defaultValue": null + }, + { + "name": "offset", + "description": null, + "type": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + }, + "defaultValue": null + } + ], + "type": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "OBJECT", + "name": "TargetAlignments", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + } + ], + "inputFields": null, + "interfaces": [], + "enumValues": null, + "possibleTypes": null + }, + { + "kind": "OBJECT", + "name": "SequenceAnnotations", + "description": null, + "fields": [ + { + "name": "features", + "description": "List of positional features", + "args": [], + "type": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "OBJECT", + "name": "Features", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "source", + "description": null, + "args": [], + "type": { + "kind": "ENUM", + "name": "AnnotationReference", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "target_id", + "description": "Database identifier associated to the annotation\n \nExamples:\n101M_1, 2UZI.C, P01112", + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "target_identifiers", + "description": null, + "args": [], + "type": { + "kind": "OBJECT", + "name": "TargetIdentifiers", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + } + ], + "inputFields": null, + "interfaces": [], + "enumValues": null, + "possibleTypes": null + }, + { + "kind": "ENUM", + "name": "SequenceReference", + "description": null, + "fields": null, + "inputFields": null, + "interfaces": null, + "enumValues": [ + { + "name": "NCBI_GENOME", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": 
"NCBI_PROTEIN", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "PDB_ENTITY", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "PDB_INSTANCE", + "description": null, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "UNIPROT", + "description": null, + "isDeprecated": false, + "deprecationReason": null + } + ], + "possibleTypes": null + }, + { + "kind": "SCALAR", + "name": "String", + "description": "Built-in String", + "fields": null, + "inputFields": null, + "interfaces": null, + "enumValues": null, + "possibleTypes": null + }, + { + "kind": "OBJECT", + "name": "Subscription", + "description": "Subscription root", + "fields": [ + { + "name": "alignments_subscription", + "description": "Get sequence alignments", + "args": [ + { + "name": "from", + "description": "Query sequence database", + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "ENUM", + "name": "SequenceReference", + "ofType": null + } + }, + "defaultValue": null + }, + { + "name": "queryId", + "description": "Database sequence identifier", + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "String", + "ofType": null + } + }, + "defaultValue": null + }, + { + "name": "range", + "description": "Optional integer list (2-tuple) to filter alignments to a particular region", + "type": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + } + } + }, + "defaultValue": null + }, + { + "name": "to", + "description": "Target Sequence database", + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "ENUM", + "name": "SequenceReference", + "ofType": null + } + }, + "defaultValue": null + } + ], + "type": { + "kind": "OBJECT", + "name": "TargetAlignments", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "annotations_subscription", + "description": "Get sequence annotations", + "args": [ + { + "name": "filters", + "description": "Optional annotation filter by type or target identifier", + "type": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "INPUT_OBJECT", + "name": "AnnotationFilterInput", + "ofType": null + } + } + }, + "defaultValue": null + }, + { + "name": "queryId", + "description": "Database sequence identifier", + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "String", + "ofType": null + } + }, + "defaultValue": null + }, + { + "name": "range", + "description": "Optional integer list (2-tuple) to filter annotations to a particular region", + "type": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + } + } + }, + "defaultValue": null + }, + { + "name": "reference", + "description": "Query sequence database", + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "ENUM", + "name": "SequenceReference", + "ofType": null + } + }, + "defaultValue": null + }, + { + "name": "sources", + "description": "List defining the annotation collections to be requested", + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "ENUM", + "name": "AnnotationReference", + "ofType": null + } + } + }, + 
"defaultValue": null + } + ], + "type": { + "kind": "OBJECT", + "name": "SequenceAnnotations", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "group_alignments_subscription", + "description": "Get group alignments", + "args": [ + { + "name": "filter", + "description": "Optional string list of allowed group member identifiers", + "type": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "String", + "ofType": null + } + } + }, + "defaultValue": null + }, + { + "name": "group", + "description": "Target Sequence database", + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "ENUM", + "name": "GroupReference", + "ofType": null + } + }, + "defaultValue": null + }, + { + "name": "groupId", + "description": "Database group identifier", + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "String", + "ofType": null + } + }, + "defaultValue": null + } + ], + "type": { + "kind": "OBJECT", + "name": "TargetAlignments", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "group_annotations_subscription", + "description": "Get group annotations", + "args": [ + { + "name": "filters", + "description": "Optional annotation filter by type or target identifier", + "type": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "INPUT_OBJECT", + "name": "AnnotationFilterInput", + "ofType": null + } + } + }, + "defaultValue": null + }, + { + "name": "group", + "description": "Query sequence database", + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "ENUM", + "name": "GroupReference", + "ofType": null + } + }, + "defaultValue": null + }, + { + "name": "groupId", + "description": "Database sequence identifier", + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "String", + "ofType": null + } + }, + "defaultValue": null + }, + { + "name": "sources", + "description": "List defining the annotation collections to be requested", + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "ENUM", + "name": "AnnotationReference", + "ofType": null + } + } + }, + "defaultValue": null + } + ], + "type": { + "kind": "OBJECT", + "name": "SequenceAnnotations", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + } + ], + "inputFields": null, + "interfaces": [], + "enumValues": null, + "possibleTypes": null + }, + { + "kind": "OBJECT", + "name": "TargetAlignments", + "description": null, + "fields": [ + { + "name": "aligned_regions", + "description": null, + "args": [], + "type": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "OBJECT", + "name": "AlignedRegions", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "coverage", + "description": null, + "args": [], + "type": { + "kind": "OBJECT", + "name": "Coverage", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "orientation", + "description": null, + "args": [], + "type": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "target_id", + "description": null, + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + 
"isDeprecated": false, + "deprecationReason": null + }, + { + "name": "target_sequence", + "description": null, + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + } + ], + "inputFields": null, + "interfaces": [], + "enumValues": null, + "possibleTypes": null + }, + { + "kind": "OBJECT", + "name": "TargetIdentifiers", + "description": null, + "fields": [ + { + "name": "assembly_id", + "description": "This item references an assembly in core_assembly\n \nExamples:\n1, 2", + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "asym_id", + "description": "This item references an instance in core_polymer_entity_instance\n \nExamples:\nA, B, 1, 2", + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "entity_id", + "description": "This item references an entity in core_polymer_entity\n \nExamples:\n1, 2", + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "entry_id", + "description": "This item references an entry in core_entry\n \nExamples:\n101M, 1ACB", + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "interface_id", + "description": "This item references an interface in core_interface\n \nExamples:\n1, 2", + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "interface_partner_index", + "description": "This item references a partner of an interface in core_interface\n \nExamples:\nnull, null", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Int", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "target_id", + "description": "Database identifier associated to the annotation\n \nExamples:\n101M_1, 2UZI.C, P01112", + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "uniprot_id", + "description": "This item references a UniProt document in core_uniprot\n \nExamples:\nP01112", + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + } + ], + "inputFields": null, + "interfaces": [], + "enumValues": null, + "possibleTypes": null + }, + { + "kind": "OBJECT", + "name": "__Directive", + "description": null, + "fields": [ + { + "name": "name", + "description": "The __Directive type represents a Directive that a server supports.", + "args": [], + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "String", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "description", + "description": null, + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "isRepeatable", + "description": null, + "args": [], + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "Boolean", + "ofType": null + } + }, + "isDeprecated": 
false, + "deprecationReason": null + }, + { + "name": "locations", + "description": null, + "args": [], + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "ENUM", + "name": "__DirectiveLocation", + "ofType": null + } + } + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "args", + "description": null, + "args": [ + { + "name": "includeDeprecated", + "description": null, + "type": { + "kind": "SCALAR", + "name": "Boolean", + "ofType": null + }, + "defaultValue": "false" + } + ], + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "OBJECT", + "name": "__InputValue", + "ofType": null + } + } + } + }, + "isDeprecated": false, + "deprecationReason": null + } + ], + "inputFields": null, + "interfaces": [], + "enumValues": null, + "possibleTypes": null + }, + { + "kind": "ENUM", + "name": "__DirectiveLocation", + "description": "An enum describing valid locations where a directive can be placed", + "fields": null, + "inputFields": null, + "interfaces": null, + "enumValues": [ + { + "name": "QUERY", + "description": "Indicates the directive is valid on queries.", + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "MUTATION", + "description": "Indicates the directive is valid on mutations.", + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "SUBSCRIPTION", + "description": "Indicates the directive is valid on subscriptions.", + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "FIELD", + "description": "Indicates the directive is valid on fields.", + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "FRAGMENT_DEFINITION", + "description": "Indicates the directive is valid on fragment definitions.", + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "FRAGMENT_SPREAD", + "description": "Indicates the directive is valid on fragment spreads.", + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "INLINE_FRAGMENT", + "description": "Indicates the directive is valid on inline fragments.", + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "VARIABLE_DEFINITION", + "description": "Indicates the directive is valid on variable definitions.", + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "SCHEMA", + "description": "Indicates the directive is valid on a schema SDL definition.", + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "SCALAR", + "description": "Indicates the directive is valid on a scalar SDL definition.", + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "OBJECT", + "description": "Indicates the directive is valid on an object SDL definition.", + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "FIELD_DEFINITION", + "description": "Indicates the directive is valid on a field SDL definition.", + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "ARGUMENT_DEFINITION", + "description": "Indicates the directive is valid on a field argument SDL definition.", + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "INTERFACE", + "description": "Indicates the directive is valid on an interface SDL definition.", + "isDeprecated": false, + "deprecationReason": null + }, + 
{ + "name": "UNION", + "description": "Indicates the directive is valid on an union SDL definition.", + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "ENUM", + "description": "Indicates the directive is valid on an enum SDL definition.", + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "ENUM_VALUE", + "description": "Indicates the directive is valid on an enum value SDL definition.", + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "INPUT_OBJECT", + "description": "Indicates the directive is valid on an input object SDL definition.", + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "INPUT_FIELD_DEFINITION", + "description": "Indicates the directive is valid on an input object field SDL definition.", + "isDeprecated": false, + "deprecationReason": null + } + ], + "possibleTypes": null + }, + { + "kind": "OBJECT", + "name": "__EnumValue", + "description": null, + "fields": [ + { + "name": "name", + "description": null, + "args": [], + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "String", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "description", + "description": null, + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "isDeprecated", + "description": null, + "args": [], + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "Boolean", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "deprecationReason", + "description": null, + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + } + ], + "inputFields": null, + "interfaces": [], + "enumValues": null, + "possibleTypes": null + }, + { + "kind": "OBJECT", + "name": "__Field", + "description": null, + "fields": [ + { + "name": "name", + "description": null, + "args": [], + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "String", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "description", + "description": null, + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "args", + "description": null, + "args": [ + { + "name": "includeDeprecated", + "description": null, + "type": { + "kind": "SCALAR", + "name": "Boolean", + "ofType": null + }, + "defaultValue": "false" + } + ], + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "OBJECT", + "name": "__InputValue", + "ofType": null + } + } + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "type", + "description": null, + "args": [], + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "OBJECT", + "name": "__Type", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "isDeprecated", + "description": null, + "args": [], + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "Boolean", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": 
"deprecationReason", + "description": null, + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + } + ], + "inputFields": null, + "interfaces": [], + "enumValues": null, + "possibleTypes": null + }, + { + "kind": "OBJECT", + "name": "__InputValue", + "description": null, + "fields": [ + { + "name": "name", + "description": null, + "args": [], + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "String", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "description", + "description": null, + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "type", + "description": null, + "args": [], + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "OBJECT", + "name": "__Type", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "defaultValue", + "description": null, + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "isDeprecated", + "description": null, + "args": [], + "type": { + "kind": "SCALAR", + "name": "Boolean", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "deprecationReason", + "description": null, + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + } + ], + "inputFields": null, + "interfaces": [], + "enumValues": null, + "possibleTypes": null + }, + { + "kind": "OBJECT", + "name": "__Schema", + "description": "A GraphQL Introspection defines the capabilities of a GraphQL server. 
It exposes all available types and directives on the server, the entry points for query, mutation, and subscription operations.", + "fields": [ + { + "name": "description", + "description": null, + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "types", + "description": "A list of all types supported by this server.", + "args": [], + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "OBJECT", + "name": "__Type", + "ofType": null + } + } + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "queryType", + "description": "The type that query operations will be rooted at.", + "args": [], + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "OBJECT", + "name": "__Type", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "mutationType", + "description": "If this server supports mutation, the type that mutation operations will be rooted at.", + "args": [], + "type": { + "kind": "OBJECT", + "name": "__Type", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "directives", + "description": "'A list of all directives supported by this server.", + "args": [], + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "OBJECT", + "name": "__Directive", + "ofType": null + } + } + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "subscriptionType", + "description": "'If this server support subscription, the type that subscription operations will be rooted at.", + "args": [], + "type": { + "kind": "OBJECT", + "name": "__Type", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + } + ], + "inputFields": null, + "interfaces": [], + "enumValues": null, + "possibleTypes": null + }, + { + "kind": "OBJECT", + "name": "__Type", + "description": null, + "fields": [ + { + "name": "kind", + "description": null, + "args": [], + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "ENUM", + "name": "__TypeKind", + "ofType": null + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "name", + "description": null, + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "description", + "description": null, + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "fields", + "description": null, + "args": [ + { + "name": "includeDeprecated", + "description": null, + "type": { + "kind": "SCALAR", + "name": "Boolean", + "ofType": null + }, + "defaultValue": "false" + } + ], + "type": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "OBJECT", + "name": "__Field", + "ofType": null + } + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "interfaces", + "description": null, + "args": [], + "type": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "OBJECT", + "name": "__Type", + "ofType": null + 
} + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "possibleTypes", + "description": null, + "args": [], + "type": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "OBJECT", + "name": "__Type", + "ofType": null + } + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "enumValues", + "description": null, + "args": [ + { + "name": "includeDeprecated", + "description": null, + "type": { + "kind": "SCALAR", + "name": "Boolean", + "ofType": null + }, + "defaultValue": "false" + } + ], + "type": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "OBJECT", + "name": "__EnumValue", + "ofType": null + } + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "inputFields", + "description": null, + "args": [ + { + "name": "includeDeprecated", + "description": null, + "type": { + "kind": "SCALAR", + "name": "Boolean", + "ofType": null + }, + "defaultValue": "false" + } + ], + "type": { + "kind": "LIST", + "name": null, + "ofType": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "OBJECT", + "name": "__InputValue", + "ofType": null + } + } + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "ofType", + "description": null, + "args": [], + "type": { + "kind": "OBJECT", + "name": "__Type", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "isOneOf", + "description": "This field is considered experimental because it has not yet been ratified in the graphql specification", + "args": [], + "type": { + "kind": "SCALAR", + "name": "Boolean", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "specifiedByURL", + "description": null, + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "specifiedByUrl", + "description": null, + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": true, + "deprecationReason": "This legacy name has been replaced by `specifiedByURL`" + } + ], + "inputFields": null, + "interfaces": [], + "enumValues": null, + "possibleTypes": null + }, + { + "kind": "ENUM", + "name": "__TypeKind", + "description": "An enum describing what kind of type a given __Type is", + "fields": null, + "inputFields": null, + "interfaces": null, + "enumValues": [ + { + "name": "SCALAR", + "description": "Indicates this type is a scalar. 'specifiedByURL' is a valid field", + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "OBJECT", + "description": "Indicates this type is an object. `fields` and `interfaces` are valid fields.", + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "INTERFACE", + "description": "Indicates this type is an interface. `fields` and `possibleTypes` are valid fields.", + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "UNION", + "description": "Indicates this type is a union. `possibleTypes` is a valid field.", + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "ENUM", + "description": "Indicates this type is an enum. `enumValues` is a valid field.", + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "INPUT_OBJECT", + "description": "Indicates this type is an input object. 
`inputFields` is a valid field.", + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "LIST", + "description": "Indicates this type is a list. `ofType` is a valid field.", + "isDeprecated": false, + "deprecationReason": null + }, + { + "name": "NON_NULL", + "description": "Indicates this type is a non-null. `ofType` is a valid field.", + "isDeprecated": false, + "deprecationReason": null + } + ], + "possibleTypes": null + } + ], + "directives": [ + { + "name": "include", + "description": "Directs the executor to include this field or fragment only when the `if` argument is true", + "locations": [ + "FIELD", + "FRAGMENT_SPREAD", + "INLINE_FRAGMENT" + ], + "args": [ + { + "name": "if", + "description": "Included when true.", + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "Boolean", + "ofType": null + } + }, + "defaultValue": null + } + ] + }, + { + "name": "skip", + "description": "Directs the executor to skip this field or fragment when the `if` argument is true.", + "locations": [ + "FIELD", + "FRAGMENT_SPREAD", + "INLINE_FRAGMENT" + ], + "args": [ + { + "name": "if", + "description": "Skipped when true.", + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "Boolean", + "ofType": null + } + }, + "defaultValue": null + } + ] + }, + { + "name": "deprecated", + "description": "Marks the field, argument, input field or enum value as deprecated", + "locations": [ + "FIELD_DEFINITION", + "ARGUMENT_DEFINITION", + "ENUM_VALUE", + "INPUT_FIELD_DEFINITION" + ], + "args": [ + { + "name": "reason", + "description": "The reason for the deprecation", + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "defaultValue": "\"No longer supported\"" + } + ] + }, + { + "name": "oneOf", + "description": "Indicates an Input Object is a OneOf Input Object.", + "locations": [ + "INPUT_OBJECT" + ], + "args": [] + }, + { + "name": "specifiedBy", + "description": "Exposes a URL that specifies the behaviour of this scalar.", + "locations": [ + "SCALAR" + ], + "args": [ + { + "name": "url", + "description": "The URL that specifies the behaviour of this scalar.", + "type": { + "kind": "NON_NULL", + "name": null, + "ofType": { + "kind": "SCALAR", + "name": "String", + "ofType": null + } + }, + "defaultValue": null + } + ] + } + ] + } + } +} \ No newline at end of file diff --git a/rcsbapi/sequence/schema.py b/rcsbapi/sequence/schema.py index 5e9b469..530a38a 100644 --- a/rcsbapi/sequence/schema.py +++ b/rcsbapi/sequence/schema.py @@ -103,7 +103,7 @@ def set_field_list(self, field_list: List[FieldNode]): self.field_list = field_list -class CoordSchema: +class SeqSchema: """ GraphQL schema defining available fields, types, and how they are connected. """ @@ -112,7 +112,7 @@ def __init__(self) -> None: """ GraphQL schema defining available fields, types, and how they are connected. """ - self.pdb_url: str = seq_const.API_ENDPOINT + self.pdb_url: str = seq_const.API_ENDPOINT + "/graphql" self.timeout: int = config.DATA_API_TIMEOUT # TODO: change? 
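         # assuming the const change in this patch leaves seq_const.API_ENDPOINT as the bare
         # host URL, pdb_url resolves to "https://sequence-coordinates.rcsb.org/graphql";
         # the introspection requests below post to this URL with this timeout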
self.schema: Dict = self.fetch_schema() """JSON resulting from full introspection of the GraphQL schema""" @@ -151,8 +151,8 @@ def _request_root_types(self) -> Dict: Dict: JSON response of introspection request """ root_query = {"query": """query IntrospectionQuery{ __schema{ queryType{ fields{ name args - { name description type{ ofType{ name kind ofType{ inputFields {name type { ofType { kind ofType { ofType { kind name } } } } } kind name ofType{ name kind - } } } } } } } } }"""} + { name description type{ kind ofType{ name kind ofType{ inputFields {name type { kind ofType { name kind ofType { ofType { kind name ofType {kind name}} } } } } + kind name ofType{name kind} } } } } } } } }"""} response = requests.post(headers={"Content-Type": "application/json"}, json=root_query, url=self.pdb_url, timeout=self.timeout) return response.json() @@ -163,7 +163,7 @@ def _construct_root_dict(self) -> Dict[str, List[Dict[str, str]]]: Dict[str, List[Dict]]: Dict where keys are the type names. Values are lists of dictionaries with information about arguments. - ex: {"entry": [{'name': 'entry_id', 'description': '', 'kind': 'SCALAR', 'type': 'String'}]} + ex: {"alignments": [{'name': 'from', 'description': 'Query sequence database'...}, ...], ...} """ response = self._root_introspection root_dict: Dict[str, List[Dict[str, str]]] = {} @@ -174,22 +174,25 @@ def _construct_root_dict(self) -> Dict[str, List[Dict[str, str]]]: for arg_dict in arg_dict_list: arg_name = arg_dict["name"] arg_description = arg_dict["description"] - arg_kind = arg_dict["type"]["ofType"]["kind"] + arg_kind = arg_dict["type"]["kind"] arg_of_kind = "" - arg_of_kind_name = "" - if arg_kind == 'LIST': - arg_of_kind = arg_dict["type"]["ofType"]["ofType"]["kind"] - arg_of_kind_name = arg_dict["type"]["ofType"]["ofType"]["name"] - arg_type = self._find_type_name(arg_dict["type"]["ofType"]) + arg_of_type = "" + if arg_kind == "LIST" or arg_kind == "NON_NULL": + arg_of_kind = arg_dict["type"]["ofType"]["kind"] + arg_of_type = self._find_type_name(arg_dict["type"]["ofType"]) + input_fields = "" + if ("ofType" in arg_dict["type"]["ofType"]) and (arg_dict["type"]["ofType"]["ofType"] is not None): + if ("inputFields" in arg_dict["type"]["ofType"]["ofType"]) and (arg_dict["type"]["ofType"]["ofType"]["inputFields"] is not None): + input_fields = arg_dict["type"]["ofType"]["ofType"]["inputFields"] if root_name not in root_dict: root_dict[root_name] = [] root_dict[root_name].append({ "name": arg_name, "description": arg_description, - "type": arg_type, "kind": arg_kind, "of_kind": arg_of_kind, - "of_kind_name": arg_of_kind_name + "of_type": arg_of_type, + "input_fields": input_fields }) return root_dict @@ -222,7 +225,7 @@ def fetch_schema(self) -> Dict: return schema_response.json() logger.info("Loading data schema from file") current_dir = os.path.dirname(os.path.abspath(__file__)) - json_file_path = os.path.join(current_dir, "../", "resources", "data_api_schema.json") + json_file_path = os.path.join(current_dir, "resources", "seq_api_schema.json") with open(json_file_path, "r", encoding="utf-8") as schema_file: return json.load(schema_file) @@ -499,7 +502,7 @@ def find_field_names(self, search_string: str) -> List[str]: def construct_query( self, query_type: str, - query_args: Union[Dict[str, Dict], Dict[str, str]], + query_args: Union[Dict[str, str], Dict[str, list]], return_data_list: List[str], suppress_autocomplete_warning=False ) -> Dict: @@ -552,7 +555,7 @@ def construct_query( def _construct_query_rustworkx( self, query_type: str, - 
query_args: Union[Dict[str, Dict], Dict[str, str]], + query_args: Union[Dict[str, str], Dict[str, list]], return_data_list: List[str], suppress_autocomplete_warning: bool = False, ) -> Dict: @@ -754,7 +757,7 @@ def _construct_query_rustworkx( json_query = {"query": f"{query}"} return json_query - def format_args(self, arg_dict: Dict[str, str], input_value: str) -> str: + def format_args(self, arg_dict: Union[Dict[str, list], Dict[str, str]], input_value: Union[str, List[str]]) -> str: """Add double quotes or omit quotes around a single GraphQL argument Args: @@ -765,18 +768,19 @@ def format_args(self, arg_dict: Dict[str, str], input_value: str) -> str: str: returns input value formatted with quotes, no quotes, or as a list """ format_arg = "" - if arg_dict["type"] == "String": - # If arg type is string, add double quotes around value - format_arg += f'{arg_dict["name"]}: "{input_value}"' - elif arg_dict["kind"] == "LIST": - if ["of_kind_name"] == "String": + if arg_dict["kind"] == "LIST" or arg_dict["of_kind"] == "LIST": + if arg_dict["of_type"] == "String": # Add double quotes around each item format_arg += f'{arg_dict["name"]}: {str(input_value).replace("'", '"')}' else: - # Remove single quotes + # Remove single quotes if not string format_arg += f'{arg_dict["name"]}: {str(input_value).replace("'", "")}' + elif arg_dict["of_type"] == "String": + # If arg type is string, add double quotes around value + format_arg += f'{arg_dict["name"]}: "{input_value}"' else: - format_arg += f'{arg_dict["name"]}: {input_value}' + assert isinstance(input_value, str) + format_arg += f"{arg_dict["name"]}: {input_value}" return format_arg def _find_idx_path(self, dot_path: List[str], idx_list: List[int], node_idx: int) -> List[int]: @@ -964,3 +968,62 @@ def find_paths(self, input_type: str, return_data_name: str, descriptions: bool return description_dict dot_paths.sort() return dot_paths + + def read_enum(self, type_name: str) -> List[str]: + """parse given type name into a list of enumeration values + + Args: + type_name (str): GraphQL type name + """ + for type_dict in self.schema["data"]["__schema"]["types"]: + if type_dict["name"] == type_name: + enum_values = [] + for value in type_dict["enumValues"]: + enum_values.append(value["name"]) + return enum_values + + def check_typing(self, query_type: str, enum_types, args: Dict[str, Any]): + """Check that arguments match typing specified in schema + + Args: + query_type (str): Name of query field (annotations, alignments, etc) + enum_types (Enum): Enum class of GraphQL types that are enumerations. 
+                Values are lists of valid strings corresponding to enumerations
+            args (Dict[str, Any]): keyword arguments corresponding to query-specific arguments
+        """
+        error_list = []
+        arg_dict_list = self._root_dict[query_type]
+        for arg_dict in arg_dict_list:
+            arg_type = arg_dict["of_type"]
+            arg_name = arg_dict["name"]
+
+            if arg_name not in args:
+                continue
+
+            if arg_dict["kind"] == "NON_NULL":
+                if arg_dict["of_kind"] == "ENUM":
+                    if args[arg_name] not in enum_types[arg_type].value:
+                        error_list.append(
+                            f"Invalid value '{args[arg_name]}' for '{arg_name}': valid values are {enum_types[arg_type].value}"
+                        )
+
+            if arg_dict["kind"] == "LIST":
+                if not isinstance(args[arg_name], list):
+                    error_list.append(
+                        f"'{arg_name}' must be a list"
+                    )
+
+            # List of ENUMs
+            if arg_dict["kind"] == "NON_NULL":
+                if arg_dict["of_kind"] == "LIST":
+                    mismatch_type = [item for item in args[arg_name] if item not in enum_types[arg_type].value]
+                    if mismatch_type:
+                        raise ValueError(
+                            f"Invalid value(s) {mismatch_type} for '{arg_name}': valid values are {enum_types[arg_type].value}"
+                        )
+
+        if error_list:
+            raise ValueError(
+                "\n" + "    "
+                + "\n    ".join(error_list)
+            )
diff --git a/tests/test_data_query.py b/tests/test_data_query.py
index 02f8431..5290b85 100644
--- a/tests/test_data_query.py
+++ b/tests/test_data_query.py
@@ -1,6 +1,6 @@
 ##
-# File: testquery.py
-# Author:
+# File: test_data_query.py
+# Author: Ivana Truong
 # Date:
 # Version:
 #
diff --git a/tests/test_data_schema.py b/tests/test_data_schema.py
index 81c234d..03da595 100644
--- a/tests/test_data_schema.py
+++ b/tests/test_data_schema.py
@@ -1,6 +1,6 @@
 ##
-# File: testschema.py
-# Author:
+# File: test_data_schema.py
+# Author: Ivana Truong
 # Date:
 # Version:
 #
diff --git a/tests/test_seq_query.py b/tests/test_seq_query.py
new file mode 100644
index 0000000..d996f3d
--- /dev/null
+++ b/tests/test_seq_query.py
@@ -0,0 +1,176 @@
+##
+# File: test_seq_query.py
+# Author: Ivana Truong
+# Date:
+# Version:
+#
+# Update:
+#
+#
+##
+"""
+Tests for query classes of the sequence coordinates module. (Work in progress)
+"""
+
+__docformat__ = "google en"
+__author__ = ""
+__email__ = ""
+__license__ = ""
+
+import logging
+
+# import platform
+# import resource
+import time
+import unittest
+# import rustworkx as rx
+# import networkx as nx
+
+from rcsbapi.sequence.query import alignments, group_alignments, annotations, group_annotations, group_annotations_summary, AnnotationFilterInput
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+
+class SeqTests(unittest.TestCase):
+    def setUp(self):
+        self.__startTime = time.time()
+        logger.info("Starting %s at %s", self.id().split(".")[-1], time.strftime("%Y %m %d %H:%M:%S", time.localtime()))
+
+    def tearDown(self) -> None:
+        endTime = time.time()
+        logger.info("Completed %s at %s (%.4f seconds)", self.id().split(".")[-1], time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)
+
+    def testAnnotations(self) -> None:
+        with self.subTest(msg="1. Annotations query with filter"):
+            try:
+                query_obj = annotations(
+                    reference="NCBI_GENOME",
+                    sources=["PDB_INSTANCE"],
+                    queryId="NC_000001",
+                    filters=[
+                        AnnotationFilterInput(
+                            field="TYPE",
+                            operation="EQUALS",
+                            values=["BINDING_SITE"],
+                            source="UNIPROT"
+                        )
+                    ],
+                    return_data_list=["features.description"]
+                )
+                query_obj.exec()
+            except Exception as error:
+                self.fail(f"Failed unexpectedly: {error}")
+
+    def testAlignments(self) -> None:
+        with self.subTest(msg="1. 
Alignments query without filter"): + try: + query_obj = alignments( + from_="NCBI_PROTEIN", + to="PDB_ENTITY", + queryId="XP_642496", + return_data_list=["target_id"] + ) + query_obj.exec() + except Exception as error: + self.fail(f"Failed unexpectedly: {error}") + + def testGroupAlignments(self) -> None: + with self.subTest(msg="1. group_alignments query without filter"): + try: + query_obj = group_alignments( + group="MATCHING_UNIPROT_ACCESSION", + groupId="P01112", + return_data_list=["target_id"], + ) + query_obj.exec() + except Exception as error: + self.fail(f"Failed unexpectedly: {error}") + with self.subTest(msg="2. group_alignments query with filter"): + try: + query_obj = group_alignments( + group="MATCHING_UNIPROT_ACCESSION", + groupId="P01112", + return_data_list=["target_id"], + filter=["8CNJ_1", "8FG4_1"] + ) + query_obj.exec() + except Exception as error: + self.fail(f"Failed unexpectedly: {error}") + + def testGroupAnnotations(self) -> None: + with self.subTest(msg="1. group_annotations query without filter"): + try: + query_obj = group_annotations( + group="MATCHING_UNIPROT_ACCESSION", + groupId="P01112", + sources=["PDB_ENTITY"], + return_data_list=["target_id"] + ) + query_obj.exec() + except Exception as error: + self.fail(f"Failed unexpectedly: {error}") + with self.subTest(msg="2. group_annotations query with filter"): + try: + query_obj = group_annotations( + group="MATCHING_UNIPROT_ACCESSION", + groupId="P01112", + sources=["PDB_ENTITY"], + filters=[ + AnnotationFilterInput( + field="TYPE", + operation="EQUALS", + values=["BINDING_SITE"], + source="UNIPROT" + ) + ], + return_data_list=["target_id"] + ) + query_obj.exec() + except Exception as error: + self.fail(f"Failed unexpectedly: {error}") + + def testGroupAnnotationsSummary(self): + with self.subTest(msg="1. group_annotations_summary query without filter"): + try: + query_obj = group_annotations_summary( + group="MATCHING_UNIPROT_ACCESSION", + groupId="P01112", + sources=["PDB_INSTANCE"], + return_data_list=["target_id", "features.type"] + ) + query_obj.exec() + except Exception as error: + self.fail(f"Failed unexpectedly: {error}") + with self.subTest(msg="2. 
group_annotations_summary query with filter"): + try: + query_obj = group_annotations_summary( + group="MATCHING_UNIPROT_ACCESSION", + groupId="P01112", + sources=["PDB_INSTANCE"], + filters=[ + AnnotationFilterInput( + field="TYPE", + operation="EQUALS", + values=["BINDING_SITE"], + source="UNIPROT" + ) + ], + return_data_list=["target_id", "features.type"] + ) + query_obj.exec() + except Exception as error: + self.fail(f"Failed unexpectedly: {error}") + + +def buildQuery(): + suiteSelect = unittest.TestSuite() + suiteSelect.addTest(SeqTests("testAnnotations")) + suiteSelect.addTest(SeqTests("testAlignments")) + suiteSelect.addTest(SeqTests("testGroupAlignments")) + return suiteSelect + + +if __name__ == "__main__": + mySuite = buildQuery() + unittest.TextTestRunner(verbosity=2).run(mySuite) From 8f34db63a98c97dc7b4ca7b58ad2f9a5d634fe62 Mon Sep 17 00:00:00 2001 From: Ivana Truong Date: Mon, 2 Dec 2024 16:02:15 -0600 Subject: [PATCH 03/12] add ruff config, rename query/schema, edit tests, edit construct_query (WIP) --- pyproject.toml | 554 ++++++++++++++++ rcsbapi/sequence/__init__.py | 8 +- rcsbapi/sequence/{query.py => seq_query.py} | 39 +- rcsbapi/sequence/{schema.py => seq_schema.py} | 605 ++++++++++-------- tests/test_seq_query.py | 38 +- 5 files changed, 931 insertions(+), 313 deletions(-) create mode 100644 pyproject.toml rename rcsbapi/sequence/{query.py => seq_query.py} (92%) rename rcsbapi/sequence/{schema.py => seq_schema.py} (64%) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..dacd63f --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,554 @@ +# SPDX-FileCopyrightText: Copyright 2020-2024, Contributors to Tyrannosaurus +# SPDX-PackageHomePage: https://github.com/dmyersturnbull/tyrannosaurus +# SPDX-License-Identifier: Apache-2.0 + +# +######################################################################################### +# [build-system] +######################################################################################### + +[build-system] +build-backend = "hatchling.build" +requires = ["hatchling~=1.25"] + +# +######################################################################################### +# [project] +######################################################################################### + +[project] +name = "cicd" +version = "0.0.1-alpha0" +requires-python = "~=3.12" +# 'readme' |--> 'description' in importlib.metadata +readme = { file = "README.md", content-type = "text/markdown" } +# 'description' |--> 'summary' in importlib.metadata +description = "CI/CD test repo" +maintainers = [ + { name = "Douglas Myers-Turnbull", email = " dmyersturnbull@gmail.com" }, +] +authors = [ + { name = "Douglas Myers-Turnbull", email = " dmyersturnbull@gmail.com" }, +] +keywords = ["python", "ci", "cd"] +# The Python Packaging Authority has contradictory recommendations for specifying the license: +# - If the license is well-known, add a Trove classifier and omit `project.license`. +# - If you do that, tools like pip won't know what the license is. +# - PEP 639, which is actively developed but not targeted for a release, +# deprecates the Trove classifiers in favor of `project.license` as an SPDX expression: +# license = "Apache-2.0" +# license-files = ["LICENSE.txt", "3rd-party-notices/**/*.txt"] +# The former maps to the core metadata `License-Expression`, now a string. +# Unfortunately, following that now breaks everything. +# note: PyPI is required to error if License-Expression and a Trove license classifier are present. 
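+# e.g. (hypothetical) a distribution whose metadata carried both "License-Expression: Apache-2.0"
+# and "Classifier: License :: OSI Approved :: Apache Software License" would have to be rejected.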
+# see: https://peps.python.org/pep-0639/#deprecate-license-classifiers +# see: https://discuss.python.org/t/pep-639-round-3-improving-license-clarity-with-better-package-metadata/53020 +# see: https://github.com/python/peps/pull/3866 + +# which maps to License-Expression +license = { text = "Apache-2.0" } +classifiers = [ + "Development Status :: 2 - Pre-Alpha", + "Environment :: Console", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Natural Language :: English", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3 :: Only", + "Topic :: Software Development :: Build Tools", + "Topic :: Software Development :: Code Generators", + "Typing :: Typed", +] +dependencies = [ + "loguru >=0.7.2", + "platformdirs >=4.3.2", + "rich >=13.9.1", + "typer-slim >=0.12.5", + "tzdata >=2024.2; platform_system == 'Windows'", # /usr/share/zoneinfo uses IANA zones; Windows does not +] + +[project.optional-dependencies] +cli = ["typer-slim[standard] >=0.12.5"] + +# +#=================================== scripts =================================# +# +[project.scripts] +# :tyranno: ${project.name} = "${project.name}.__main__:cli" +tyranno = "cicd.__main__:cli" + +# +#==================================== URIs ===================================# +# +[project.urls] +# :tyranno: "https://github.com/${.frag}" +Homepage = "https://github.com/dmyersturnbull/cicd" +# :tyranno: "https://github.com/${.frag}" +Source = "https://github.com/dmyersturnbull/cicd" +# :tyranno: Documentation = https://${.vendor}.github.io/${project.name} +Documentation = "https://dmyersturnbull.github.io/cicd" +# :tyranno: "Issue Tracker" = ${.home}/issues +Tracker = "https://github.com/dmyersturnbull/cicd/issues" +# :tyranno: Download = "https://pypi.org/project/${project.name}" +Download = "https://pypi.org/project/cicd/" +# :tyranno: "Release Notes" = "https://github.com/${.frag}/releases" +"Release Notes" = "https://github.com/dmyersturnbull/cicd/releases" +# :tyranno: #Sponsor = "https://${.vendor}.github.io/${project.name}/placeholder#sponsor" +Sponsor = "https://${.vendor}.github.io/${project.name}/placeholder#sponsor" + +# +######################################################################################### +# [tool.hatch] +######################################################################################### + +# +#============================= Hatch environments ============================# +# + +[tool.hatch.envs.default] +skip-install = true +dependencies = ["pre-commit >=4.0", "mypy >=1.11", "ruff >=0.6.9"] + +[tool.hatch.envs.test] +skip-install = false +dependencies = [ + "hypothesis >=6.110", + "pytest >=8.3", + "tzdata >=2024.2; platform_system == 'Windows'", + "coverage[toml] >=7.6.1", + "pytest-cov >=5.0", +] + +[tool.hatch.envs.docs] +skip-install = true +dependencies = [ + "mkdocs >=1.6", + "mike >=2.1", + "mkdocs-literate-nav >=0.6.1", + "mkdocs-material >=9.5.32", + "mkdocs-minify-plugin >=0.8", + "mkdocstrings[python] >=0.26", + "mkdocs-table-reader-plugin >=3.1", +] + +# +#=============================== Hatch scripts ===============================# + +# +# -------------------- default environment scripts ------------------ +[tool.hatch.envs.default.scripts] + +# 'clean' removes unnecessary files (always safe to run) +clean = "tyranno clean {args}" + +# hatch now has a built-in 'fmt' subcommand, which runs ruff +# https://hatch.pypa.io/latest/cli/reference/#hatch-fmt + +# 'format' formats and fixes issues but does not complain +format = 
[ + "ruff format", + "ruff check --fix-only", + "npx prettier .", +] + +# 'format-python' just excludes Prettier +format-python = ["ruff format", "ruff check --fix-only"] + +# 'check' only checks but never fixes +check = ["ruff check --no-fix .", "mypy --non-interactive src tests"] + +# only checks with ruff (mypy has a lot of false positives) +check-ruff = "ruff check --no-fix . {args}" + +# to complement 'check-ruff' +check-mypy = "mypy --non-interactive src tests {args}" + +# only check security via Bandit; we require these rules to pass +check-security = "ruff check --select S --no-fix ." + +# +# --------------------- `test` environment scripts ------------------ +[tool.hatch.envs.test.scripts] + +# hatch now has a hatch-test subcommand +# https://hatch.pypa.io/latest/cli/reference/#hatch-test +# it's an alternative to pytest +# hatch test --randomize --cover + +# 'test' uses args in [tool.pytest] section (you can pass more) +test = "pytest {args}" + +# +# --------------------- `docs` environment scripts ------------------ +[tool.hatch.envs.docs.scripts] + +# build docs and fail for any warning +build-docs = "mkdocs build --clean --strict {args}" +# preview docs (only the current version) +# also builds, but only prints warnings +serve-docs = "mkdocs serve {args}" + +# - - - - scripts for CI/CD - - - - +# - - - - !! DANGER ZONE - - - - +# deploys the **latest** docs to GitHub Pages +# this will overwrite the 'latest' alias (which is a tag on the GH Pages branch) +x-deploy-docs = "mike deploy {args} latest --update-aliases" + +# - - - - scripts to deploy docs - - - +# - - - - !! DANGER ZONE - - - +# deletes all versions of docs from gh-pages for rebuilding +x-obliterate-docs = "mike delete --all" +# needed only if the docs were deleted or a prior deployment failed +x-redeploy-docs = "mike deploy --strict {args}" +# serves all versions of docs, complete with version selector +x-serve-docs = "mike serve --strict {args}" + +# +######################################################################################### +# [tool.ruff] +######################################################################################### + +[tool.ruff] + +# +#=================================== Ruff basics =============================# + +line-length = 120 # 120 is hatch's default via 'hatch init' +include = [ + "*.py", # Source + "pyproject.toml", # This file (RUF rules only) + "*.ipynb", # Jupyter notebooks +] + +# +#=================================== Ruff lint ==============================# + +[tool.ruff.lint] + +# +# ------------------------- Ruff misc. 
options ---------------------- +preview = true +explicit-preview-rules = true + +# +# ---------------------- Ruff selected rule sets -------------------- + +select = [ + "A", # flake8-builtins + #"AIR", # Airflow //DISABLED: enable if using airflow// + "ANN", # flake8-annotations + #"ARG", # flake8-unused-args //DISABLED: correct in overrides// + "ASYNC", # flake8-async + "B", # flake8-bugbear + #"BLE", # flake8-blind-exception (try: * except: only) //DISABLED: covered by E722 (?)// + "C", # flake8-comprehensions + "C4", # flake8-comprehensions + #"C90", # mmcabe //DISABLED: useful but thin-lined and time-intensive// + #"COM", # flake8-commas //DISABLED: handled by formatter// + #"CPY", # flake8-copyright //DISABLED: handle carefully instead; also unmaintained// + "D", # pydocstyle + #"DJ", # flake8-django //DISABLED: enable if using Django// + "DTZ", # flake8-datetimez + #"E", # pycodestyle ERRORs //DISABLED: almost all handled by formatter// + "E502", # redundant backslash + "E71", # == None, != False, not in, not is, etc. + "E72", # bare except + "E73", # lambda assignment + "E74", # easily confused characters in names + "E9", # can't lint -- syntax error, etc. + "EM", # flake8-errmsg + #"ERA", # eradicate (commented-out code) //DISABLED: catches code in comments, etc.// + #"EXE", # flake8-executable //DISABLED: pre-commit shebang rules instead// + "F", # pyflakes + "FA", # flake8-future-annotations + #"FBT", # flake8-boolean-trap //DISABLED: debatable and sometimes unavoidable// + #"FIX", # flake8-fix*me //DISABLED: bans TO*DO, etc. and not configurable// + "FLY", # flynt (https://pypi.org/project/flynt/; updates .format) + "FURB", # refurb (misc., mostly fixable) + #"E", # flake8 errors //DISABLED: handled by formatter or nitpicky// + "G", # flake8-logging-format + #"I", # isort //DISABLED: handled by formatter// + #"ICN", # flake8-import-conventionals //DISABLED: handled by formatter// + "INP", # flake8-no-pep420 (require __init__.py to import) + "INT", # flake8-gettext (i18n) + "ISC", # flake8-implicit-str-concat + "LOG", # flake8-logging + "N", # pep8-naming + #"NPY", # numpy-specific rules //DISABLED: enable if using numpy// + #"PD", # pandas-vet //DISABLED: enable if using pandas// + "PERF", # perflint + "PGH", # pygrep-hooks + "PIE", # flake8-pie (misc. 
lint rules) + "PL", # pylint + "PT", # flake8-pytest-style + "PTH", # flake8-use-pathlib + #"PYI", # flake8-pyi (rules for stub files) //DISABLED: auto-generate stub files// + #"Q", # flake8-quotes //DISABLED: handled by formatter// + #"RSE", # flake8-raise //DISABLED: handled by formatter// + "RET", # flake8-return (fixable) + "RUF", # Ruff-specific tests + "S", # bandit + "SIM", # flake8-simplify + "SLF", # flake8-self (private access) + "SLOT", # flake8-slots + "T10", # flake8-debugger (bans debugger) + "T20", # flake8-print (bands print and pprint) + "TD", # flake8-to*do (mandates syntax; note: use 'to-do' in prose) + "TCH", # flake8-type-checking + "TID", # flake8-tidy-imports (bans relative imports) + "TRY", # tryceratops (try-except-raise) + "UP", # pyupgrade + #"W", # warnings //DISABLED: most fixed by formatter// + "W60", # E605 invalid escape character + "YTT", # flake8-2020 (incorrect Python vr checks) + "RUF", # Ruff-specific rules +] + +# +# ------------------------ Ruff ignored rules ---------------------- +ignore = [ + "COM812", # contradicts Ruff formatter + "D107", # missing docstring in __init__ (put in class docstring) + "D212", # multi-line docstring start (contradictory) + "E203", # clons with space before (sometimes useful expand) + "E225", # missing whitespace around operator (sometimes useful to condense) + "E501", # line > 79 chars (we use Ruff's formatter) + "INP001", # missing __init__ -- false positives + "ISC001", # contradicts Ruff formatter + "PLR0912", # too many branches + "PLR1711", # return None at end + "PLR0916", # too many boolean expressions + "PLR2044", # line with empty comment + "PLR6301", # 'self' not used -- correct for overrides etc.; and required in +] + +# +# ------------------------ Ruff unfixable rules --------------------- +unfixable = [ + "F601", # repeated key in dict {} + "F602", # repeated key in dict() + "F811", # unused and redefined + "F841", # unused variable + "PGH004", # blanketed noqa + "PLE2510", # invalid unescaped characters + "RUF100", # unused noqa + "SIM112", # uncapitalized env var + "TD006", # 'to*do' -> 'TO*DO' + "W605", # invalid escape sequence +] + +# +# ----------------------- Ruff delegated options -------------------- + +flake8-bandit.check-typed-exception = true +pydocstyle.convention = "google" +flake8-tidy-imports.ban-relative-imports = "all" + +[tool.ruff.lint.pylint] +max-nested-blocks = 3 # default is 5 + +# +# ------------------------ Ruff per-file ignores -------------------- +[tool.ruff.lint.per-file-ignores] +"tests/**/*" = [ + "INP001", # missing __init__ + "PLR2004", # magic value + "S101", # assert + "TID252", # relative import + "S105", # possible password + "S106", + "S107", + "S108", # harcoded temp file +] + +# +######################################################################################### +# [tool.pytest] +######################################################################################### + +# A future version of pytest will create [tool.pytest] +[tool.pytest.ini_options] + +# show log output from the tests +# in the tests/ code, name the logger {pkg}-TEST to differentiate +log_cli = true +log_cli_level = "INFO" +log_cli_format = "%(asctime)s [%(levelname)8s] %(name)s: %(message)s (%(filename)s:%(lineno)s)" +log_cli_date_format = "%Y-%m-%d %H:%M:%S" + +doctest_optionflags = [ + "DONT_ACCEPT_TRUE_FOR_1", # 1 is not the same as True + "NORMALIZE_WHITESPACE", # allows us to wrap expected output to 120 lines (ignores newlines) +] + +# create markers so we can disable internet (or 
network-connected), or very slow tests +markers = [ + #"integration: marks integration tests (deselect with `-m 'not integration'`)", + "slow: marks tests that take a long time to run (select with `-m slow`)", + "net: marks tests that require internet access (select with `-m net`)", + "ux: marks tests that require manual input such as in-browser (select with `-m ux`)", +] + +pythonpath = "./src" # critical! +# coverage stops recursing after it finds one dir without an __init__.py +# so if it finds src/java-app before src/pyapp, it won't find pyapp +# So specify exactly which directories to test +# :tyranno: addopts = """ +# :tyranno: --cov=src/${project.name} +addopts = """ + --cov=src/cicd \ + --cov-report json:.coverage.json \ + --cov-report term \ + --cov-config=pyproject.toml \ + --random-order \ + --doctest-modules \ + --strict-markers \ + -m 'not (slow or net or ux)' \ + tests/ +""" + +# +######################################################################################### +# [tool.coverage] +######################################################################################### + +[tool.coverage.run] +data_file = ".coverage.json" +branch = true # quantify % coverage of execution branches +parallel = true # note that pytest-cov overrides this +# :tyranno: source_pkgs = ["src/${project.name}"] +source_pkgs = ["cicd"] +omit = ["src/**/__main__.py"] + +[tool.coverage.paths] +source = ["src/"] + +[tool.coverage.report] +fail_under = 50 # 50% coverage required +precision = 1 # n decimal points for coverage % +show_missing = true +exclude_lines = [ + "nocov", + "pragma: no cover", + "def __repr__", + "def __str__", + "raise AssertionError", + "raise NotImplementedError", + "if __name__ == \"__main__\":", + "if TYPE_CHECKING:", + "if typing.TYPE_CHECKING:", +] + +# +######################################################################################### +# [tool.mypy] +######################################################################################### + +[tool.mypy] + +files = ["src/**/*.py", "tests/**/*.py"] + +# strictness options +disallow_any_generics = true +disallow_subclassing_any = true +disallow_untyped_defs = true +warn_redundant_casts = true +warn_no_return = true +warn_return_any = true + +# misc. options +# warn if an unreachable execution branch is found +warn_unreachable = true +# warn if a `# type: ignore` is unnecessary +warn_unused_ignores = true +# no testing 'abc' == 123 +strict_equality = true + +# formatting output options +pretty = true +show_column_numbers = true +show_error_code_links = true + +# +######################################################################################### +# [tool.tyranno] +######################################################################################### + +[tool.tyranno] + +# Note: All trash globs are automatically excluded from targets. +# So, e.g., .py files in cache dirs are never synced. +target_globs = [ + "**/*.yaml", + "**/*.toml", + "**/*.md", + "**/Dockerfile", + "**/*.py", + "**/*.ts", + "**/*.js", + "**/*.css", + "**/*.less", +] + +trash_globs = [ + "**/*~", + "**/*.bak", + "**/*[.~]t[e]mp", + "**/*.class", + "**/*.py[codi]", + "**/.*_cache", + "**/__pycache__", + "**/cython_debug", + "build/", + ".coverage.json", +] + +# +#============================ Tyranno remote sources =========================# +# +[tool.tyranno.fetch] + +# Sync is performed internally after these files are pulled down. 
+# That enables us to pull files like SECURITY.md, +# even though they contain strings specific to this project (e.g. URIs). +uris = [ + "https://github.com/dmyersturnbull/tyranno/blob/main/mkdocs.yaml", + "https://github.com/dmyersturnbull/tyranno/blob/main/CITATION.cff", + "https://github.com/dmyersturnbull/tyranno/blob/main/CONTRIBUTING.md", + "https://github.com/dmyersturnbull/tyranno/blob/main/SECURITY.md", + "https://github.com/dmyersturnbull/tyranno/blob/main/.gitignore", + "https://github.com/dmyersturnbull/tyranno/blob/main/.dockerignore", + "https://github.com/dmyersturnbull/tyranno/blob/main/.editorconfig", + "https://github.com/dmyersturnbull/tyranno/blob/main/.gitattributes", + "https://github.com/dmyersturnbull/tyranno/blob/main/.prettierignore", + "https://github.com/dmyersturnbull/tyranno/blob/main/.prettierrc.toml", + "https://github.com/dmyersturnbull/tyranno/blob/main/.scrutinizer.yaml", + "https://github.com/dmyersturnbull/tyranno/blob/main/.pre-commit-config.yaml", + "https://github.com/dmyersturnbull/tyranno/blob/main/docs/gen_ref_pages.py", + "https://github.com/dmyersturnbull/tyranno/blob/main/docs/js/", + "https://github.com/dmyersturnbull/tyranno/blob/main/docs/overrides/", + "https://github.com/dmyersturnbull/tyranno/blob/main/.github/dependabot.yaml", + "https://github.com/dmyersturnbull/tyranno/blob/main/.github/release.yaml", + "https://github.com/dmyersturnbull/tyranno/tree/main/.github/workflows/", + "https://github.com/dmyersturnbull/tyranno/tree/main/.github/DISCUSSION_TEMPLATE/", + "https://github.com/dmyersturnbull/tyranno/tree/main/.github/ISSUE_TEMPLATE/", + "https://github.com/dmyersturnbull/tyranno/tree/main/.github/PULL_REQUEST_TEMPLATE/", +] + +# +#================================ Tyranno data ===============================# +# +# You can add any key-value pairs you want here; +# they're only referenced by 'tyranno' sync comments. +# No keys here have specific meanings. +[tool.tyranno.data] +vendor = "dmyersturnbull" +frag = "${.vendor}/${project.name}" +copyright = "Copyright ${~|year(now_utc())~}, Contributors to ${project.name}" +doi = "10.5281/zenodo.4485186" # << Dict: return query - def exec(self) -> Dict: + def exec(self) -> Dict[str, Any]: """execute given query and return JSON response""" # Assert attribute exists for mypy assert hasattr(self, "_query"), \ f"{self.__class__.__name__} must define '_query' attribute." - + print(self._query) response_json = requests.post( json=dict(self._query), url=seq_const.API_ENDPOINT + "/graphql", timeout=config.DATA_API_TIMEOUT ).json() self._parse_gql_error(response_json) - return response_json + return dict(response_json) - def get_editor_link(self): + def get_editor_link(self) -> str: """Get link to GraphiQL editor with given query populated""" editor_base_link = str(seq_const.API_ENDPOINT) + "/graphiql" + "/index.html?query=" + assert hasattr(self, "_query") # for mypy return editor_base_link + urllib.parse.quote(str(self._query["query"])) - def _parse_gql_error(self, response_json: Dict[str, Any]): + def _parse_gql_error(self, response_json: Dict[str, Any]) -> None: """Look through responses to see if there are errors. 
If so, throw an HTTP error, """ if "errors" in response_json.keys(): error = response_json["errors"][0] @@ -101,7 +102,7 @@ def _parse_gql_error(self, response_json: Dict[str, Any]): @dataclass(frozen=True) -class alignments(Query): +class Alignments(Query): """ Get sequence alignments @@ -119,18 +120,20 @@ class alignments(Query): return_data_list: List[str] range: Optional[List[int]] = None suppress_autocomplete_warning: bool = False - _query: MappingProxyType = MappingProxyType({}) + _query: MappingProxyType[str, Any] = MappingProxyType({}) + offset: Optional[int] = None + first: Optional[int] = None - def to_dict(self) -> Dict: + def to_dict(self) -> Dict[str, Any]: return super().to_dict() - def __post_init__(self): + def __post_init__(self) -> None: query = super().construct_query("alignments") object.__setattr__(self, "_query", query) @dataclass(frozen=True) -class annotations(Query): +class Annotations(Query): """ Get sequence annotations @@ -150,18 +153,18 @@ class annotations(Query): filters: Optional[list["AnnotationFilterInput"]] = None range: Optional[List[int]] = None suppress_autocomplete_warning: bool = False - _query: MappingProxyType = MappingProxyType({}) + _query: MappingProxyType[str, Any] = MappingProxyType({}) def to_dict(self) -> Dict: return super().to_dict() - def __post_init__(self): + def __post_init__(self) -> None: query = super().construct_query("annotations") object.__setattr__(self, "_query", query) @dataclass(frozen=True) -class group_alignments(Query): +class GroupAlignments(Query): """ Get alignments for structures in groups @@ -176,7 +179,9 @@ class group_alignments(Query): return_data_list: list[str] filter: Optional[list[str]] = None suppress_autocomplete_warning: bool = False - _query: MappingProxyType = MappingProxyType({}) + _query: MappingProxyType[str, Any] = MappingProxyType({}) + offset: Optional[int] = None + first: Optional[int] = None def to_dict(self) -> Dict: return super().to_dict() @@ -187,7 +192,7 @@ def __post_init__(self): @dataclass(frozen=True) -class group_annotations(Query): +class GroupAnnotations(Query): """ Get annotations for structures in groups @@ -216,7 +221,7 @@ def __post_init__(self): @dataclass(frozen=True) -class group_annotations_summary(Query): +class GroupAnnotationsSummary(Query): """ Get a positional summary of group annotations @@ -268,7 +273,7 @@ def __init__( self.values = values self.source = source - def to_string(self): + def to_string(self) -> str: """Generate string to insert in GraphQL query based on GraphQL schema""" input_field_specs = [] diff --git a/rcsbapi/sequence/schema.py b/rcsbapi/sequence/seq_schema.py similarity index 64% rename from rcsbapi/sequence/schema.py rename to rcsbapi/sequence/seq_schema.py index 530a38a..ba0959b 100644 --- a/rcsbapi/sequence/schema.py +++ b/rcsbapi/sequence/seq_schema.py @@ -1,7 +1,10 @@ +"""Fetching and Parsing API's GraphQL schema.""" + +from __future__ import annotations import logging import json -from typing import List, Dict, Union, Any, Optional -import os +from typing import Any +from pathlib import Path import requests from graphql import build_client_schema import rustworkx as rx @@ -24,7 +27,8 @@ class FieldNode: """ - Node representing GraphQL field + Node representing GraphQL field. 
+ name (str): field name description (str): field description redundant (bool): whether field name is redundant in schema @@ -34,8 +38,9 @@ class FieldNode: index (int): graph index """ - def __init__(self, kind: str, node_type: str, name: str, description: str): - """Initialize FieldNodes + def __init__(self, kind: str, node_type: str, name: str, description: str) -> None: + """ + Initialize FieldNodes. Args: kind (str): GraphQL kind, can be "OBJECT", "SCALAR", "LIST" @@ -49,21 +54,24 @@ def __init__(self, kind: str, node_type: str, name: str, description: str): self.kind: str = kind self.of_kind: str = "" self.type: str = node_type - self.index: Optional[int] = None + self.index: None | int = None def __str__(self) -> str: + """FieldNode as a string.""" return f"Field Object name: {self.name}, Kind: {self.kind}, Type: {self.type}, Index if set: {self.index}, Description: {self.description}" - def set_index(self, index: int): - """set index that is associated with the FieldNode + def set_index(self, index: int) -> None: + """ + Set index that is associated with the FieldNode. Args: index (int): index of node in schema_graph """ self.index = index - def set_of_kind(self, of_kind: str): - """Only applicable if kind is LIST. Describes the GraphQL kind of the list (OBJECT, SCALAR) + def set_of_kind(self, of_kind: str) -> None: + """ + Only applicable if kind is LIST. Describes the GraphQL kind of the list (OBJECT, SCALAR). Args: of_kind (str): GraphQL kind of the list returned by a node (a LIST can be "of_kind" OBJECT) @@ -72,49 +80,45 @@ def set_of_kind(self, of_kind: str): class TypeNode: - """ - Class for nodes representing GraphQL Types in the schema graph. - """ + """Class for nodes representing GraphQL Types in the schema graph.""" - def __init__(self, name: str): - """Initialize TypeNodes + def __init__(self, name: str) -> None: + """ + Initialize TypeNodes. Args: name (str): name of GraphQL type (ex: CoreEntry) """ self.name = name - self.index: Optional[int] = None - self.field_list: List[FieldNode] = [] + self.index: None | int = None + self.field_list: list[FieldNode] = [] - def set_index(self, index: int): - """set index that is associated with the TypeNode + def set_index(self, index: int) -> None: + """ + Set index that is associated with the TypeNode. Args: index (int): index of node in schema_graph """ self.index = index - def set_field_list(self, field_list: List[FieldNode]): - """List of FieldNodes associated with the GraphQL type + def set_field_list(self, field_list: list[FieldNode]) -> None: + """List of FieldNodes associated with the GraphQL type. Args: - field_list (Union[None, List[FieldNode]]): list of FieldNodes + field_list (Union[None, list[FieldNode]]): list of FieldNodes """ self.field_list = field_list class SeqSchema: - """ - GraphQL schema defining available fields, types, and how they are connected. - """ + """GraphQL schema defining available fields, types, and how they are connected.""" def __init__(self) -> None: - """ - GraphQL schema defining available fields, types, and how they are connected. - """ + """GraphQL schema defining available fields, types, and how they are connected.""" self.pdb_url: str = seq_const.API_ENDPOINT + "/graphql" self.timeout: int = config.DATA_API_TIMEOUT # TODO: change? 
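         # the TODO above presumably refers to switching to config.SEQ_API_TIMEOUT, which this
         # patch series introduces alongside DATA_API_TIMEOUT; the Data API value is still used here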
- self.schema: Dict = self.fetch_schema() + self.schema: dict[str, Any] = self.fetch_schema() """JSON resulting from full introspection of the GraphQL schema""" self._use_networkx: bool = use_networkx @@ -125,48 +129,51 @@ def __init__(self) -> None: # self._schema_graph = rx.PyDiGraph() # """rustworkx graph representing the GraphQL schema""" - self._type_to_idx_dict: Dict[str, int] = {} - self._field_to_idx_dict: Dict[str, List[int]] = {} + self._type_to_idx_dict: dict[str, int] = {} + self._field_to_idx_dict: dict[str, list[int]] = {} """Dict where keys are field names and values are lists of indices. Indices of redundant fields are appended to the list under the field name. (ex: {id: [[43, 116, 317...]})""" self._root_introspection = self._request_root_types() """Request root types of the GraphQL schema and their required arguments""" self._client_schema = build_client_schema(self.schema["data"]) """GraphQLSchema object from graphql package, used for query validation""" - self._type_fields_dict: Dict[str, Dict] = self._construct_type_dict() + self._type_fields_dict: dict[str, dict[Any, Any]] = self._construct_type_dict() """Dict where keys are type names and the values are their associated fields""" self._field_names_list = self._construct_name_list() """list of all field names""" - self._root_dict: Dict[str, List[Dict[str, str]]] = self._construct_root_dict() - self._schema_graph: rx.PyDiGraph = rx.PyDiGraph() + self._root_dict: dict[str, list[dict[str, str]]] = self._construct_root_dict() + self._schema_graph: rx.PyDiGraph[FieldNode | TypeNode, None | int] = rx.PyDiGraph() self._schema_graph = self._recurse_build_schema(self._schema_graph, "Query") - self._root_to_idx: Dict[str, int] = self._make_root_to_idx() + self._root_to_idx: dict[str, int] = self._make_root_to_idx() self._field_names_list = self._construct_name_list() """Dict where keys are field names and values are indices. Redundant field names are represented as . (ex: {entry.id: 1452})""" - def _request_root_types(self) -> Dict: - """Make an introspection query to get information about schema's root types + def _request_root_types(self) -> dict[str, Any]: + """ + Make an introspection query to get information about schema's root types. Returns: Dict: JSON response of introspection request """ - root_query = {"query": """query IntrospectionQuery{ __schema{ queryType{ fields{ name args + root_query = { + "query": """query IntrospectionQuery{ __schema{ queryType{ fields{ name args { name description type{ kind ofType{ name kind ofType{ inputFields {name type { kind ofType { name kind ofType { ofType { kind name ofType {kind name}} } } } } - kind name ofType{name kind} } } } } } } } }"""} + kind name ofType{name kind} } } } } } } } }""" + } response = requests.post(headers={"Content-Type": "application/json"}, json=root_query, url=self.pdb_url, timeout=self.timeout) - return response.json() + return dict(response.json()) - def _construct_root_dict(self) -> Dict[str, List[Dict[str, str]]]: + def _construct_root_dict(self) -> dict[str, list[dict[str, str]]]: """Build a dictionary to organize information about schema root types. Returns: - Dict[str, List[Dict]]: Dict where keys are the type names. + dict[str, list[Dict]]: Dict where keys are the type names. Values are lists of dictionaries with information about arguments. 
ex: {"alignments": [{'name': 'from', 'description': 'Query sequence database'...}, ...], ...} """ response = self._root_introspection - root_dict: Dict[str, List[Dict[str, str]]] = {} + root_dict: dict[str, list[dict[str, str]]] = {} root_fields_list = response["data"]["__schema"]["queryType"]["fields"] for name_arg_dict in root_fields_list: root_name = name_arg_dict["name"] @@ -177,28 +184,24 @@ def _construct_root_dict(self) -> Dict[str, List[Dict[str, str]]]: arg_kind = arg_dict["type"]["kind"] arg_of_kind = "" arg_of_type = "" - if arg_kind == "LIST" or arg_kind == "NON_NULL": + if arg_kind in {"LIST", "NON_NULL"}: arg_of_kind = arg_dict["type"]["ofType"]["kind"] arg_of_type = self._find_type_name(arg_dict["type"]["ofType"]) input_fields = "" - if ("ofType" in arg_dict["type"]["ofType"]) and (arg_dict["type"]["ofType"]["ofType"] is not None): - if ("inputFields" in arg_dict["type"]["ofType"]["ofType"]) and (arg_dict["type"]["ofType"]["ofType"]["inputFields"] is not None): - input_fields = arg_dict["type"]["ofType"]["ofType"]["inputFields"] + if ("ofType" in arg_dict["type"]["ofType"] and arg_dict["type"]["ofType"]["ofType"] is not None) and ( + "inputFields" in arg_dict["type"]["ofType"]["ofType"] and arg_dict["type"]["ofType"]["ofType"]["inputFields"] is not None + ): + input_fields = arg_dict["type"]["ofType"]["ofType"]["inputFields"] if root_name not in root_dict: root_dict[root_name] = [] - root_dict[root_name].append({ - "name": arg_name, - "description": arg_description, - "kind": arg_kind, - "of_kind": arg_of_kind, - "of_type": arg_of_type, - "input_fields": input_fields - }) + root_dict[root_name].append( + {"name": arg_name, "description": arg_description, "kind": arg_kind, "of_kind": arg_of_kind, "of_type": arg_of_type, "input_fields": input_fields} + ) return root_dict - def fetch_schema(self) -> Dict: - """Make an introspection query to get full Data API query. - Can also be found in resources folder as "data_api_schema.json" + def fetch_schema(self) -> dict[str, Any]: + """ + Make an introspection query to get full Data API schema. Also found in resources folder as "seq_api_schema.json". Returns: Dict: JSON response of introspection request @@ -222,23 +225,23 @@ def fetch_schema(self) -> Dict: } schema_response = requests.post(headers={"Content-Type": "application/json"}, json=query, url=self.pdb_url, timeout=self.timeout) if schema_response.status_code == 200: - return schema_response.json() + return dict(schema_response.json()) logger.info("Loading data schema from file") - current_dir = os.path.dirname(os.path.abspath(__file__)) - json_file_path = os.path.join(current_dir, "resources", "seq_api_schema.json") - with open(json_file_path, "r", encoding="utf-8") as schema_file: - return json.load(schema_file) + current_dir = Path(Path(__file__).resolve()).parent + json_file_path = Path(current_dir) / "resources" / "seq_api_schema.json" + with Path.open(json_file_path, encoding="utf-8") as schema_file: + return dict(json.load(schema_file)) - def _construct_type_dict(self) -> Dict[str, Dict[str, Dict[str, str]]]: + def _construct_type_dict(self) -> dict[str, dict[str, dict[str, str]]]: """Construct dictionary of GraphQL types and their associated fields. 
        Returns:
-            Dict[str, Dict[str, Dict[str, str]]]: Dict where keys are GraphQL types and values are lists of field names
+            dict[str, dict[str, dict[str, str]]]: Dict where keys are GraphQL types and values are lists of field names
        """
-        all_types_dict: Dict = self.schema["data"]["__schema"]["types"]
+        all_types_dict: dict[Any, Any] = self.schema["data"]["__schema"]["types"]
        type_fields_dict = {}
        for each_type_dict in all_types_dict:
            type_name = str(each_type_dict["name"])
@@ -250,23 +253,22 @@ def _construct_type_dict(self) -> Dict[str, Dict[str, Dict[str, str]]]:
                type_fields_dict[type_name] = field_dict
        return type_fields_dict

-    def _construct_name_list(self) -> List[str]:
-        """construct a list of all field names in the schema.
-        Used to determine whether a redundant field and if a field is known.
+    def _construct_name_list(self) -> list[str]:
+        """Construct a list of all field names in the schema. Used to determine whether a field is known or redundant.

        Returns:
-            List[str]: list of all fields
+            list[str]: list of all fields
        """
        field_names_list = []
        for type_name, field_dict in self._type_fields_dict.items():
            if "__" in type_name:
                continue
-            for field_name in field_dict.keys():
-                field_names_list.append(field_name)
+            for field_name in field_dict:
+                field_names_list.append(field_name)  # noqa: PERF402
        return field_names_list

    def make_type_subgraph(self, type_name: str) -> TypeNode:
-        """Make a subgraph of only one type and its associated fields
+        """Make a subgraph of only one type and its associated fields.

        Args:
            type_name (str): name of the type for which to construct subgraph
@@ -284,9 +286,8 @@ def make_type_subgraph(self, type_name: str) -> TypeNode:
            type_node.set_field_list(field_node_list)
        return type_node

-    def _recurse_build_schema(self, schema_graph: rx.PyDiGraph, type_name: str) -> rx.PyDiGraph:
-        """Build the API schema by iterating through the fields of the given type
-        and building subgraphs for each one recursively until a scalar (leaf) is reached
+    def _recurse_build_schema(self, schema_graph: rx.PyDiGraph[FieldNode | TypeNode, None | int], type_name: str) -> rx.PyDiGraph:
+        """Build the API schema by iterating through the fields of the given type and building subgraphs for each one recursively until a scalar (leaf) is reached.
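        Types already recorded in _type_to_idx_dict are linked to their existing graph node instead of being rebuilt, so shared types are not duplicated.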
Args: schema_graph (rx.PyDiGraph): graph object to build into @@ -297,31 +298,30 @@ def _recurse_build_schema(self, schema_graph: rx.PyDiGraph, type_name: str) -> r """ type_node = self.make_type_subgraph(type_name) for field_node in type_node.field_list: - assert isinstance(field_node.index, int) # for mypy + assert isinstance(field_node.index, int) # noqa: S101 (assert needed for mypy) if field_node.kind == "SCALAR" or field_node.of_kind == "SCALAR": continue - else: - type_name = field_node.type - if type_name in self._type_to_idx_dict: - type_index = self._type_to_idx_dict[type_name] - if use_networkx: - schema_graph.add_edge(field_node.index, type_index, 1) - else: - schema_graph.add_edge(field_node.index, type_index, 1) + type_name = field_node.type + if type_name in self._type_to_idx_dict: + type_index = self._type_to_idx_dict[type_name] + if use_networkx: + schema_graph.add_edge(field_node.index, type_index, 1) else: - self._recurse_build_schema(schema_graph, type_name) - type_index = self._type_to_idx_dict[type_name] - # if self._use_networkx: - # schema_graph.add_edge(field_node.index, type_index, 1) - if self._use_networkx is False: - schema_graph.add_edge(field_node.index, type_index, 1) + schema_graph.add_edge(field_node.index, type_index, 1) + else: + self._recurse_build_schema(schema_graph, type_name) + type_index = self._type_to_idx_dict[type_name] + # if self._use_networkx: + # schema_graph.add_edge(field_node.index, type_index, 1) + if self._use_networkx is False: + schema_graph.add_edge(field_node.index, type_index, 1) return schema_graph - # def _apply_weights(self, root_type_list: List[str], weight: int) -> None: + # def _apply_weights(self, root_type_list: list[str], weight: int) -> None: # """applies weight to all edges from a root TypeNode to FieldNodes # Args: - # root_type_list (List[str]): list of root fields to apply weights to + # root_type_list (list[str]): list of root fields to apply weights to # ex: "CoreEntry", "CoreAssembly" # weight (int): integer weight to apply to edges from specified type(s) # """ @@ -350,12 +350,12 @@ def _make_type_node(self, type_name: str) -> TypeNode: type_node.set_index(index) return type_node - def _find_kind(self, field_dict: Dict) -> str: + def _find_kind(self, field_dict: dict[str, Any]) -> Any | str: # noqa: ANN401 if field_dict["name"] is not None: return field_dict["kind"] return self._find_kind(field_dict["ofType"]) - def _find_type_name(self, field_dict: Dict) -> str: + def _find_type_name(self, field_dict: dict[str, Any]) -> Any | str: # noqa: ANN401 if field_dict: if field_dict["name"] is not None: return field_dict["name"] @@ -366,18 +366,17 @@ def _find_description(self, type_name: str, field_name: str) -> str: for type_dict in self.schema["data"]["__schema"]["types"]: if type_dict["name"] == type_name: for field in type_dict["fields"]: - if field["name"] == field_name: + if (field["name"] == field_name) and isinstance(field["description"], str): return field["description"] return "" def _make_field_node(self, parent_type: str, field_name: str) -> FieldNode: kind = self._type_fields_dict[parent_type][field_name]["kind"] - field_type_dict: Dict = self._type_fields_dict[parent_type][field_name] + field_type_dict: dict[str, Any] = self._type_fields_dict[parent_type][field_name] return_type = self._find_type_name(field_type_dict) description = self._find_description(parent_type, field_name) field_node = FieldNode(kind, return_type, field_name, description) - assert field_node.type is not None - if kind == "LIST" or kind 
== "NON_NULL": + if kind in {"LIST", "NON_NULL"}: of_kind = self._find_kind(field_type_dict) field_node.set_of_kind(of_kind) parent_type_index = self._type_to_idx_dict[parent_type] @@ -393,8 +392,7 @@ def _make_field_node(self, parent_type: str, field_name: str) -> FieldNode: if self._field_names_list.count(field_name) > 1: field_node.redundant = True field_node.set_index(index) - - assert isinstance(field_node.index, int) # for mypy + assert isinstance(field_node.index, int) # noqa: S101 (needed for mypy) if field_name not in self._field_to_idx_dict: self._field_to_idx_dict[field_name] = [field_node.index] else: @@ -402,17 +400,30 @@ def _make_field_node(self, parent_type: str, field_name: str) -> FieldNode: return field_node - def _make_root_to_idx(self) -> Dict[str, int]: - root_to_idx: Dict[str, int] = {} + def _make_root_to_idx(self) -> dict[str, int]: + root_to_idx: dict[str, int] = {} # Assumes 0 is the index for root Query node. # Remains true as long as graph building starts from there for root_node in self._schema_graph.successors(0): + assert isinstance(root_node.index, int) # for mypy root_to_idx[root_node.name] = root_node.index return root_to_idx - def get_input_id_dict(self, input_type: str) -> Dict[str, str]: - if input_type not in self._root_dict.keys(): - raise ValueError("Not a valid input_type, no available input_id dictionary") + def get_input_id_dict(self, input_type: str) -> dict[str, str]: + """Get keys input dictionary for given input_type. + + Args: + input_type (str): GraphQL input_type (ex: alignments) + + Raises: + ValueError: _description_ + + Returns: + dict[str, str]: _description_ + """ + if input_type not in self._root_dict: + error_msg = "Not a valid input_type, no available input_id dictionary" + raise ValueError(error_msg) root_dict_entry = self._root_dict[input_type] input_dict = {} for arg in root_dict_entry: @@ -423,7 +434,7 @@ def get_input_id_dict(self, input_type: str) -> Dict[str, str]: input_dict[name] = description return input_dict - def _recurse_fields(self, fields: Dict[Any, Any], field_map: Dict[Any, Any]) -> str: + def _recurse_fields(self, fields: dict[Any, Any], field_map: dict[Any, Any]) -> str: query_str = "" for target_idx, idx_path in fields.items(): mapped_path = field_map.get(target_idx, [target_idx]) @@ -448,20 +459,49 @@ def _recurse_fields(self, fields: Dict[Any, Any], field_map: Dict[Any, Any]) -> query_str += " " + "} " return query_str - def _get_descendant_fields(self, node_idx: int, field_name: str, visited=None) -> List[Union[int, Dict]]: + def _idx_dict_to_name_dict(self, idx_fields: dict[Any, Any] | list[int] | int) -> dict[str, Any] | list[str] | str: + query_dict = {} + if isinstance(idx_fields, dict): + for field_idx, subfield in idx_fields.items(): + field_name = self._idx_to_name(field_idx) + query_dict[field_name] = self._idx_dict_to_name_dict(subfield) + return query_dict + elif isinstance(idx_fields, list): + return [self._idx_dict_to_name_dict(field) for field in idx_fields] + elif not idx_fields: + return "" + else: + return self._idx_to_name(idx_fields) + + def _fields_to_string(self, idx_fields: dict[str, Any]) -> str: + name_dict = self._idx_dict_to_name_dict(idx_fields) + print(name_dict) + return ( + # format the dict as a GraphQL query + str(name_dict) + .replace("'", "") + .replace("[", "") + .replace("]", "") + .replace(",", "") + .replace("{", " ") + .replace(":", "{") + ) + + def _get_descendant_fields(self, node_idx: int, field_name: str, visited: None | set[int] = None) -> list[int | dict[int, 
Any]]: if visited is None: visited = set() - result: List[Union[int, Dict]] = [] + result: list[int | dict[int, Any]] = [] children_idx = list(self._schema_graph.neighbors(node_idx)) for idx in children_idx: if idx in visited: - raise ValueError(f"{field_name} in return_data_list is too general, unable to autocomplete query.\n" "Please request a more specific field.") + error_msg = f"{field_name} in return_data_list is too general, unable to autocomplete query.\n" "Please request a more specific field." + raise ValueError(error_msg) visited.add(idx) child_data = self._schema_graph[idx] - assert isinstance(child_data.index, int) # for mypy + assert isinstance(child_data.index, int) # noqa: S101 (needed for mypy) if isinstance(child_data, FieldNode): child_descendants = self._get_descendant_fields(idx, field_name, visited) @@ -479,8 +519,8 @@ def _get_descendant_fields(self, node_idx: int, field_name: str, visited=None) - # Skips appending if no further subfields (ENUMS) return result - def find_field_names(self, search_string: str) -> List[str]: - """find field names that fully or partially match the search string + def find_field_names(self, search_string: str) -> list[str]: + """Find field names that fully or partially match the search string. Args: search_string (str): string to search field names for @@ -490,34 +530,50 @@ def find_field_names(self, search_string: str) -> List[str]: ValueError: thrown when no fields match search_string Returns: - List[str]: list of matching field names + list[str]: list of matching field names """ if not isinstance(search_string, str): - raise ValueError(f"Please input a string instead of {type(search_string)}") + error_msg = f"Please input a string instead of {type(search_string)}" # type: ignore[unreachable] + raise TypeError(error_msg) + field_names = [key for key in self._field_to_idx_dict if search_string.lower() in key.lower()] if not field_names: - raise ValueError(f"No fields found matching '{search_string}'") + error_msg = f"No fields found matching '{search_string}'" + raise ValueError(error_msg) return field_names def construct_query( - self, - query_type: str, - query_args: Union[Dict[str, str], Dict[str, list]], - return_data_list: List[str], - suppress_autocomplete_warning=False - ) -> Dict: - unknown_return_list: List[str] = [] + self, query_type: str, query_args: dict[str, str] | dict[str, list[Any]], return_data_list: list[str], suppress_autocomplete_warning: bool = False + ) -> dict[str, Any]: + """ + Construct a GraphQL query. Currently only uses rustworkx. + + Args: + query_type (str): root type ("alignments", "annotations") + query_args (dict[str, str] | dict[str, list]): dictionary where keys are argument names and + values are input values + return_data_list (list[str]): list of fields to request data for + suppress_autocomplete_warning (bool, optional): Whether to suppress warning for autocompletion of paths. + Defaults to False. + + Raises: + ValueError: unknown field in the return_data_list + + Returns: + dict: GraphQL query in JSON format + """ + unknown_return_list: list[str] = [] for field in return_data_list: if "." 
in field: separate_fields = field.split(".") for sep_field in separate_fields: if sep_field not in self._field_names_list: - unknown_return_list.append(sep_field) - else: - if field not in self._field_names_list: - unknown_return_list.append(field) + unknown_return_list.append(sep_field) # noqa: PERF401 + elif field not in self._field_names_list: + unknown_return_list.append(field) if unknown_return_list: - raise ValueError(f"Unknown item in return_data_list: {unknown_return_list}") + error_msg = f"Unknown item in return_data_list: {unknown_return_list}" + raise ValueError(error_msg) # if use_networkx: # query = self._construct_query_networkx( # input_type=input_type, @@ -526,26 +582,23 @@ def construct_query( # suppress_autocomplete_warning=suppress_autocomplete_warning # ) # else: - # query = self._construct_query_rustworkx( - # input_type=input_type, - # input_ids=input_ids, - # return_data_list=return_data_list, - # add_rcsb_id=add_rcsb_id, - # suppress_autocomplete_warning=suppress_autocomplete_warning - # ) + # query = self._construct_query_rustworkx( + # input_type=input_type, + # input_ids=input_ids, + # return_data_list=return_data_list, + # add_rcsb_id=add_rcsb_id, + # suppress_autocomplete_warning=suppress_autocomplete_warning + # ) query = self._construct_query_rustworkx( - query_type=query_type, - query_args=query_args, - return_data_list=return_data_list, - suppress_autocomplete_warning=suppress_autocomplete_warning + query_type=query_type, query_args=query_args, return_data_list=return_data_list, suppress_autocomplete_warning=suppress_autocomplete_warning ) - return query + return query # noqa: RET504 # def _construct_query_networkx( # self, # input_type: str, - # input_ids: Union[Dict[str, str], List[str]], - # return_data_list: List[str], + # input_ids: Union[dict[str, str], list[str]], + # return_data_list: list[str], # add_rcsb_id: bool, # suppress_autocomplete_warning: bool # ): # Incomplete function @@ -555,17 +608,19 @@ def construct_query( def _construct_query_rustworkx( self, query_type: str, - query_args: Union[Dict[str, str], Dict[str, list]], - return_data_list: List[str], + query_args: dict[str, str] | dict[str, list[Any]], + return_data_list: list[str], suppress_autocomplete_warning: bool = False, - ) -> Dict: + ) -> dict[str, Any]: """Construct a GraphQL query as JSON using a rustworkx graph. Args: - input_ids (Union[List[str], Dict[str, str], Dict[str, List[str]]]): identifying information for the specific entry, chemical component, etc to query - input_type (str): specifies where you are starting your query. These are specific fields like "entry" or "polymer_entity_instance". - return_data_list (List[str]): requested data, can be field name(s) or dot-separated field names - ex: "cluster_id" or "exptl.method" + query_type (str): root type ("alignments", "annotations") + query_args (dict[str, str] | dict[str, list]): dictionary where keys are argument names and + values are input values + return_data_list (list[str]): list of fields to request data for + suppress_autocomplete_warning (bool, optional): Whether to suppress warning for autocompletion of paths. + Defaults to False. 
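            ex (illustrative, mirroring the test suite):
                query_type="alignments",
                query_args={"from": "NCBI_PROTEIN", "to": "PDB_ENTITY", "queryId": "XP_642496"},
                return_data_list=["target_id"]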
Raises: ValueError: input_ids dictionary keys don't match the input_type given @@ -581,7 +636,7 @@ def _construct_query_rustworkx( # arg_name_list = [id["name"] for id in arg_list] # might need to revert back to this # # Check formatting of input_ids - # input_dict: Union[Dict[str, str], Dict[str, List[str]]] = {} + # input_dict: Union[dict[str, str], dict[str, list[str]]] = {} # if isinstance(input_ids, Dict): # input_dict = input_ids @@ -608,14 +663,14 @@ def _construct_query_rustworkx( start_node_index = self._root_to_idx[query_type] - return_data_paths: Dict[int, List[List[int]]] = {} + return_data_paths: dict[int, list[list[int]]] = {} complete_path: int = 0 for field in return_data_list: # Generate list of all possible paths to the final requested field. Try to find matching sequence to user input. path_list = field.split(".") possible_paths = self.find_paths(query_type, path_list[-1]) - matching_paths: List[str] = [] + matching_paths: list[str] = [] for path in possible_paths: possible_path_list = path.split(".") possible_path_list.insert(0, str(query_type)) @@ -623,41 +678,38 @@ def _construct_query_rustworkx( # If there is an exact path match, # the path is fully specified and other possible_paths can be removed and loop can stop. # Iterate complete path, so warning can be raised if autocompletion is used - path_list_with_input = [query_type] + path_list - if (possible_path_list == path_list) or (possible_path_list == path_list_with_input): + path_list_with_input = [query_type, *path_list] + if possible_path_list in (path_list, path_list_with_input): matching_paths = [".".join(possible_path_list)] complete_path += 1 break # Else, check for matching path segments. - else: - for i in range(len(possible_path_list)): - if possible_path_list[i: i + len(path_list)] == path_list: - matching_paths.append(".".join(possible_path_list)) + for i in range(len(possible_path_list)): + if possible_path_list[i: i + len(path_list)] == path_list: + matching_paths.append(".".join(possible_path_list)) - idx_paths: List[List[int]] = [] + idx_paths: list[list[int]] = [] if len(matching_paths) > 0: for path in matching_paths: idx_paths.extend(self._parse_dot_path(path)) # remove paths not beginning with input_type - full_idx_paths: List[List[int]] = list(idx_paths) + full_idx_paths: list[list[int]] = list(idx_paths) input_type_idx = self._root_to_idx[query_type] - for path in idx_paths: - if path[0] != input_type_idx: - full_idx_paths.remove(path) + for idx_path in idx_paths: + if idx_path[0] != input_type_idx: + full_idx_paths.remove(idx_path) idx_paths = full_idx_paths if len(idx_paths) > 1: # Print error message that doesn't include input_type at beginning # But keep input_type in matching_paths for query construction reasons + num_paths_to_print = 10 path_choice_msg = " " + "\n ".join([".".join(path.split(".")[1:]) for path in matching_paths[:10]]) - if len(matching_paths) > 10: - len_path = 10 - else: - len_path = len(matching_paths) + len_path = min(len(matching_paths), num_paths_to_print) - if len(matching_paths) > 10: - raise ValueError( + if len(matching_paths) > num_paths_to_print: + error_msg = ( f'Given path "{field}" not specific enough. 
Use one or more of these paths in return_data_list argument:\n\n' f"{len_path} of {len(matching_paths)} possible paths:\n" f"{path_choice_msg}" @@ -667,59 +719,60 @@ def _construct_query_rustworkx( f" schema = Schema()\n" f' schema.find_paths("{query_type}", "{path_list[-1]}")' ) + raise ValueError(error_msg) - raise ValueError( + error_msg = ( f'Given path "{field}" not specific enough. Use one or more of these paths in return_data_list argument:\n\n' f"{len_path} of {len(matching_paths)} possible paths:\n" f"{path_choice_msg}" ) + raise ValueError(error_msg) # If path isn't in possible_paths_list, try using the graph to validate the path. Allows for queries with loops and paths that have repeated nodes. if len(idx_paths) == 0: - possible_dot_paths: List[List[int]] = self._parse_dot_path(field) # Throws an error if path is invalid - shortest_full_paths: List[List[int]] = self._compare_paths(start_node_index, possible_dot_paths) - assert len(shortest_full_paths) != 0 + possible_dot_paths: list[list[int]] = self._parse_dot_path(field) # Throws an error if path is invalid + shortest_full_paths: list[list[int]] = self._compare_paths(start_node_index, possible_dot_paths) if len(shortest_full_paths) > 1: shortest_name_paths = [".".join([self._idx_to_name(idx) for idx in path[1:] if isinstance(self._schema_graph[idx], FieldNode)]) for path in shortest_full_paths] shortest_name_paths.sort() path_choice_msg = "" for name_path in shortest_name_paths: path_choice_msg += " " + name_path + "\n" - raise ValueError( + error_msg = ( "Given path not specific enough. Use one or more of these paths in return_data_list argument:\n\n" f"{path_choice_msg}\n" "Please note that this list may not be complete. " "If looking for a different path, you can search the interactive editor's documentation explorer: https://data.rcsb.org/graphql/index.html" ) + raise ValueError(error_msg) idx_paths = shortest_full_paths final_idx: int = idx_paths[0][-1] return_data_paths[final_idx] = idx_paths if (complete_path != len(return_data_list)) and (suppress_autocomplete_warning is False): - info_list = [] - for path in return_data_paths.values(): - assert len(path) == 1 - info_list.append(".".join(self._idx_path_to_name_path(path[0][1:]))) + info_list = [".".join(self._idx_path_to_name_path(path[0][1:])) for path in return_data_paths.values()] - path_msg = "".join(f'\n "{item}",' for item in info_list) + path_msg = "".join(f'\n\t"{item}",' for item in info_list) logger.warning( "\n" "Some paths are being autocompleted based on the current API. 
If this code is meant for long-term use, use the set of fully-specified paths below:\n" " [" "%s\n" - " ]", path_msg + " ]", + path_msg, ) for return_data in return_data_list: if any(not value for value in return_data_paths.values()): - raise ValueError(f'You can\'t access "{return_data}" from input type {query_type}') + error_msg = f'You can\'t access "{return_data}" from input type {query_type}' + raise ValueError(error_msg) final_fields = {} - for target_idx in return_data_paths.keys(): + for target_idx in return_data_paths: final_fields[target_idx] = self._get_descendant_fields(node_idx=target_idx, field_name=self._schema_graph[target_idx].name) - field_names: Dict[Any, Any] = {} - paths: Dict[Any, Any] = {} + field_names: dict[Any, Any] = {} + paths: dict[Any, Any] = {} for target_idx, paths_list in return_data_paths.items(): node_data = self._schema_graph[target_idx] @@ -752,13 +805,15 @@ def _construct_query_rustworkx( query += ", " query += ") { " + print(self._fields_to_string(final_fields)) query += self._recurse_fields(final_fields, field_names) query += " } }" json_query = {"query": f"{query}"} - return json_query + print(query) + return json_query # noqa: RET504 - def format_args(self, arg_dict: Union[Dict[str, list], Dict[str, str]], input_value: Union[str, List[str]]) -> str: - """Add double quotes or omit quotes around a single GraphQL argument + def format_args(self, arg_dict: dict[str, list[Any]] | dict[str, str], input_value: str | list[str]) -> str: + """Add double quotes or omit quotes around a single GraphQL argument. Args: arg_dict (Dict[str, str]): dictionary with information about the argument @@ -783,36 +838,33 @@ def format_args(self, arg_dict: Union[Dict[str, list], Dict[str, str]], input_va format_arg += f"{arg_dict["name"]}: {input_value}" return format_arg - def _find_idx_path(self, dot_path: List[str], idx_list: List[int], node_idx: int) -> List[int]: - """function that recursively finds a list of indices that matches a list of field names. + def _find_idx_path(self, dot_path: list[str], idx_list: list[int], node_idx: int) -> list[int]: + """Function that recursively finds a list of indices that matches a list of field names. Args: - dot_path (List[str]): list of field names to find index matches for - idx_list (List[int]): list of matching indices, appended to as matches are found during recursion + dot_path (list[str]): list of field names to find index matches for + idx_list (list[int]): list of matching indices, appended to as matches are found during recursion node_idx (int): index to be searched for a child node matching the next field name Returns: - List[int]: a list of indices matching the given dot_path. If no path is found, an empty list is returned. + list[int]: a list of indices matching the given dot_path. If no path is found, an empty list is returned. 
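            ex (illustrative): dot_path ["target_id"] returns the chain of matching node indices
            ending at the "target_id" FieldNode, or [] if a segment fails to match.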
""" if len(dot_path) == 0: idx_list.append(node_idx) return idx_list if (self._schema_graph[node_idx].kind == "SCALAR") or (self._schema_graph[node_idx].of_kind == "SCALAR"): return self._find_idx_path(dot_path[1:], idx_list, node_idx) - else: - type_node = list(self._schema_graph.successor_indices(node_idx))[0] - field_nodes = self._schema_graph.successor_indices(type_node) - for field_idx in field_nodes: - if self._schema_graph[field_idx].name == dot_path[0]: - idx_list.append(node_idx) - return self._find_idx_path(dot_path[1:], idx_list, field_idx) - else: - continue - return [] - - def _parse_dot_path(self, dot_path: str) -> List[List[int]]: - """Parse dot-separated field names into lists of matching node indices - ex: "prd.chem_comp.id" --> [[57, 81, 116], [610, 81, 116], [858, 81, 116]] + type_node = next(iter(self._schema_graph.successor_indices(node_idx))) + field_nodes = self._schema_graph.successor_indices(type_node) + for field_idx in field_nodes: + if self._schema_graph[field_idx].name == dot_path[0]: + idx_list.append(node_idx) + return self._find_idx_path(dot_path[1:], idx_list, field_idx) + continue + return [] + + def _parse_dot_path(self, dot_path: str) -> list[list[int]]: + """Parse dot-separated field names into lists of matching node indices. ex: "prd.chem_comp.id" --> [[57, 81, 116], [610, 81, 116], [858, 81, 116]]. Args: dot_path (str): dot-separated field names given in return_data_list @@ -822,42 +874,43 @@ def _parse_dot_path(self, dot_path: str) -> List[List[int]]: ValueError: thrown if no path matches dot_path Returns: - List[List[int]]: list of paths where each path is a list of FieldNode indices matching the given dot_path + list[list[int]]: list of paths where each path is a list of FieldNode indices matching the given dot_path """ path_list = dot_path.split(".") - node_matches: List[int] = self._field_to_idx_dict[path_list[0]] - idx_path_list: List[List[int]] = [] + node_matches: list[int] = self._field_to_idx_dict[path_list[0]] + idx_path_list: list[list[int]] = [] for node_idx in node_matches: - found_path: List[int] = [] + found_path: list[int] = [] found_path = self._find_idx_path(path_list[1:], found_path, node_idx) if len(found_path) == len(path_list): idx_path_list.append(found_path) if len(idx_path_list) == 0: - raise ValueError(f"return_data_list path is not valid: {dot_path}") + error_msg = f"return_data_list path is not valid: {dot_path}" + raise ValueError(error_msg) return idx_path_list - def _compare_paths(self, start_node_index: int, dot_paths: List[List[int]]) -> List[List[int]]: - """Compare length of paths from the starting node to dot notation paths, returning the shortest paths + def _compare_paths(self, start_node_index: int, dot_paths: list[list[int]]) -> list[list[int]]: + """Compare length of paths from the starting node to dot notation paths, returning the shortest paths. Args: start_node_index (int): the index of query's input_type ex: input_type entry --> 20 - dot_paths (List[List[int]]): a list of paths where each path is a list of node indices matching a dot notation string + dot_paths (list[list[int]]): a list of paths where each path is a list of node indices matching a dot notation string Raises: ValueError: thrown when there is no path from the input_type node to the return data nodes. Returns: - List[List[int]]: list of shortest paths from the input_type node index to the index of the final field given in dot notation. 
+            list[list[int]]: list of shortest paths from the input_type node index to the index of the final field given in dot notation.
                ex: input_type "entry" and "exptl.method" would return a list of shortest path(s) with indices from "entry" to "method".
        """
-        all_paths: List[List[int]] = []
+        all_paths: list[list[int]] = []
        for path in dot_paths:
            first_path_idx = path[0]
            if start_node_index == first_path_idx:
-                unique_paths_list: List[List[int]] = [path]
+                unique_paths_list: list[list[int]] = [path]
            else:
                paths = rx.digraph_all_shortest_paths(self._schema_graph, start_node_index, first_path_idx, weight_fn=lambda edge: edge)
                unique_paths = {tuple(path) for path in paths}
@@ -869,23 +922,23 @@ def _compare_paths(self, start_node_index: int, dot_paths: List[List[int]]) -> L
                    unique_path += path[1:]
            all_paths.extend(unique_paths_list)
        if len(all_paths) == 0:
-            raise ValueError(f"Can't access \"{'.'.join(self._idx_path_to_name_path(dot_paths[0]))}\" from given input_type {self._schema_graph[start_node_index].name}")
+            error_msg = f"Can't access \"{'.'.join(self._idx_path_to_name_path(dot_paths[0]))}\" from given input_type {self._schema_graph[start_node_index].name}"
+            raise ValueError(error_msg)
        shortest_path_len = len(min(all_paths, key=len))
        shortest_paths = [path for path in all_paths if len(path) == shortest_path_len]
-        return shortest_paths
+        return shortest_paths  # noqa: RET504

-    def _weigh_assemblies(self, paths: List[List[int]], assembly_node_idxs: List[int]) -> List[List[int]]:
-        """remove paths containing "assemblies" if there are shorter or equal length paths available.
-        Mimics weighing assembly edges in the rest of query construction.
+    def _weigh_assemblies(self, paths: list[list[int]], assembly_node_idxs: list[int]) -> list[list[int]]:
+        """Remove paths containing "assemblies" if shorter or equal-length paths are available. Mimics weighing assembly edges in the rest of query construction.

        Args:
-            paths (List[List[int]]): list of paths where each path is a list of indices from a root node to a requested field.
-            assembly_node_idxs (List[int]): list of indices of nodes named "assemblies" (root node excluded)
+            paths (list[list[int]]): list of paths where each path is a list of indices from a root node to a requested field.
+            assembly_node_idxs (list[int]): list of indices of nodes named "assemblies" (root node excluded)

        Returns:
-            List[List[int]]: List with weight applied (no "assemblies" path if there is an equivalent path present)
+            list[list[int]]: List with weight applied (no "assemblies" path if there is an equivalent path present)
        """
-        remove_paths: set = set()
+        remove_paths: set[tuple[int, ...]] = set()

        for path in paths:
            for assemblies_idx in assembly_node_idxs:
@@ -895,20 +948,16 @@
                        continue
                    name_compare_path = self._idx_path_to_name_path(compare_path)
                    # If there are shorter or equal length paths without "assemblies", filter out
-                    if (
-                        (len(compare_path) <= len(path))
-                        and ("assemblies" not in name_compare_path)
-                        and (compare_path[-1] == path[-1])
-                    ):
+                    if (len(compare_path) <= len(path)) and ("assemblies" not in name_compare_path) and (compare_path[-1] == path[-1]):
                        remove_paths.add(tuple(path))

-        for path in remove_paths:
+        for path in remove_paths:  # type: ignore[assignment]
            paths.remove(list(path))

        return paths

    def _idx_to_name(self, idx: int) -> str:
-        """Given an index, return the associated node's name
+        """Given an index, return the associated node's name.
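        ex (illustrative): _idx_to_name(0) -> "Query", since index 0 is assumed to be the root Query node.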
Args: idx (int): index of a node @@ -916,43 +965,43 @@ def _idx_to_name(self, idx: int) -> str: Returns: str: name of node """ - return self._schema_graph[idx].name + return str(self._schema_graph[idx].name) # casting as string for mypy - def _idx_path_to_name_path(self, idx_path: List[int]) -> List[str]: - """Take a path of graph indices and return a path of field names + def _idx_path_to_name_path(self, idx_path: list[int]) -> list[str]: + """Take a path of graph indices and return a path of field names. Args: - idx_path (List[int]): List of node indices (can be both TypeNodes and FieldNodes) + idx_path (list[int]): List of node indices (can be both TypeNodes and FieldNodes) Returns: - List[str]: List of field names, removing TypeNodes. + list[str]: List of field names, removing TypeNodes. """ - name_path: List[str] = [] + name_path: list[str] = [] for idx in idx_path: if isinstance(self._schema_graph[idx], FieldNode): - name_path.append(self._schema_graph[idx].name) + name_path.append(self._schema_graph[idx].name) # noqa: PERF401 return name_path - def find_paths(self, input_type: str, return_data_name: str, descriptions: bool = False) -> Union[List[str], Dict]: - """Find path from input_type to any nodes matching return_data_name + def find_paths(self, input_type: str, return_data_name: str, descriptions: bool = False) -> list[str] | dict[str, str]: + """Find path from input_type to any nodes matching return_data_name. Args: input_type (str): name of an input_type (ex: entry, polymer_entity_instance) return_data_name (str): name of one field, can be a redundant name - description (bool, optional): whether to include descriptions for the final field of each path. Default is False. + descriptions (bool, optional): whether to include descriptions for the final field of each path. Default is False. Returns: - Union[List[str], Dict] - List[str]: list of paths to nodes with names that match return_data_name + Union[list[str], Dict] + list[str]: list of paths to nodes with names that match return_data_name Dict: if description is True, a dictionary with paths as keys and descriptions as values is returned. """ - paths: List[List[int]] = [] + paths: list[list[int]] = [] input_type_idx: int = self._root_to_idx[input_type] for possible_idx in self._field_to_idx_dict[return_data_name]: paths_to_idx = rx.all_simple_paths(self._schema_graph, input_type_idx, possible_idx) paths.extend(paths_to_idx) - dot_paths: List[str] = [] - description_dict: Dict[str, str] = {} + dot_paths: list[str] = [] + description_dict: dict[str, str] = {} for path in paths: name_path = self._idx_path_to_name_path(path) dot_path = ".".join(name_path[1:]) @@ -969,27 +1018,27 @@ def find_paths(self, input_type: str, return_data_name: str, descriptions: bool dot_paths.sort() return dot_paths - def read_enum(self, type_name: str) -> List[str]: - """parse given type name into a list of enumeration values + def read_enum(self, type_name: str) -> list[str]: + """Parse given type name into a list of enumeration values. 
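        ex (illustrative): read_enum("SequenceReference") -> ["NCBI_GENOME", "NCBI_PROTEIN", "PDB_ENTITY", "PDB_INSTANCE", "UNIPROT"]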
        Args:
            type_name (str): GraphQL type name
        """
        for type_dict in self.schema["data"]["__schema"]["types"]:
            if type_dict["name"] == type_name:
-                enum_values = []
-                for value in type_dict["enumValues"]:
-                    enum_values.append(value["name"])
-                return enum_values
+                return [value["name"] for value in type_dict["enumValues"]]
+        error_msg = f"'{type_name}' is not an ENUM type in the GraphQL schema"
+        raise ValueError(error_msg)

-    def check_typing(self, query_type: str, enum_types, args: Dict[str, Any]):
-        """Check that arguments match typing specified in schema
+    def check_typing(self, query_type: str, enum_types: "EnumTypes", args: dict[str, Any]) -> None:  # noqa: UP037, F821
+        """Check that ENUM arguments have valid values and that arguments match the types specified in the schema.

        Args:
            query_type (str): Name of query field (annotations, alignments, etc)
-            enum_types (Enum): Enum class of GraphQL types that are enumerations.
+            enum_types (EnumTypes): Enum class of GraphQL types that are enumerations.
                Values are lists of valid strings corresponding to enumerations
-            kwargs**: key word arguments corresponding to query-specific arguments
+            args (Dict[str, Any]): Dictionary where keys are argument names and
+                values are input values
        """
        error_list = []
        arg_dict_list = self._root_dict[query_type]
@@ -1000,30 +1049,26 @@ def check_typing(self, query_type: str, enum_types, args: Dict[str, Any]):
            if arg_name not in args:
                continue

-            if arg_dict["kind"] == "NON_NULL":
-                if arg_dict["of_kind"] == "ENUM":
-                    if args[arg_name] not in enum_types[arg_type].value:
-                        error_list.append(
-                            f"Invalid value '{args[arg_name]}' for '{arg_name}': valid values are {enum_types[arg_type].value}"
-                        )
+            if arg_dict["kind"] == "NON_NULL" and arg_dict["of_kind"] == "ENUM" and args[arg_name] not in enum_types[arg_type].value:
+                error_list.append(f"Invalid value '{args[arg_name]}' for '{arg_name}': valid values are {enum_types[arg_type].value}")
+            # If LIST kind: check that the value is a list; item types are checked below.
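+            # ex (illustrative): for alignments, range=[1, 10] passes the list checks below,
+            # while range="1-10" would be flagged because the value is not a list.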
if arg_dict["kind"] == "LIST": if not isinstance(args[arg_name], list): - error_list.append( - f"'{arg_name}' must be a list" - ) - - # List of ENUMs - if arg_dict["kind"] == "NON_NULL": - if arg_dict["of_kind"] == "LIST": - mismatch_type = [item for item in args[arg_name] if item not in enum_types[arg_type].value] - if mismatch_type: - raise ValueError( - f"Invalid value(s) {mismatch_type} for '{arg_name}': valid values are {enum_types[arg_type].value}" - ) + error_list.append(f"'{arg_name}' must be a list") + # No case written for boolean arguments since there are none right now + if arg_type == "Int": + if not all(isinstance(item, int) for item in args[arg_name]): + error_list.append(f"'{arg_name}' must be a list of integer(s)") + elif not all(isinstance(item, str) for item in args[arg_name]): + error_list.append(f"'{arg_name}' must be list of string(s)") + + # if list of ENUMs + if arg_dict["kind"] == "NON_NULL" and arg_dict["of_kind"] == "LIST": + mismatch_type = [item for item in args[arg_name] if item not in enum_types[arg_type].value] + if mismatch_type: + error_msg = f"Invalid value(s) {mismatch_type} for '{arg_name}': valid values are {enum_types[arg_type].value}" + raise ValueError(error_msg) if error_list: - raise ValueError( - "\n" + " " + - "\n ".join(error_list) - ) + raise ValueError("\n" + " " + "\n ".join(error_list)) diff --git a/tests/test_seq_query.py b/tests/test_seq_query.py index d996f3d..a270db8 100644 --- a/tests/test_seq_query.py +++ b/tests/test_seq_query.py @@ -26,14 +26,14 @@ # import rustworkx as rx # import networkx as nx -from rcsbapi.sequence.query import alignments, group_alignments, annotations, group_annotations, group_annotations_summary, AnnotationFilterInput +from rcsbapi.sequence.seq_query import Alignments, GroupAlignments, Annotations, GroupAnnotations, GroupAnnotationsSummary, AnnotationFilterInput logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) class SeqTests(unittest.TestCase): - def setUp(self): + def setUp(self) -> None: self.__startTime = time.time() logger.info("Starting %s at %s", self.id().split(".")[-1], time.strftime("%Y %m %d %H:%M:%S", time.localtime())) @@ -44,7 +44,7 @@ def tearDown(self) -> None: def testAnnotations(self) -> None: with self.subTest(msg="1. Annotations query with filter"): try: - query_obj = annotations( + query_obj = Annotations( reference="NCBI_GENOME", sources=["PDB_INSTANCE"], queryId="NC_000001", @@ -65,7 +65,7 @@ def testAnnotations(self) -> None: def testAlignments(self) -> None: with self.subTest(msg="1. Alignments query without filter"): try: - query_obj = alignments( + query_obj = Alignments( from_="NCBI_PROTEIN", to="PDB_ENTITY", queryId="XP_642496", @@ -74,11 +74,23 @@ def testAlignments(self) -> None: query_obj.exec() except Exception as error: self.fail(f"Failed unexpectedly: {error}") + with self.subTest(msg="2. Alignments query with range"): + try: + query_obj = Alignments( + from_="NCBI_PROTEIN", + to="PDB_ENTITY", + queryId="XP_642496", + range=[1, 10], + return_data_list=["target_id"] + ) + query_obj.exec() + except Exception as error: + self.fail(f"Failed unexpectedly: {error}") def testGroupAlignments(self) -> None: with self.subTest(msg="1. group_alignments query without filter"): try: - query_obj = group_alignments( + query_obj = GroupAlignments( group="MATCHING_UNIPROT_ACCESSION", groupId="P01112", return_data_list=["target_id"], @@ -88,7 +100,7 @@ def testGroupAlignments(self) -> None: self.fail(f"Failed unexpectedly: {error}") with self.subTest(msg="2. 
group_alignments query with filter"): try: - query_obj = group_alignments( + query_obj = GroupAlignments( group="MATCHING_UNIPROT_ACCESSION", groupId="P01112", return_data_list=["target_id"], @@ -101,7 +113,7 @@ def testGroupAlignments(self) -> None: def testGroupAnnotations(self) -> None: with self.subTest(msg="1. group_annotations query without filter"): try: - query_obj = group_annotations( + query_obj = GroupAnnotations( group="MATCHING_UNIPROT_ACCESSION", groupId="P01112", sources=["PDB_ENTITY"], @@ -112,7 +124,7 @@ def testGroupAnnotations(self) -> None: self.fail(f"Failed unexpectedly: {error}") with self.subTest(msg="2. group_annotations query with filter"): try: - query_obj = group_annotations( + query_obj = GroupAnnotations( group="MATCHING_UNIPROT_ACCESSION", groupId="P01112", sources=["PDB_ENTITY"], @@ -130,10 +142,10 @@ def testGroupAnnotations(self) -> None: except Exception as error: self.fail(f"Failed unexpectedly: {error}") - def testGroupAnnotationsSummary(self): + def testGroupAnnotationsSummary(self) -> None: with self.subTest(msg="1. group_annotations_summary query without filter"): try: - query_obj = group_annotations_summary( + query_obj = GroupAnnotationsSummary( group="MATCHING_UNIPROT_ACCESSION", groupId="P01112", sources=["PDB_INSTANCE"], @@ -144,7 +156,7 @@ def testGroupAnnotationsSummary(self): self.fail(f"Failed unexpectedly: {error}") with self.subTest(msg="2. group_annotations_summary query with filter"): try: - query_obj = group_annotations_summary( + query_obj = GroupAnnotationsSummary( group="MATCHING_UNIPROT_ACCESSION", groupId="P01112", sources=["PDB_INSTANCE"], @@ -163,11 +175,13 @@ def testGroupAnnotationsSummary(self): self.fail(f"Failed unexpectedly: {error}") -def buildQuery(): +def buildQuery() -> unittest.TestSuite: suiteSelect = unittest.TestSuite() suiteSelect.addTest(SeqTests("testAnnotations")) suiteSelect.addTest(SeqTests("testAlignments")) suiteSelect.addTest(SeqTests("testGroupAlignments")) + suiteSelect.addTest(SeqTests("testGroupAnnotations")) + suiteSelect.addTest(SeqTests("testGroupAnnotationsSummary")) return suiteSelect From 8ba08f5c1acd5d56dea12538e4ef9f26a547fe45 Mon Sep 17 00:00:00 2001 From: Ivana Truong Date: Tue, 3 Dec 2024 16:04:28 -0800 Subject: [PATCH 04/12] WIP rewriting query construction --- rcsbapi/sequence/seq_schema.py | 122 ++++++++++++++++++++++++++++++++- 1 file changed, 119 insertions(+), 3 deletions(-) diff --git a/rcsbapi/sequence/seq_schema.py b/rcsbapi/sequence/seq_schema.py index ba0959b..23b84b2 100644 --- a/rcsbapi/sequence/seq_schema.py +++ b/rcsbapi/sequence/seq_schema.py @@ -439,6 +439,7 @@ def _recurse_fields(self, fields: dict[Any, Any], field_map: dict[Any, Any]) -> for target_idx, idx_path in fields.items(): mapped_path = field_map.get(target_idx, [target_idx]) mapped_path = mapped_path[: mapped_path.index(target_idx) + 1] # Only take the path up to the field itself + print(f"mapped path: {self._idx_path_to_name_path(mapped_path)}") for idx, subfield in enumerate(mapped_path): query_str += " " + self._idx_to_name(subfield) if idx < len(mapped_path) - 1 or (isinstance(idx_path, list) and idx_path): @@ -457,7 +458,7 @@ def _recurse_fields(self, fields: dict[Any, Any], field_map: dict[Any, Any]) -> for idx, subfield in enumerate(mapped_path): if idx < len(mapped_path) - 1 or (isinstance(idx_path, list) and idx_path): query_str += " " + "} " - return query_str + return query_str def _idx_dict_to_name_dict(self, idx_fields: dict[Any, Any] | list[int] | int) -> dict[str, Any] | list[str] | str: query_dict = 
{} @@ -475,7 +476,6 @@ def _idx_dict_to_name_dict(self, idx_fields: dict[Any, Any] | list[int] | int) - def _fields_to_string(self, idx_fields: dict[str, Any]) -> str: name_dict = self._idx_dict_to_name_dict(idx_fields) - print(name_dict) return ( # format the dict as a GraphQL query str(name_dict) @@ -605,6 +605,122 @@ def construct_query( # query = "" # return query + def _construct_query_new( + self, + query_type: str, + query_args: dict[str, str] | dict[str, list[Any]], + return_data_list: list[str], + suppress_autocomplete_warning: bool = False + ) -> None: + # Build first line of query where arguments are given + arg_list = self._root_dict[query_type] + arg_value_list = [self.format_args(arg_dict, query_args[arg_dict["name"]]) for arg_dict in arg_list if arg_dict["name"] in query_args] + + # Build query body + start_idx = self._root_to_idx[query_type] + return_data_path_dict = self._make_path_dict(start_idx, query_type, return_data_list) + print(return_data_path_dict) + # query_body = + query = ( + f"query {{{query_type}({str(arg_value_list).replace("[", "(").replace("]", ")")}" + f"{query_body}" + f"}}" + ) + + def _make_path_dict( + self, + start_idx: int, + query_type: str, + return_data_list: list[str], + ) -> dict[int, list[int]]: + return_data_paths: dict[int, list[int]] = {} + complete_path: int = 0 + + for field in return_data_list: + # Generate list of all possible paths to the final requested field. Try to find matching sequence to user input. + path_list = field.split(".") + possible_paths = self.find_paths(query_type, path_list[-1]) + matching_paths: list[str] = [] + for path in possible_paths: + possible_path_list = path.split(".") + possible_path_list.insert(0, str(query_type)) + + # If there is an exact path match, + # the path is fully specified and other possible_paths can be removed and loop can stop. + # Iterate complete path, so warning can be raised if autocompletion is used + path_list_with_input = [query_type, *path_list] + if possible_path_list in (path_list, path_list_with_input): + matching_paths = [".".join(possible_path_list)] + complete_path += 1 + break + # Else, check for matching path segments. + for i in range(len(possible_path_list)): + if possible_path_list[i: i + len(path_list)] == path_list: + matching_paths.append(".".join(possible_path_list)) + + idx_paths: list[list[int]] = [] + if len(matching_paths) > 0: + for path in matching_paths: + idx_paths.extend(self._parse_dot_path(path)) + + # remove paths not beginning with input_type + full_idx_paths: list[list[int]] = list(idx_paths) + input_type_idx = self._root_to_idx[query_type] + for idx_path in idx_paths: + if idx_path[0] != input_type_idx: + full_idx_paths.remove(idx_path) + idx_paths = full_idx_paths + + if len(idx_paths) > 1: + # Print error message that doesn't include input_type at beginning + # But keep input_type in matching_paths for query construction reasons + num_paths_to_print = 10 + path_choice_msg = " " + "\n ".join([".".join(path.split(".")[1:]) for path in matching_paths[:10]]) + len_path = min(len(matching_paths), num_paths_to_print) + + if len(matching_paths) > num_paths_to_print: + error_msg = ( + f'Given path "{field}" not specific enough. 
Use one or more of these paths in return_data_list argument:\n\n' + f"{len_path} of {len(matching_paths)} possible paths:\n" + f"{path_choice_msg}" + f"\n ...\n\n" + f"For all paths run:\n" + f" from rcsbapi.data import Schema\n" + f" schema = Schema()\n" + f' schema.find_paths("{query_type}", "{path_list[-1]}")' + ) + raise ValueError(error_msg) + + error_msg = ( + f'Given path "{field}" not specific enough. Use one or more of these paths in return_data_list argument:\n\n' + f"{len_path} of {len(matching_paths)} possible paths:\n" + f"{path_choice_msg}" + ) + raise ValueError(error_msg) + + # If path isn't in possible_paths_list, try using the graph to validate the path. Allows for queries with loops and paths that have repeated nodes. + if len(idx_paths) == 0: + possible_dot_paths: list[list[int]] = self._parse_dot_path(field) # Throws an error if path is invalid + shortest_full_paths: list[list[int]] = self._compare_paths(start_idx, possible_dot_paths) + if len(shortest_full_paths) > 1: + shortest_name_paths = [".".join([self._idx_to_name(idx) for idx in path[1:] if isinstance(self._schema_graph[idx], FieldNode)]) for path in shortest_full_paths] + shortest_name_paths.sort() + path_choice_msg = "" + for name_path in shortest_name_paths: + path_choice_msg += " " + name_path + "\n" + error_msg = ( + "Given path not specific enough. Use one or more of these paths in return_data_list argument:\n\n" + f"{path_choice_msg}\n" + "Please note that this list may not be complete. " + "If looking for a different path, you can search the interactive editor's documentation explorer: https://data.rcsb.org/graphql/index.html" + ) + raise ValueError(error_msg) + idx_paths = shortest_full_paths + final_idx: int = idx_paths[0][-1] + shortest_path: list[int] = idx_paths[0][1:] + return_data_paths[final_idx] = shortest_path + return return_data_paths + def _construct_query_rustworkx( self, query_type: str, @@ -805,7 +921,7 @@ def _construct_query_rustworkx( query += ", " query += ") { " - print(self._fields_to_string(final_fields)) + print(f"field_names: {self._fields_to_string(field_names)}") query += self._recurse_fields(final_fields, field_names) query += " } }" json_query = {"query": f"{query}"} From dc22ea4f592a83579803aa5666b493fd3e4f39fe Mon Sep 17 00:00:00 2001 From: Ivana Truong Date: Wed, 4 Dec 2024 10:45:13 -0800 Subject: [PATCH 05/12] WIP creating query as a dictionary --- rcsbapi/sequence/seq_schema.py | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/rcsbapi/sequence/seq_schema.py b/rcsbapi/sequence/seq_schema.py index 23b84b2..7e27ac6 100644 --- a/rcsbapi/sequence/seq_schema.py +++ b/rcsbapi/sequence/seq_schema.py @@ -461,6 +461,7 @@ def _recurse_fields(self, fields: dict[Any, Any], field_map: dict[Any, Any]) -> return query_str def _idx_dict_to_name_dict(self, idx_fields: dict[Any, Any] | list[int] | int) -> dict[str, Any] | list[str] | str: + """Format descendant fields into dictionary that can be easily converted to GraphQL string""" query_dict = {} if isinstance(idx_fields, dict): for field_idx, subfield in idx_fields.items(): @@ -474,11 +475,29 @@ def _idx_dict_to_name_dict(self, idx_fields: dict[Any, Any] | list[int] | int) - else: return self._idx_to_name(idx_fields) - def _fields_to_string(self, idx_fields: dict[str, Any]) -> str: - name_dict = self._idx_dict_to_name_dict(idx_fields) + def _idx_list_to_query( + self, + idx_list: list[int], + autopopulated_fields: list[int | dict[int, Any]], + partial_query: dict[Any, Any] | None = None, 
+ ) -> dict[str, Any]: + if partial_query is None: + partial_query = {} + # Base case + if len(idx_list) == 0: + assert isinstance(partial_query, dict) # for mypy + return partial_query + # Add autopopulated fields + if len(idx_list) == 1: + return {self._idx_to_name(idx_list[0]): [autopopulated_fields]} + # Create a query with correct nesting + else: + return {self._idx_to_name(idx_list[0]): [self._idx_list_to_query(idx_list[1:], autopopulated_fields=autopopulated_fields)]} + + def _query_dict_to_graphql_string(self, query_dict: dict[str, Any]) -> str: # TODO: should take a full dict query return ( # format the dict as a GraphQL query - str(name_dict) + str(query_dict) .replace("'", "") .replace("[", "") .replace("]", "") @@ -619,7 +638,11 @@ def _construct_query_new( # Build query body start_idx = self._root_to_idx[query_type] return_data_path_dict = self._make_path_dict(start_idx, query_type, return_data_list) - print(return_data_path_dict) + query_body = {} + each_return_field_query = [] + for return_field, path in return_data_path_dict: + each_return_field_query.append(self._path_to_graphql_str()) + # print(return_data_path_dict) # query_body = query = ( f"query {{{query_type}({str(arg_value_list).replace("[", "(").replace("]", ")")}" From a22dc270af4e7067f13f2d174bf55edb5ea397ad Mon Sep 17 00:00:00 2001 From: Ivana Truong Date: Wed, 4 Dec 2024 15:41:00 -0800 Subject: [PATCH 06/12] WIP rewriting query construction --- rcsbapi/sequence/seq_query.py | 4 ++- rcsbapi/sequence/seq_schema.py | 57 ++++++++++++++++++---------------- 2 files changed, 34 insertions(+), 27 deletions(-) diff --git a/rcsbapi/sequence/seq_query.py b/rcsbapi/sequence/seq_query.py index cfd70a2..def77db 100644 --- a/rcsbapi/sequence/seq_query.py +++ b/rcsbapi/sequence/seq_query.py @@ -68,6 +68,9 @@ def construct_query(self, query_type: str) -> Dict: return_data_list=self.return_data_list, suppress_autocomplete_warning=self.suppress_autocomplete_warning, ) + print("COMPARE") + print(f"old query:\n {query}") + print(f"new query:\n {SEQ_SCHEMA._construct_query_new(query_type, self.to_dict(), return_data_list=self.return_data_list)}") return query @@ -76,7 +79,6 @@ def exec(self) -> Dict[str, Any]: # Assert attribute exists for mypy assert hasattr(self, "_query"), \ f"{self.__class__.__name__} must define '_query' attribute." 
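        # _query is built and validated when the dataclass is constructed;
        # exec() just POSTs it to the sequence coordinates endpoint.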
- print(self._query) response_json = requests.post( json=dict(self._query), url=seq_const.API_ENDPOINT + "/graphql", diff --git a/rcsbapi/sequence/seq_schema.py b/rcsbapi/sequence/seq_schema.py index 7e27ac6..16f55cf 100644 --- a/rcsbapi/sequence/seq_schema.py +++ b/rcsbapi/sequence/seq_schema.py @@ -3,6 +3,7 @@ from __future__ import annotations import logging import json +import re from typing import Any from pathlib import Path import requests @@ -439,7 +440,6 @@ def _recurse_fields(self, fields: dict[Any, Any], field_map: dict[Any, Any]) -> for target_idx, idx_path in fields.items(): mapped_path = field_map.get(target_idx, [target_idx]) mapped_path = mapped_path[: mapped_path.index(target_idx) + 1] # Only take the path up to the field itself - print(f"mapped path: {self._idx_path_to_name_path(mapped_path)}") for idx, subfield in enumerate(mapped_path): query_str += " " + self._idx_to_name(subfield) if idx < len(mapped_path) - 1 or (isinstance(idx_path, list) and idx_path): @@ -458,7 +458,7 @@ def _recurse_fields(self, fields: dict[Any, Any], field_map: dict[Any, Any]) -> for idx, subfield in enumerate(mapped_path): if idx < len(mapped_path) - 1 or (isinstance(idx_path, list) and idx_path): query_str += " " + "} " - return query_str + return query_str def _idx_dict_to_name_dict(self, idx_fields: dict[Any, Any] | list[int] | int) -> dict[str, Any] | list[str] | str: """Format descendant fields into dictionary that can be easily converted to GraphQL string""" @@ -475,7 +475,7 @@ def _idx_dict_to_name_dict(self, idx_fields: dict[Any, Any] | list[int] | int) - else: return self._idx_to_name(idx_fields) - def _idx_list_to_query( + def _idxs_to_query( self, idx_list: list[int], autopopulated_fields: list[int | dict[int, Any]], @@ -489,27 +489,30 @@ def _idx_list_to_query( return partial_query # Add autopopulated fields if len(idx_list) == 1: - return {self._idx_to_name(idx_list[0]): [autopopulated_fields]} + return {self._idx_to_name(idx_list[0]): [self._idx_dict_to_name_dict(autopopulated_fields)]} # Create a query with correct nesting else: - return {self._idx_to_name(idx_list[0]): [self._idx_list_to_query(idx_list[1:], autopopulated_fields=autopopulated_fields)]} + return {self._idx_to_name(idx_list[0]): [self._idxs_to_query(idx_list[1:], autopopulated_fields=autopopulated_fields)]} def _query_dict_to_graphql_string(self, query_dict: dict[str, Any]) -> str: # TODO: should take a full dict query - return ( + formatted_str = ( # format the dict as a GraphQL query - str(query_dict) + str(json.dumps(query_dict, indent=2)) .replace("'", "") .replace("[", "") .replace("]", "") - .replace(",", "") - .replace("{", " ") - .replace(":", "{") + .replace(",", " ") + .replace("{", "") + .replace(": ", "{") ) + formatted_str = "\n".join(line for line in formatted_str.splitlines() if line.strip()) + return formatted_str - def _get_descendant_fields(self, node_idx: int, field_name: str, visited: None | set[int] = None) -> list[int | dict[int, Any]]: + def _get_descendant_fields(self, node_idx: int, visited: None | set[int] = None) -> list[int | dict[int, Any]]: if visited is None: visited = set() + field_name = self._idx_to_name(node_idx) result: list[int | dict[int, Any]] = [] children_idx = list(self._schema_graph.neighbors(node_idx)) @@ -523,7 +526,7 @@ def _get_descendant_fields(self, node_idx: int, field_name: str, visited: None | assert isinstance(child_data.index, int) # noqa: S101 (needed for mypy) if isinstance(child_data, FieldNode): - child_descendants = self._get_descendant_fields(idx, 
field_name, visited) + child_descendants = self._get_descendant_fields(idx, visited) # If further subfields append as dictionary. ex: {field index: [subfield1, subfield2, ...]} if child_descendants: result.append({child_data.index: child_descendants}) @@ -531,7 +534,7 @@ def _get_descendant_fields(self, node_idx: int, field_name: str, visited: None | else: result.append(child_data.index) elif isinstance(child_data, TypeNode): - type_descendants = self._get_descendant_fields(idx, field_name, visited) + type_descendants = self._get_descendant_fields(idx, visited) # If further subfields, append the list of descendants (indices and index dicts) if type_descendants: result.extend(type_descendants) @@ -630,26 +633,30 @@ def _construct_query_new( query_args: dict[str, str] | dict[str, list[Any]], return_data_list: list[str], suppress_autocomplete_warning: bool = False - ) -> None: + ) -> str: # Build first line of query where arguments are given arg_list = self._root_dict[query_type] arg_value_list = [self.format_args(arg_dict, query_args[arg_dict["name"]]) for arg_dict in arg_list if arg_dict["name"] in query_args] # Build query body start_idx = self._root_to_idx[query_type] - return_data_path_dict = self._make_path_dict(start_idx, query_type, return_data_list) - query_body = {} - each_return_field_query = [] - for return_field, path in return_data_path_dict: - each_return_field_query.append(self._path_to_graphql_str()) - # print(return_data_path_dict) - # query_body = + return_data_path_dict: dict[int, list[int]] = self._make_path_dict(start_idx, query_type, return_data_list) + return_field_query_list = [] + for return_field_idx, path in return_data_path_dict.items(): + return_field_query_dict = self._idxs_to_query(idx_list=path, autopopulated_fields=self._get_descendant_fields(return_field_idx)) + return_field_query_list.append(return_field_query_dict) + + # TODO: in idxs_to_query, add arguments to keys + # TODO: merge partial queries in list to full query + query_body = self._query_dict_to_graphql_string(return_field_query_list[0]) # TODO: needs to be changed query = ( - f"query {{{query_type}({str(arg_value_list).replace("[", "(").replace("]", ")")}" + f"query{{{query_type}{str(arg_value_list).replace("[", "(").replace("]", ")").replace("'", "")}" + f"{{" f"{query_body}" f"}}" ) - + return query + def _make_path_dict( self, start_idx: int, @@ -908,7 +915,7 @@ def _construct_query_rustworkx( final_fields = {} for target_idx in return_data_paths: - final_fields[target_idx] = self._get_descendant_fields(node_idx=target_idx, field_name=self._schema_graph[target_idx].name) + final_fields[target_idx] = self._get_descendant_fields(node_idx=target_idx) field_names: dict[Any, Any] = {} paths: dict[Any, Any] = {} @@ -944,11 +951,9 @@ def _construct_query_rustworkx( query += ", " query += ") { " - print(f"field_names: {self._fields_to_string(field_names)}") query += self._recurse_fields(final_fields, field_names) query += " } }" json_query = {"query": f"{query}"} - print(query) return json_query # noqa: RET504 def format_args(self, arg_dict: dict[str, list[Any]] | dict[str, str], input_value: str | list[str]) -> str: From 65aad6bf56b033ddb411b95e773b18a4e89a9e46 Mon Sep 17 00:00:00 2001 From: Ivana Truong Date: Tue, 10 Dec 2024 17:07:37 -0800 Subject: [PATCH 07/12] adding new construct query function --- rcsbapi/sequence/seq_query.py | 5 +- rcsbapi/sequence/seq_schema.py | 90 ++++++++++++++++++++++++---------- 2 files changed, 65 insertions(+), 30 deletions(-) diff --git a/rcsbapi/sequence/seq_query.py 
b/rcsbapi/sequence/seq_query.py index def77db..d879cf6 100644 --- a/rcsbapi/sequence/seq_query.py +++ b/rcsbapi/sequence/seq_query.py @@ -62,15 +62,12 @@ def construct_query(self, query_type: str) -> Dict: args=self.to_dict(), ) - query = SEQ_SCHEMA.construct_query( + query = SEQ_SCHEMA._construct_query_new( query_type=query_type, query_args=self.to_dict(), return_data_list=self.return_data_list, suppress_autocomplete_warning=self.suppress_autocomplete_warning, ) - print("COMPARE") - print(f"old query:\n {query}") - print(f"new query:\n {SEQ_SCHEMA._construct_query_new(query_type, self.to_dict(), return_data_list=self.return_data_list)}") return query diff --git a/rcsbapi/sequence/seq_schema.py b/rcsbapi/sequence/seq_schema.py index 16f55cf..e1da4c5 100644 --- a/rcsbapi/sequence/seq_schema.py +++ b/rcsbapi/sequence/seq_schema.py @@ -460,7 +460,7 @@ def _recurse_fields(self, fields: dict[Any, Any], field_map: dict[Any, Any]) -> query_str += " " + "} " return query_str - def _idx_dict_to_name_dict(self, idx_fields: dict[Any, Any] | list[int] | int) -> dict[str, Any] | list[str] | str: + def _idx_dict_to_name_dict(self, idx_fields: list[dict[int, Any] | int] | dict[int, Any] | int) -> dict[str, Any] | list[str] | str: """Format descendant fields into dictionary that can be easily converted to GraphQL string""" query_dict = {} if isinstance(idx_fields, dict): @@ -475,12 +475,12 @@ def _idx_dict_to_name_dict(self, idx_fields: dict[Any, Any] | list[int] | int) - else: return self._idx_to_name(idx_fields) - def _idxs_to_query( + def _idxs_to_idx_dict( self, idx_list: list[int], autopopulated_fields: list[int | dict[int, Any]], partial_query: dict[Any, Any] | None = None, - ) -> dict[str, Any]: + ) -> dict[int, Any] | list[dict[int, Any] | int]: if partial_query is None: partial_query = {} # Base case @@ -489,24 +489,31 @@ def _idxs_to_query( return partial_query # Add autopopulated fields if len(idx_list) == 1: - return {self._idx_to_name(idx_list[0]): [self._idx_dict_to_name_dict(autopopulated_fields)]} + if not autopopulated_fields: + return [idx_list[0]] + return {idx_list[0]: autopopulated_fields} # Create a query with correct nesting else: - return {self._idx_to_name(idx_list[0]): [self._idxs_to_query(idx_list[1:], autopopulated_fields=autopopulated_fields)]} + return {idx_list[0]: self._idxs_to_idx_dict(idx_list[1:], autopopulated_fields=autopopulated_fields)} - def _query_dict_to_graphql_string(self, query_dict: dict[str, Any]) -> str: # TODO: should take a full dict query - formatted_str = ( + def query_dict_to_graphql_string(self, query_dict: dict[str, Any]) -> str: + first_line = next(iter(query_dict["query"])) + print(f"FIRST LINE: {first_line}") + query_body = query_dict["query"][first_line] + formatted_query_body = ( # format the dict as a GraphQL query - str(json.dumps(query_dict, indent=2)) - .replace("'", "") + json.dumps(query_body, indent=2) + .replace('"', "") + .replace("'", '"') .replace("[", "") .replace("]", "") .replace(",", " ") .replace("{", "") .replace(": ", "{") ) - formatted_str = "\n".join(line for line in formatted_str.splitlines() if line.strip()) - return formatted_str + formatted_query_body = "\n".join(line for line in formatted_query_body.splitlines() if line.strip()) + query = f"query{{{first_line}{{\n{formatted_query_body}}}}}" + return query def _get_descendant_fields(self, node_idx: int, visited: None | set[int] = None) -> list[int | dict[int, Any]]: if visited is None: @@ -633,31 +640,62 @@ def _construct_query_new( query_args: dict[str, str] | dict[str, 
list[Any]], return_data_list: list[str], suppress_autocomplete_warning: bool = False - ) -> str: + ) -> dict[str, Any]: # Build first line of query where arguments are given arg_list = self._root_dict[query_type] - arg_value_list = [self.format_args(arg_dict, query_args[arg_dict["name"]]) for arg_dict in arg_list if arg_dict["name"] in query_args] + arg_value_list = tuple(self.format_args(arg_dict, query_args[arg_dict["name"]]) for arg_dict in arg_list if arg_dict["name"] in query_args) + query_args_str = f"{query_type}{str(arg_value_list).replace("'", '')}" # Build query body start_idx = self._root_to_idx[query_type] - return_data_path_dict: dict[int, list[int]] = self._make_path_dict(start_idx, query_type, return_data_list) + return_data_path_dict: dict[int, list[int]] = self.return_fields_to_paths(start_idx, query_type, return_data_list) return_field_query_list = [] for return_field_idx, path in return_data_path_dict.items(): - return_field_query_dict = self._idxs_to_query(idx_list=path, autopopulated_fields=self._get_descendant_fields(return_field_idx)) + return_field_query_dict = self._idxs_to_idx_dict(idx_list=path, autopopulated_fields=self._get_descendant_fields(return_field_idx)) return_field_query_list.append(return_field_query_dict) - + # TODO: in idxs_to_query, add arguments to keys - # TODO: merge partial queries in list to full query - query_body = self._query_dict_to_graphql_string(return_field_query_list[0]) # TODO: needs to be changed - query = ( - f"query{{{query_type}{str(arg_value_list).replace("[", "(").replace("]", ")").replace("'", "")}" - f"{{" - f"{query_body}" - f"}}" - ) - return query + idx_query_body = self._merge_query_list(return_field_query_list) + name_query_body = self._idx_dict_to_name_dict(idx_query_body) + query = self.query_dict_to_graphql_string({"query": {query_args_str: name_query_body}}) + return {"query": query} + + def _merge_query( + self, + query_1: dict[int, Any] | list[int], + query_2: dict[int, Any] | list[int] + ) -> list[int | dict[int, Any]] | list[dict[int, Any]] | list[int]: + if isinstance(query_1, dict) and isinstance(query_2, dict): + for key in query_1.keys(): + if (key in query_2): + return [{key: self._merge_query(query_1[key], query_2[key])}] + return [query_1, query_2] + elif isinstance(query_1, list) and isinstance(query_2, dict): + return query_1 + [query_2] + elif isinstance(query_1, dict) and isinstance(query_2, list): + return [query_1] + query_2 + elif isinstance(query_1, list) and isinstance(query_2, list): + return query_1 + query_2 + raise ValueError("Invalid query input") + + def _merge_query_list(self, query_list: list[dict[int, Any] | int]) -> list[dict[int, Any] | int]: + result = [query_list[0]] + for path in query_list[1:]: + for partial_path in result: + result = self._merge_query(partial_path, path) + return result - def _make_path_dict( + def _idx_query_to_name_query(self, idx_query: dict[int, Any] | list[int] | int) -> dict[str, Any] | list[str] | str: + if isinstance(idx_query, dict): + assert len(idx_query) == 1 + for key, value in idx_query.items(): + field_name = self._idx_to_name(key) + return {field_name: self._idx_query_to_name_query(value)} + elif isinstance(idx_query, list): + return [self._idx_query_to_name_query(idx) for idx in idx_query] + return self._idx_to_name(idx_query) + + def return_fields_to_paths( self, start_idx: int, query_type: str, From cc2c29963f01debf8747186294047da4d81db50b Mon Sep 17 00:00:00 2001 From: Ivana Truong Date: Tue, 10 Dec 2024 17:15:21 -0800 Subject: [PATCH 08/12] 
delete unused query construction code --- rcsbapi/sequence/seq_query.py | 2 +- rcsbapi/sequence/seq_schema.py | 243 +-------------------------------- 2 files changed, 2 insertions(+), 243 deletions(-) diff --git a/rcsbapi/sequence/seq_query.py b/rcsbapi/sequence/seq_query.py index d879cf6..da5fefa 100644 --- a/rcsbapi/sequence/seq_query.py +++ b/rcsbapi/sequence/seq_query.py @@ -62,7 +62,7 @@ def construct_query(self, query_type: str) -> Dict: args=self.to_dict(), ) - query = SEQ_SCHEMA._construct_query_new( + query = SEQ_SCHEMA.construct_query( query_type=query_type, query_args=self.to_dict(), return_data_list=self.return_data_list, diff --git a/rcsbapi/sequence/seq_schema.py b/rcsbapi/sequence/seq_schema.py index e1da4c5..687f907 100644 --- a/rcsbapi/sequence/seq_schema.py +++ b/rcsbapi/sequence/seq_schema.py @@ -435,31 +435,6 @@ def get_input_id_dict(self, input_type: str) -> dict[str, str]: input_dict[name] = description return input_dict - def _recurse_fields(self, fields: dict[Any, Any], field_map: dict[Any, Any]) -> str: - query_str = "" - for target_idx, idx_path in fields.items(): - mapped_path = field_map.get(target_idx, [target_idx]) - mapped_path = mapped_path[: mapped_path.index(target_idx) + 1] # Only take the path up to the field itself - for idx, subfield in enumerate(mapped_path): - query_str += " " + self._idx_to_name(subfield) - if idx < len(mapped_path) - 1 or (isinstance(idx_path, list) and idx_path): - query_str += "{ " - else: - query_str += " " - if isinstance(idx_path, list): - if idx_path: # Only recurse if the list is not empty - for item in idx_path: - if isinstance(item, dict): - query_str += self._recurse_fields(item, field_map) - else: - query_str += " " + self._idx_to_name(item) - else: - query_str += " " + idx_path - for idx, subfield in enumerate(mapped_path): - if idx < len(mapped_path) - 1 or (isinstance(idx_path, list) and idx_path): - query_str += " " + "} " - return query_str - def _idx_dict_to_name_dict(self, idx_fields: list[dict[int, Any] | int] | dict[int, Any] | int) -> dict[str, Any] | list[str] | str: """Format descendant fields into dictionary that can be easily converted to GraphQL string""" query_dict = {} @@ -623,18 +598,7 @@ def construct_query( ) return query # noqa: RET504 - # def _construct_query_networkx( - # self, - # input_type: str, - # input_ids: Union[dict[str, str], list[str]], - # return_data_list: list[str], - # add_rcsb_id: bool, - # suppress_autocomplete_warning: bool - # ): # Incomplete function - # query = "" - # return query - - def _construct_query_new( + def _construct_query_rustworkx( self, query_type: str, query_args: dict[str, str] | dict[str, list[Any]], @@ -789,211 +753,6 @@ def return_fields_to_paths( return_data_paths[final_idx] = shortest_path return return_data_paths - def _construct_query_rustworkx( - self, - query_type: str, - query_args: dict[str, str] | dict[str, list[Any]], - return_data_list: list[str], - suppress_autocomplete_warning: bool = False, - ) -> dict[str, Any]: - """Construct a GraphQL query as JSON using a rustworkx graph. - - Args: - query_type (str): root type ("alignments", "annotations") - query_args (dict[str, str] | dict[str, list]): dictionary where keys are argument names and - values are input values - return_data_list (list[str]): list of fields to request data for - suppress_autocomplete_warning (bool, optional): Whether to suppress warning for autocompletion of paths. - Defaults to False. 
- - Raises: - ValueError: input_ids dictionary keys don't match the input_type given - ValueError: input_ids dictionary keys missing - ValueError: input_ids dictionary value should be a string, but another type was passed in - ValueError: field in return_data_list exists, but is a redundant name and needs to be further specified - ValueError: path in return_data_list exists, but is a redundant and needs to be further specified - - Returns: - str: query in GraphQL syntax - """ - arg_list = self._root_dict[query_type] - # arg_name_list = [id["name"] for id in arg_list] # might need to revert back to this - - # # Check formatting of input_ids - # input_dict: Union[dict[str, str], dict[str, list[str]]] = {} - - # if isinstance(input_ids, Dict): - # input_dict = input_ids - # if not all(key in arg_name_list for key in input_dict.keys()): - # raise ValueError(f"Input IDs keys do not match: {input_dict.keys()} vs {arg_name_list}") - # missing_keys = [key_arg for key_arg in arg_name_list if key_arg not in input_dict] - # if len(missing_keys) > 0: - # raise ValueError( - # f"Missing input_id dictionary keys: {missing_keys}. Find input_id keys and descriptions by running:\n" - # f" from rcsbapi.data import Schema\n" - # f" schema = Schema()\n" - # f' schema.get_input_id_dict("{input_type}")' - # ) - # attr_kind = {attr["name"]: attr["kind"] for attr in attr_list} - # for key, value in input_dict.items(): - # if attr_kind[key] == "SCALAR": - # if not isinstance(value, str): - # raise ValueError(f"Input ID for {key} should be a single string") - # elif attr_kind[key] == "LIST": - # if not isinstance(value, list): - # raise ValueError(f"Input ID for {key} should be a list of strings") - # if not all(isinstance(item, str) for item in value): - # raise ValueError(f"Input ID for {key} should be a list of strings") - - start_node_index = self._root_to_idx[query_type] - - return_data_paths: dict[int, list[list[int]]] = {} - complete_path: int = 0 - - for field in return_data_list: - # Generate list of all possible paths to the final requested field. Try to find matching sequence to user input. - path_list = field.split(".") - possible_paths = self.find_paths(query_type, path_list[-1]) - matching_paths: list[str] = [] - for path in possible_paths: - possible_path_list = path.split(".") - possible_path_list.insert(0, str(query_type)) - - # If there is an exact path match, - # the path is fully specified and other possible_paths can be removed and loop can stop. - # Iterate complete path, so warning can be raised if autocompletion is used - path_list_with_input = [query_type, *path_list] - if possible_path_list in (path_list, path_list_with_input): - matching_paths = [".".join(possible_path_list)] - complete_path += 1 - break - # Else, check for matching path segments. 
- for i in range(len(possible_path_list)): - if possible_path_list[i: i + len(path_list)] == path_list: - matching_paths.append(".".join(possible_path_list)) - - idx_paths: list[list[int]] = [] - if len(matching_paths) > 0: - for path in matching_paths: - idx_paths.extend(self._parse_dot_path(path)) - - # remove paths not beginning with input_type - full_idx_paths: list[list[int]] = list(idx_paths) - input_type_idx = self._root_to_idx[query_type] - for idx_path in idx_paths: - if idx_path[0] != input_type_idx: - full_idx_paths.remove(idx_path) - idx_paths = full_idx_paths - - if len(idx_paths) > 1: - # Print error message that doesn't include input_type at beginning - # But keep input_type in matching_paths for query construction reasons - num_paths_to_print = 10 - path_choice_msg = " " + "\n ".join([".".join(path.split(".")[1:]) for path in matching_paths[:10]]) - len_path = min(len(matching_paths), num_paths_to_print) - - if len(matching_paths) > num_paths_to_print: - error_msg = ( - f'Given path "{field}" not specific enough. Use one or more of these paths in return_data_list argument:\n\n' - f"{len_path} of {len(matching_paths)} possible paths:\n" - f"{path_choice_msg}" - f"\n ...\n\n" - f"For all paths run:\n" - f" from rcsbapi.data import Schema\n" - f" schema = Schema()\n" - f' schema.find_paths("{query_type}", "{path_list[-1]}")' - ) - raise ValueError(error_msg) - - error_msg = ( - f'Given path "{field}" not specific enough. Use one or more of these paths in return_data_list argument:\n\n' - f"{len_path} of {len(matching_paths)} possible paths:\n" - f"{path_choice_msg}" - ) - raise ValueError(error_msg) - - # If path isn't in possible_paths_list, try using the graph to validate the path. Allows for queries with loops and paths that have repeated nodes. - if len(idx_paths) == 0: - possible_dot_paths: list[list[int]] = self._parse_dot_path(field) # Throws an error if path is invalid - shortest_full_paths: list[list[int]] = self._compare_paths(start_node_index, possible_dot_paths) - if len(shortest_full_paths) > 1: - shortest_name_paths = [".".join([self._idx_to_name(idx) for idx in path[1:] if isinstance(self._schema_graph[idx], FieldNode)]) for path in shortest_full_paths] - shortest_name_paths.sort() - path_choice_msg = "" - for name_path in shortest_name_paths: - path_choice_msg += " " + name_path + "\n" - error_msg = ( - "Given path not specific enough. Use one or more of these paths in return_data_list argument:\n\n" - f"{path_choice_msg}\n" - "Please note that this list may not be complete. " - "If looking for a different path, you can search the interactive editor's documentation explorer: https://data.rcsb.org/graphql/index.html" - ) - raise ValueError(error_msg) - idx_paths = shortest_full_paths - final_idx: int = idx_paths[0][-1] - return_data_paths[final_idx] = idx_paths - - if (complete_path != len(return_data_list)) and (suppress_autocomplete_warning is False): - info_list = [".".join(self._idx_path_to_name_path(path[0][1:])) for path in return_data_paths.values()] - - path_msg = "".join(f'\n\t"{item}",' for item in info_list) - logger.warning( - "\n" - "Some paths are being autocompleted based on the current API. 
If this code is meant for long-term use, use the set of fully-specified paths below:\n" - " [" - "%s\n" - " ]", - path_msg, - ) - - for return_data in return_data_list: - if any(not value for value in return_data_paths.values()): - error_msg = f'You can\'t access "{return_data}" from input type {query_type}' - raise ValueError(error_msg) - - final_fields = {} - for target_idx in return_data_paths: - final_fields[target_idx] = self._get_descendant_fields(node_idx=target_idx) - - field_names: dict[Any, Any] = {} - paths: dict[Any, Any] = {} - - for target_idx, paths_list in return_data_paths.items(): - node_data = self._schema_graph[target_idx] - if isinstance(node_data, FieldNode): - field_names[target_idx] = [] - paths[target_idx] = [] - for each_path in paths_list: - skip_first = True - path = [node_idx for node_idx in each_path if isinstance(self._schema_graph[node_idx], FieldNode)][1:] - paths[target_idx].append(path) - for node_idx in each_path: - node_data = self._schema_graph[node_idx] - if isinstance(node_data, FieldNode): - if skip_first: - skip_first = False - continue - field_names[target_idx].append(node_idx) - - query = "query { " + query_type + "(" - - num_arg_added = 0 - for arg_dict in arg_list: - arg_name = arg_dict["name"] - # If arg not in query_args, assume it's an optional (checking done earlier) - if arg_name not in query_args: - continue - query += self.format_args(arg_dict, query_args[arg_name]) - num_arg_added += 1 - if num_arg_added < (len(query_args) - 1): - query += ", " - - query += ") { " - query += self._recurse_fields(final_fields, field_names) - query += " } }" - json_query = {"query": f"{query}"} - return json_query # noqa: RET504 - def format_args(self, arg_dict: dict[str, list[Any]] | dict[str, str], input_value: str | list[str]) -> str: """Add double quotes or omit quotes around a single GraphQL argument. From 245bae851630940ff1e97754cc4b375130fca85c Mon Sep 17 00:00:00 2001 From: Ivana Truong Date: Thu, 12 Dec 2024 10:24:27 -0600 Subject: [PATCH 09/12] Add ability to pass in arguments for fields --- rcsbapi/sequence/seq_query.py | 28 +- rcsbapi/sequence/seq_schema.py | 540 +++++++++++++++++---------------- tests/test_seq_query.py | 16 + 3 files changed, 316 insertions(+), 268 deletions(-) diff --git a/rcsbapi/sequence/seq_query.py b/rcsbapi/sequence/seq_query.py index da5fefa..655a604 100644 --- a/rcsbapi/sequence/seq_query.py +++ b/rcsbapi/sequence/seq_query.py @@ -26,9 +26,9 @@ class Query(ABC): """Base class for all query types""" @abstractmethod - def to_dict(self) -> Dict: + def to_dict(self) -> Dict[str, Any]: """Get dictionary represented query and attributes, skips values of None""" - request_dict: Dict = {} + request_dict: Dict[str, Any] = {} for field in fields(self): field_name = field.name field_value = getattr(self, field_name) @@ -43,7 +43,7 @@ def to_dict(self) -> Dict: request_dict[field_name] = field_value return request_dict - def construct_query(self, query_type: str) -> Dict: + def construct_query(self, query_type: str) -> Dict[str, Any]: """type check based on the GraphQL schema, then construct the GraphQL query""" # Assert attributes exists for mypy. 
# Can't be defined in Query class because @@ -154,7 +154,7 @@ class Annotations(Query): suppress_autocomplete_warning: bool = False _query: MappingProxyType[str, Any] = MappingProxyType({}) - def to_dict(self) -> Dict: + def to_dict(self) -> Dict[str, Any]: return super().to_dict() def __post_init__(self) -> None: @@ -182,10 +182,10 @@ class GroupAlignments(Query): offset: Optional[int] = None first: Optional[int] = None - def to_dict(self) -> Dict: + def to_dict(self) -> Dict[str, Any]: return super().to_dict() - def __post_init__(self): + def __post_init__(self) -> None: query = super().construct_query("group_alignments") object.__setattr__(self, "_query", query) @@ -209,12 +209,12 @@ class GroupAnnotations(Query): return_data_list: list[str] filters: Optional[List["AnnotationFilterInput"]] = None suppress_autocomplete_warning: bool = False - _query: MappingProxyType = MappingProxyType({}) + _query: MappingProxyType[str, Any] = MappingProxyType({}) - def to_dict(self) -> Dict: + def to_dict(self) -> Dict[str, Any]: return super().to_dict() - def __post_init__(self): + def __post_init__(self) -> None: query = super().construct_query("group_annotations") object.__setattr__(self, "_query", query) @@ -238,12 +238,12 @@ class GroupAnnotationsSummary(Query): return_data_list: list[str] filters: Optional[List["AnnotationFilterInput"]] = None suppress_autocomplete_warning: bool = False - _query: MappingProxyType = MappingProxyType({}) + _query: MappingProxyType[str, Any] = MappingProxyType({}) - def to_dict(self) -> Dict: + def to_dict(self) -> Dict[str, Any]: return super().to_dict() - def __post_init__(self): + def __post_init__(self) -> None: query = super().construct_query("group_annotations_summary") object.__setattr__(self, "_query", query) @@ -275,10 +275,10 @@ def __init__( def to_string(self) -> str: """Generate string to insert in GraphQL query based on GraphQL schema""" - input_field_specs = [] + input_field_specs: list[Any] = [] for arg_dict in SEQ_SCHEMA._root_dict["annotations"]: if arg_dict["name"] == "filters": - input_field_specs = arg_dict["input_fields"] + input_field_specs = arg_dict["inputFields"] assert len(input_field_specs) > 0, '"filters" key not found in arg_dict' args = set() diff --git a/rcsbapi/sequence/seq_schema.py b/rcsbapi/sequence/seq_schema.py index 687f907..2de4cca 100644 --- a/rcsbapi/sequence/seq_schema.py +++ b/rcsbapi/sequence/seq_schema.py @@ -3,7 +3,6 @@ from __future__ import annotations import logging import json -import re from typing import Any from pathlib import Path import requests @@ -39,7 +38,7 @@ class FieldNode: index (int): graph index """ - def __init__(self, kind: str, node_type: str, name: str, description: str) -> None: + def __init__(self, kind: str, node_type: str, name: str, description: str, args: list[dict[str, str | None]]) -> None: """ Initialize FieldNodes. 
@@ -55,6 +54,7 @@ def __init__(self, kind: str, node_type: str, name: str, description: str) -> No self.kind: str = kind self.of_kind: str = "" self.type: str = node_type + self.args: list[dict[str, str | None]] = args self.index: None | int = None def __str__(self) -> str: @@ -142,7 +142,7 @@ def __init__(self) -> None: """Dict where keys are type names and the values are their associated fields""" self._field_names_list = self._construct_name_list() """list of all field names""" - self._root_dict: dict[str, list[dict[str, str]]] = self._construct_root_dict() + self._root_dict: dict[str, list[dict[str, Any]]] = self._construct_root_dict() self._schema_graph: rx.PyDiGraph[FieldNode | TypeNode, None | int] = rx.PyDiGraph() self._schema_graph = self._recurse_build_schema(self._schema_graph, "Query") self._root_to_idx: dict[str, int] = self._make_root_to_idx() @@ -164,7 +164,7 @@ def _request_root_types(self) -> dict[str, Any]: response = requests.post(headers={"Content-Type": "application/json"}, json=root_query, url=self.pdb_url, timeout=self.timeout) return dict(response.json()) - def _construct_root_dict(self) -> dict[str, list[dict[str, str]]]: + def _construct_root_dict(self) -> dict[str, list[dict[str, Any]]]: """Build a dictionary to organize information about schema root types. Returns: @@ -196,7 +196,7 @@ def _construct_root_dict(self) -> dict[str, list[dict[str, str]]]: if root_name not in root_dict: root_dict[root_name] = [] root_dict[root_name].append( - {"name": arg_name, "description": arg_description, "kind": arg_kind, "of_kind": arg_of_kind, "of_type": arg_of_type, "input_fields": input_fields} + {"name": arg_name, "description": arg_description, "kind": arg_kind, "ofKind": arg_of_kind, "ofType": arg_of_type, "inputFields": input_fields} ) return root_dict @@ -250,7 +250,9 @@ def _construct_type_dict(self) -> dict[str, dict[str, dict[str, str]]]: field_dict = {} if fields is not None: for field in fields: - field_dict[str(field["name"])] = dict(field["type"]) + info_dict = field["type"] + info_dict["args"] = field["args"] + field_dict[str(field["name"])] = info_dict type_fields_dict[type_name] = field_dict return type_fields_dict @@ -287,7 +289,11 @@ def make_type_subgraph(self, type_name: str) -> TypeNode: type_node.set_field_list(field_node_list) return type_node - def _recurse_build_schema(self, schema_graph: rx.PyDiGraph[FieldNode | TypeNode, None | int], type_name: str) -> rx.PyDiGraph: + def _recurse_build_schema( + self, + schema_graph: rx.PyDiGraph[FieldNode | TypeNode, None | int], + type_name: str + ) -> rx.PyDiGraph[FieldNode | TypeNode, None | int]: """Build the API schema by iterating through the fields of the given type and building subgraphs for each one recursively until a scalar (leaf) is reached. 
Args: @@ -351,32 +357,13 @@ def _make_type_node(self, type_name: str) -> TypeNode: type_node.set_index(index) return type_node - def _find_kind(self, field_dict: dict[str, Any]) -> Any | str: # noqa: ANN401 - if field_dict["name"] is not None: - return field_dict["kind"] - return self._find_kind(field_dict["ofType"]) - - def _find_type_name(self, field_dict: dict[str, Any]) -> Any | str: # noqa: ANN401 - if field_dict: - if field_dict["name"] is not None: - return field_dict["name"] - return self._find_type_name(field_dict["ofType"]) - return "" - - def _find_description(self, type_name: str, field_name: str) -> str: - for type_dict in self.schema["data"]["__schema"]["types"]: - if type_dict["name"] == type_name: - for field in type_dict["fields"]: - if (field["name"] == field_name) and isinstance(field["description"], str): - return field["description"] - return "" - def _make_field_node(self, parent_type: str, field_name: str) -> FieldNode: kind = self._type_fields_dict[parent_type][field_name]["kind"] field_type_dict: dict[str, Any] = self._type_fields_dict[parent_type][field_name] return_type = self._find_type_name(field_type_dict) description = self._find_description(parent_type, field_name) - field_node = FieldNode(kind, return_type, field_name, description) + args = [self._make_args_dict(args) for args in self._type_fields_dict[parent_type][field_name]["args"]] + field_node = FieldNode(kind, return_type, field_name, description, args) if kind in {"LIST", "NON_NULL"}: of_kind = self._find_kind(field_type_dict) field_node.set_of_kind(of_kind) @@ -401,6 +388,38 @@ def _make_field_node(self, parent_type: str, field_name: str) -> FieldNode: return field_node + def _find_kind(self, field_dict: dict[str, Any]) -> Any | str: # noqa: ANN401 + if field_dict["name"] is not None: + return field_dict["kind"] + return self._find_kind(field_dict["ofType"]) + + def _find_type_name(self, field_dict: dict[str, Any]) -> Any | str: # noqa: ANN401 + if field_dict: + if field_dict["name"] is not None: + return field_dict["name"] + return self._find_type_name(field_dict["ofType"]) + return "" + + def _find_description(self, type_name: str, field_name: str) -> str: + for type_dict in self.schema["data"]["__schema"]["types"]: + if type_dict["name"] == type_name: + for field in type_dict["fields"]: + if (field["name"] == field_name) and isinstance(field["description"], str): + return field["description"] + return "" + + def _make_args_dict(self, args: dict[str, Any]) -> dict[str, str | None]: + name = args["name"] + ofType = args["type"]["ofType"] + kind = args["type"]["kind"] + # ofKind is only needed for Lists. + # Currently no field args are lists. If that changes, this code may need correcting + ofKind = None + if (ofType) and ("kind" in ofType): + ofKind = args["type"]["ofType"]["kind"] + + return {"name": name, "ofType": ofType, "kind": kind, "ofKind": ofKind} + def _make_root_to_idx(self) -> dict[str, int]: root_to_idx: dict[str, int] = {} # Assumes 0 is the index for root Query node. @@ -410,86 +429,6 @@ def _make_root_to_idx(self) -> dict[str, int]: root_to_idx[root_node.name] = root_node.index return root_to_idx - def get_input_id_dict(self, input_type: str) -> dict[str, str]: - """Get keys input dictionary for given input_type. 
- - Args: - input_type (str): GraphQL input_type (ex: alignments) - - Raises: - ValueError: _description_ - - Returns: - dict[str, str]: _description_ - """ - if input_type not in self._root_dict: - error_msg = "Not a valid input_type, no available input_id dictionary" - raise ValueError(error_msg) - root_dict_entry = self._root_dict[input_type] - input_dict = {} - for arg in root_dict_entry: - name = arg["name"] - description = arg["description"] - if (len(root_dict_entry) == 1) and root_dict_entry[0]["name"] == "entry_id": - description = "ID" - input_dict[name] = description - return input_dict - - def _idx_dict_to_name_dict(self, idx_fields: list[dict[int, Any] | int] | dict[int, Any] | int) -> dict[str, Any] | list[str] | str: - """Format descendant fields into dictionary that can be easily converted to GraphQL string""" - query_dict = {} - if isinstance(idx_fields, dict): - for field_idx, subfield in idx_fields.items(): - field_name = self._idx_to_name(field_idx) - query_dict[field_name] = self._idx_dict_to_name_dict(subfield) - return query_dict - elif isinstance(idx_fields, list): - return [self._idx_dict_to_name_dict(field) for field in idx_fields] - elif not idx_fields: - return "" - else: - return self._idx_to_name(idx_fields) - - def _idxs_to_idx_dict( - self, - idx_list: list[int], - autopopulated_fields: list[int | dict[int, Any]], - partial_query: dict[Any, Any] | None = None, - ) -> dict[int, Any] | list[dict[int, Any] | int]: - if partial_query is None: - partial_query = {} - # Base case - if len(idx_list) == 0: - assert isinstance(partial_query, dict) # for mypy - return partial_query - # Add autopopulated fields - if len(idx_list) == 1: - if not autopopulated_fields: - return [idx_list[0]] - return {idx_list[0]: autopopulated_fields} - # Create a query with correct nesting - else: - return {idx_list[0]: self._idxs_to_idx_dict(idx_list[1:], autopopulated_fields=autopopulated_fields)} - - def query_dict_to_graphql_string(self, query_dict: dict[str, Any]) -> str: - first_line = next(iter(query_dict["query"])) - print(f"FIRST LINE: {first_line}") - query_body = query_dict["query"][first_line] - formatted_query_body = ( - # format the dict as a GraphQL query - json.dumps(query_body, indent=2) - .replace('"', "") - .replace("'", '"') - .replace("[", "") - .replace("]", "") - .replace(",", " ") - .replace("{", "") - .replace(": ", "{") - ) - formatted_query_body = "\n".join(line for line in formatted_query_body.splitlines() if line.strip()) - query = f"query{{{first_line}{{\n{formatted_query_body}}}}}" - return query - def _get_descendant_fields(self, node_idx: int, visited: None | set[int] = None) -> list[int | dict[int, Any]]: if visited is None: visited = set() @@ -523,142 +462,6 @@ def _get_descendant_fields(self, node_idx: int, visited: None | set[int] = None) # Skips appending if no further subfields (ENUMS) return result - def find_field_names(self, search_string: str) -> list[str]: - """Find field names that fully or partially match the search string. 
- - Args: - search_string (str): string to search field names for - - Raises: - ValueError: thrown when a type other than string is passed in for search_string - ValueError: thrown when no fields match search_string - - Returns: - list[str]: list of matching field names - """ - if not isinstance(search_string, str): - error_msg = f"Please input a string instead of {type(search_string)}" # type: ignore[unreachable] - raise TypeError(error_msg) - - field_names = [key for key in self._field_to_idx_dict if search_string.lower() in key.lower()] - if not field_names: - error_msg = f"No fields found matching '{search_string}'" - raise ValueError(error_msg) - return field_names - - def construct_query( - self, query_type: str, query_args: dict[str, str] | dict[str, list[Any]], return_data_list: list[str], suppress_autocomplete_warning: bool = False - ) -> dict[str, Any]: - """ - Construct a GraphQL query. Currently only uses rustworkx. - - Args: - query_type (str): root type ("alignments", "annotations") - query_args (dict[str, str] | dict[str, list]): dictionary where keys are argument names and - values are input values - return_data_list (list[str]): list of fields to request data for - suppress_autocomplete_warning (bool, optional): Whether to suppress warning for autocompletion of paths. - Defaults to False. - - Raises: - ValueError: unknown field in the return_data_list - - Returns: - dict: GraphQL query in JSON format - """ - unknown_return_list: list[str] = [] - for field in return_data_list: - if "." in field: - separate_fields = field.split(".") - for sep_field in separate_fields: - if sep_field not in self._field_names_list: - unknown_return_list.append(sep_field) # noqa: PERF401 - elif field not in self._field_names_list: - unknown_return_list.append(field) - if unknown_return_list: - error_msg = f"Unknown item in return_data_list: {unknown_return_list}" - raise ValueError(error_msg) - # if use_networkx: - # query = self._construct_query_networkx( - # input_type=input_type, - # input_ids=input_ids, - # return_data_list=return_data_list, - # suppress_autocomplete_warning=suppress_autocomplete_warning - # ) - # else: - # query = self._construct_query_rustworkx( - # input_type=input_type, - # input_ids=input_ids, - # return_data_list=return_data_list, - # add_rcsb_id=add_rcsb_id, - # suppress_autocomplete_warning=suppress_autocomplete_warning - # ) - query = self._construct_query_rustworkx( - query_type=query_type, query_args=query_args, return_data_list=return_data_list, suppress_autocomplete_warning=suppress_autocomplete_warning - ) - return query # noqa: RET504 - - def _construct_query_rustworkx( - self, - query_type: str, - query_args: dict[str, str] | dict[str, list[Any]], - return_data_list: list[str], - suppress_autocomplete_warning: bool = False - ) -> dict[str, Any]: - # Build first line of query where arguments are given - arg_list = self._root_dict[query_type] - arg_value_list = tuple(self.format_args(arg_dict, query_args[arg_dict["name"]]) for arg_dict in arg_list if arg_dict["name"] in query_args) - query_args_str = f"{query_type}{str(arg_value_list).replace("'", '')}" - - # Build query body - start_idx = self._root_to_idx[query_type] - return_data_path_dict: dict[int, list[int]] = self.return_fields_to_paths(start_idx, query_type, return_data_list) - return_field_query_list = [] - for return_field_idx, path in return_data_path_dict.items(): - return_field_query_dict = self._idxs_to_idx_dict(idx_list=path, autopopulated_fields=self._get_descendant_fields(return_field_idx)) - 
return_field_query_list.append(return_field_query_dict) - - # TODO: in idxs_to_query, add arguments to keys - idx_query_body = self._merge_query_list(return_field_query_list) - name_query_body = self._idx_dict_to_name_dict(idx_query_body) - query = self.query_dict_to_graphql_string({"query": {query_args_str: name_query_body}}) - return {"query": query} - - def _merge_query( - self, - query_1: dict[int, Any] | list[int], - query_2: dict[int, Any] | list[int] - ) -> list[int | dict[int, Any]] | list[dict[int, Any]] | list[int]: - if isinstance(query_1, dict) and isinstance(query_2, dict): - for key in query_1.keys(): - if (key in query_2): - return [{key: self._merge_query(query_1[key], query_2[key])}] - return [query_1, query_2] - elif isinstance(query_1, list) and isinstance(query_2, dict): - return query_1 + [query_2] - elif isinstance(query_1, dict) and isinstance(query_2, list): - return [query_1] + query_2 - elif isinstance(query_1, list) and isinstance(query_2, list): - return query_1 + query_2 - raise ValueError("Invalid query input") - - def _merge_query_list(self, query_list: list[dict[int, Any] | int]) -> list[dict[int, Any] | int]: - result = [query_list[0]] - for path in query_list[1:]: - for partial_path in result: - result = self._merge_query(partial_path, path) - return result - - def _idx_query_to_name_query(self, idx_query: dict[int, Any] | list[int] | int) -> dict[str, Any] | list[str] | str: - if isinstance(idx_query, dict): - assert len(idx_query) == 1 - for key, value in idx_query.items(): - field_name = self._idx_to_name(key) - return {field_name: self._idx_query_to_name_query(value)} - elif isinstance(idx_query, list): - return [self._idx_query_to_name_query(idx) for idx in idx_query] - return self._idx_to_name(idx_query) - def return_fields_to_paths( self, start_idx: int, @@ -753,7 +556,7 @@ def return_fields_to_paths( return_data_paths[final_idx] = shortest_path return return_data_paths - def format_args(self, arg_dict: dict[str, list[Any]] | dict[str, str], input_value: str | list[str]) -> str: + def _format_args(self, arg_dict: dict[str, list[Any]] | dict[str, str], input_value: str | list[str] | int) -> str: """Add double quotes or omit quotes around a single GraphQL argument. 
Args: @@ -764,18 +567,18 @@ def format_args(self, arg_dict: dict[str, list[Any]] | dict[str, str], input_val str: returns input value formatted with quotes, no quotes, or as a list """ format_arg = "" - if arg_dict["kind"] == "LIST" or arg_dict["of_kind"] == "LIST": - if arg_dict["of_type"] == "String": + if arg_dict["kind"] == "LIST" or arg_dict["ofKind"] == "LIST": + if arg_dict["ofType"] == "String": # Add double quotes around each item format_arg += f'{arg_dict["name"]}: {str(input_value).replace("'", '"')}' else: # Remove single quotes if not string format_arg += f'{arg_dict["name"]}: {str(input_value).replace("'", "")}' - elif arg_dict["of_type"] == "String": + elif arg_dict["ofType"] == "String": # If arg type is string, add double quotes around value format_arg += f'{arg_dict["name"]}: "{input_value}"' else: - assert isinstance(input_value, str) + assert isinstance(input_value, str) or isinstance(input_value, int) format_arg += f"{arg_dict["name"]}: {input_value}" return format_arg @@ -793,7 +596,7 @@ def _find_idx_path(self, dot_path: list[str], idx_list: list[int], node_idx: int if len(dot_path) == 0: idx_list.append(node_idx) return idx_list - if (self._schema_graph[node_idx].kind == "SCALAR") or (self._schema_graph[node_idx].of_kind == "SCALAR"): + if (getattr(self._schema_graph[node_idx], "kind") == "SCALAR") or (getattr(self._schema_graph[node_idx], "of_kind") == "SCALAR"): return self._find_idx_path(dot_path[1:], idx_list, node_idx) type_node = next(iter(self._schema_graph.successor_indices(node_idx))) field_nodes = self._schema_graph.successor_indices(type_node) @@ -949,7 +752,7 @@ def find_paths(self, input_type: str, return_data_name: str, descriptions: bool dot_paths.append(dot_path) if descriptions: final_field_idx = path[-1] - description = self._schema_graph[final_field_idx].description + description = getattr(self._schema_graph[final_field_idx], "description") if description is None: description = "" description_dict[dot_path] = description.replace("\n", " ") @@ -984,13 +787,13 @@ def check_typing(self, query_type: str, enum_types: "EnumTypes", args: dict[str, error_list = [] arg_dict_list = self._root_dict[query_type] for arg_dict in arg_dict_list: - arg_type = arg_dict["of_type"] + arg_type = arg_dict["ofType"] arg_name = arg_dict["name"] if arg_name not in args: continue - if arg_dict["kind"] == "NON_NULL" and arg_dict["of_kind"] == "ENUM" and args[arg_name] not in enum_types[arg_type].value: + if arg_dict["kind"] == "NON_NULL" and arg_dict["ofKind"] == "ENUM" and args[arg_name] not in enum_types[arg_type].value: error_list.append(f"Invalid value '{args[arg_name]}' for '{arg_name}': valid values are {enum_types[arg_type].value}") # If list. Does not do type-checking for items of list. 
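To make the quoting rules above concrete, here is a small sketch of what `_format_args` is expected to return for the common argument kinds (the `arg_dict` shapes are illustrative stand-ins for the dicts built from schema introspection in `_construct_root_dict`, and `schema` stands for a schema instance):

    schema._format_args({"name": "queryId", "kind": "NON_NULL", "ofKind": "SCALAR", "ofType": "String"}, "P01112")
    # -> 'queryId: "P01112"'   (String scalars get double quotes)

    schema._format_args({"name": "from", "kind": "NON_NULL", "ofKind": "ENUM", "ofType": "SequenceReference"}, "UNIPROT")
    # -> 'from: UNIPROT'       (enums are left unquoted)

    schema._format_args({"name": "range", "kind": "LIST", "ofKind": "SCALAR", "ofType": "Int"}, [1, 10])
    # -> 'range: [1, 10]'      (non-String lists keep bare items)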
@@ -1005,7 +808,7 @@ def check_typing(self, query_type: str, enum_types: "EnumTypes", args: dict[str, error_list.append(f"'{arg_name}' must be list of string(s)") # if list of ENUMs - if arg_dict["kind"] == "NON_NULL" and arg_dict["of_kind"] == "LIST": + if arg_dict["kind"] == "NON_NULL" and arg_dict["ofKind"] == "LIST": mismatch_type = [item for item in args[arg_name] if item not in enum_types[arg_type].value] if mismatch_type: error_msg = f"Invalid value(s) {mismatch_type} for '{arg_name}': valid values are {enum_types[arg_type].value}" @@ -1013,3 +816,232 @@ def check_typing(self, query_type: str, enum_types: "EnumTypes", args: dict[str, if error_list: raise ValueError("\n" + " " + "\n ".join(error_list)) + + def construct_query( + self, query_type: str, query_args: dict[str, str] | dict[str, list[Any]], return_data_list: list[str], suppress_autocomplete_warning: bool = False + ) -> dict[str, Any]: + """ + Construct a GraphQL query. Currently only uses rustworkx. + + Args: + query_type (str): root type ("alignments", "annotations") + query_args (dict[str, str] | dict[str, list]): dictionary where keys are argument names and + values are input values + return_data_list (list[str]): list of fields to request data for + suppress_autocomplete_warning (bool, optional): Whether to suppress warning for autocompletion of paths. + Defaults to False. + + Raises: + ValueError: unknown field in the return_data_list + + Returns: + dict: GraphQL query in JSON format + """ + unknown_return_list: list[str] = [] + for field in return_data_list: + if "." in field: + separate_fields = field.split(".") + for sep_field in separate_fields: + if sep_field not in self._field_names_list: + unknown_return_list.append(sep_field) # noqa: PERF401 + elif field not in self._field_names_list: + unknown_return_list.append(field) + if unknown_return_list: + error_msg = f"Unknown item in return_data_list: {unknown_return_list}" + raise ValueError(error_msg) + # if use_networkx: + # query = self._construct_query_networkx( + # input_type=input_type, + # input_ids=input_ids, + # return_data_list=return_data_list, + # suppress_autocomplete_warning=suppress_autocomplete_warning + # ) + # else: + # query = self._construct_query_rustworkx( + # input_type=input_type, + # input_ids=input_ids, + # return_data_list=return_data_list, + # add_rcsb_id=add_rcsb_id, + # suppress_autocomplete_warning=suppress_autocomplete_warning + # ) + query = self._construct_query_rustworkx( + query_type=query_type, query_args=query_args, return_data_list=return_data_list, suppress_autocomplete_warning=suppress_autocomplete_warning + ) + return query # noqa: RET504 + + def _construct_query_rustworkx( + self, + query_type: str, + query_args: dict[str, str] | dict[str, list[Any]], + return_data_list: list[str], + suppress_autocomplete_warning: bool = False + ) -> dict[str, Any]: + # Build first line of query where arguments are given + arg_list = self._root_dict[query_type] + arg_value_list = tuple(self._format_args(arg_dict, query_args[arg_dict["name"]]) for arg_dict in arg_list if arg_dict["name"] in query_args) + first_line = f"{query_type}{str(arg_value_list).replace("'", '')}" + + # Build query body + start_idx = self._root_to_idx[query_type] + return_data_path_dict: dict[int, list[int]] = self.return_fields_to_paths(start_idx, query_type, return_data_list) + return_field_query_list = [] + for return_field_idx, path in return_data_path_dict.items(): + return_field_query_dict = self._idxs_to_idx_dict(idx_list=path, 
autopopulated_fields=self._get_descendant_fields(return_field_idx)) + return_field_query_list.append(return_field_query_dict) + + idx_query_body = self._merge_query_list(return_field_query_list) + name_query_body = self._idx_dict_to_name_dict(idx_query_body, query_args) + query = self.query_dict_to_graphql_string(first_line, name_query_body) + return {"query": query} + + def _merge_query_list(self, query_list: list[dict[int, Any] | int]) -> list[dict[int, Any] | int]: + result = [query_list[0]] + for path in query_list[1:]: + for partial_path in result: + result = self._merge_query(partial_path, path) + return result + + def _merge_query( + self, + query_1: dict[int, Any] | list[int], + query_2: dict[int, Any] | list[int] + ) -> list[int | dict[int, Any]] | list[dict[int, Any]] | list[int]: + if isinstance(query_1, dict) and isinstance(query_2, dict): + for key in query_1.keys(): + if (key in query_2): + return [{key: self._merge_query(query_1[key], query_2[key])}] + return [query_1, query_2] + elif isinstance(query_1, list) and isinstance(query_2, dict): + return query_1 + [query_2] + elif isinstance(query_1, dict) and isinstance(query_2, list): + return [query_1] + query_2 + elif isinstance(query_1, list) and isinstance(query_2, list): + return query_1 + query_2 + raise ValueError("Invalid query input") + + def _idxs_to_idx_dict( + self, + idx_list: list[int], + autopopulated_fields: list[int | dict[int, Any]], + partial_query: dict[Any, Any] | None = None, + ) -> dict[int, Any] | list[int] | list[dict[int, Any] | int]: + if partial_query is None: + partial_query = {} + # Base case + if len(idx_list) == 0: + assert isinstance(partial_query, dict) # for mypy + return partial_query + # Add autopopulated fields + if len(idx_list) == 1: + if not autopopulated_fields: + return [idx_list[0]] + return {idx_list[0]: autopopulated_fields} + # Create a query with correct nesting + else: + return {idx_list[0]: self._idxs_to_idx_dict(idx_list[1:], autopopulated_fields=autopopulated_fields)} + + def _idx_dict_to_name_dict( + self, + idx_fields: list[dict[int, Any] | int] | dict[int, Any] | int, + query_args: dict[str, Any] + ) -> dict[str, Any] | list[str] | str: + """Turn dictionary of indices to dictionary of field names and add arguments if applicable.""" + query_dict = {} + if isinstance(idx_fields, dict): + for field_idx, subfield in idx_fields.items(): + field_name = self._idx_to_name(field_idx) + args = getattr(self._schema_graph[field_idx], "args") + if args: + field_name = self.add_field_args(field_name, args, query_args) + query_dict[field_name] = self._idx_dict_to_name_dict(subfield, query_args) + return query_dict + elif isinstance(idx_fields, list): + return [self._idx_dict_to_name_dict(field, query_args) for field in idx_fields] + elif not idx_fields: + return "" + else: + return self._idx_to_name(idx_fields) + + def add_field_args(self, field_name: str, args: list[dict[str, Any]], query_args: dict[str, Any]) -> str: + """Add arguments to a field, returning the fieldname and args as a formatted string. 
+ + Args: + args (list[dict[str, Any]]): _description_ + query_args (dict[str, Any]): _description_ + + Returns: + str: _description_ + """ + formatted_args = [] + for arg in args: + arg_name = arg["name"] + if arg_name in query_args: + formatted_args.append(self._format_args(arg, query_args[arg_name])) + if formatted_args: + return f"{field_name}{str(tuple(formatted_args)).replace("'", "")}" + else: + return field_name + + def query_dict_to_graphql_string(self, first_line: str, query_body: dict[str, Any]) -> str: + formatted_query_body = ( + # format the dict as a GraphQL query + # TODO: bit janky, change? + json.dumps(query_body, indent=2, separators=(" ", "~")) + .replace('"', "") + .replace("'", '"') + .replace("[", "") + .replace("]", "") + .replace("{", "") + .replace("~", "{") + ) + formatted_query_body = "\n".join(line for line in formatted_query_body.splitlines() if line.strip()) + query = f"query{{{first_line}{{\n{formatted_query_body}}}}}" + return query + + def get_input_id_dict(self, input_type: str) -> dict[str, str]: + """Get keys input dictionary for given input_type. + + Args: + input_type (str): GraphQL input_type (ex: alignments) + + Raises: + ValueError: _description_ + + Returns: + dict[str, str]: _description_ + """ + if input_type not in self._root_dict: + error_msg = "Not a valid input_type, no available input_id dictionary" + raise ValueError(error_msg) + root_dict_entry = self._root_dict[input_type] + input_dict = {} + for arg in root_dict_entry: + name = arg["name"] + description = arg["description"] + if (len(root_dict_entry) == 1) and root_dict_entry[0]["name"] == "entry_id": + description = "ID" + input_dict[name] = description + return input_dict + + def find_field_names(self, search_string: str) -> list[str]: + """Find field names that fully or partially match the search string. + + Args: + search_string (str): string to search field names for + + Raises: + ValueError: thrown when a type other than string is passed in for search_string + ValueError: thrown when no fields match search_string + + Returns: + list[str]: list of matching field names + """ + if not isinstance(search_string, str): + error_msg = f"Please input a string instead of {type(search_string)}" # type: ignore[unreachable] + raise TypeError(error_msg) + + field_names = [key for key in self._field_to_idx_dict if search_string.lower() in key.lower()] + if not field_names: + error_msg = f"No fields found matching '{search_string}'" + raise ValueError(error_msg) + return field_names diff --git a/tests/test_seq_query.py b/tests/test_seq_query.py index a270db8..9f3ac12 100644 --- a/tests/test_seq_query.py +++ b/tests/test_seq_query.py @@ -74,6 +74,7 @@ def testAlignments(self) -> None: query_obj.exec() except Exception as error: self.fail(f"Failed unexpectedly: {error}") + with self.subTest(msg="2. Alignments query with range"): try: query_obj = Alignments( @@ -87,6 +88,21 @@ def testAlignments(self) -> None: except Exception as error: self.fail(f"Failed unexpectedly: {error}") + with self.subTest(msg="3. Alignments query with target_alignments args"): + try: + query_obj = Alignments( + from_="NCBI_PROTEIN", + to="PDB_ENTITY", + queryId="XP_642496", + range=[1, 10], + return_data_list=["target_alignments"], + first=1, + offset=10 + ) + query_obj.exec() + except Exception as error: + self.fail(f"Failed unexpectedly: {error}") + def testGroupAlignments(self) -> None: with self.subTest(msg="1. 
group_alignments query without filter"): try: From 083338b3c034d78bf6bd99ab6851066c3ba95dc2 Mon Sep 17 00:00:00 2001 From: Ivana Truong Date: Wed, 18 Dec 2024 17:10:16 -0600 Subject: [PATCH 10/12] Add doc examples to tests, add docstrings/comments, fixing merge query (WIP) --- rcsbapi/data/data_query.py | 1 - rcsbapi/sequence/seq_query.py | 9 ++ rcsbapi/sequence/seq_schema.py | 147 ++++++++++++++++++++++++++++----- tests/test_seq_query.py | 90 ++++++++++++++++++++ 4 files changed, 224 insertions(+), 23 deletions(-) diff --git a/rcsbapi/data/data_query.py b/rcsbapi/data/data_query.py index 4aefda1..164d05f 100644 --- a/rcsbapi/data/data_query.py +++ b/rcsbapi/data/data_query.py @@ -164,7 +164,6 @@ def exec(self) -> Dict[str, Any]: if len(self._input_ids) > batch_size: batched_ids = self._batch_ids(batch_size) response_json: Dict[str, Any] = {} - # count = 0 for id_batch in batched_ids: query = re.sub(r"\[([^]]+)\]", f"{id_batch}".replace("'", '"'), self._query) part_response = requests.post( diff --git a/rcsbapi/sequence/seq_query.py b/rcsbapi/sequence/seq_query.py index 655a604..66ba16a 100644 --- a/rcsbapi/sequence/seq_query.py +++ b/rcsbapi/sequence/seq_query.py @@ -120,6 +120,15 @@ class Alignments(Query): range: Optional[List[int]] = None suppress_autocomplete_warning: bool = False _query: MappingProxyType[str, Any] = MappingProxyType({}) + """ + `offset` and `first` are field arguments (currently the only ones). + Making them class attributes (below) would not work if there + were redundant field arg names. Other options: + 1. Use a string in `return_data_list` and parse later + return_data_list = ["target_alignments(first:0, offset:5)"] + 2. Create an attribute `field_args` and pass in args as a dict + field_args = {"target_alignments": {first:0, offset:5}, ...} + """ offset: Optional[int] = None first: Optional[int] = None diff --git a/rcsbapi/sequence/seq_schema.py b/rcsbapi/sequence/seq_schema.py index 2de4cca..de10818 100644 --- a/rcsbapi/sequence/seq_schema.py +++ b/rcsbapi/sequence/seq_schema.py @@ -821,7 +821,7 @@ def construct_query( self, query_type: str, query_args: dict[str, str] | dict[str, list[Any]], return_data_list: list[str], suppress_autocomplete_warning: bool = False ) -> dict[str, Any]: """ - Construct a GraphQL query. Currently only uses rustworkx. + Construct a GraphQL query - currently only uses rustworkx. Args: query_type (str): root type ("alignments", "annotations") @@ -876,6 +876,19 @@ def _construct_query_rustworkx( return_data_list: list[str], suppress_autocomplete_warning: bool = False ) -> dict[str, Any]: + """Construct a GraphQL query as a dict, if using rustworkx. + + Args: + query_type (str): type of query to make (ex: Alignments, Annotations, etc) + query_args (dict[str, str] | dict[str, list[Any]]): dict of query_type-specific args + return_data_list (list[str]): list of fields to request + suppress_autocomplete_warning (bool, optional): Whether to suppress warning when + autocompletion of paths is used. Defaults to False. + + Returns: + dict[str, Any]: GraphQL query as dict. 
Dict is JSON format needed for POST requests (https://sequence-coordinates.rcsb.org/#gql-api)
+        """
        # Build first line of query where arguments are given
        arg_list = self._root_dict[query_type]
        arg_value_list = tuple(self._format_args(arg_dict, query_args[arg_dict["name"]]) for arg_dict in arg_list if arg_dict["name"] in query_args)
@@ -884,28 +897,54 @@ def _construct_query_rustworkx(
        # Build query body
        start_idx = self._root_to_idx[query_type]
        return_data_path_dict: dict[int, list[int]] = self.return_fields_to_paths(start_idx, query_type, return_data_list)
-        return_field_query_list = []
+        # return_data_query_list is a list of queries, each one corresponding to one field in return_data_list
+        return_data_query_list = []
        for return_field_idx, path in return_data_path_dict.items():
+            # Format the paths with the correct nesting of fields. Still using indices at this point
            return_field_query_dict = self._idxs_to_idx_dict(idx_list=path, autopopulated_fields=self._get_descendant_fields(return_field_idx))
-            return_field_query_list.append(return_field_query_dict)
+            return_data_query_list.append(return_field_query_dict)

-        idx_query_body = self._merge_query_list(return_field_query_list)
+        # Merge all the queries in return_data_query_list so there are no redundant paths
+        idx_query_body = self._merge_query_list(return_data_query_list)
        name_query_body = self._idx_dict_to_name_dict(idx_query_body, query_args)
-        query = self.query_dict_to_graphql_string(first_line, name_query_body)
+        query = self._query_dict_to_graphql_string(first_line, name_query_body)
        return {"query": query}

    def _merge_query_list(self, query_list: list[dict[int, Any] | int]) -> list[dict[int, Any] | int]:
+        """Merge a list of query dicts, returning a merged query with unique indices/index dictionaries.
+
+        Args:
+            query_list (list[dict[int, Any] | int]): list where each item is a query to a field
+                specified by return_data_list in construct_query
+
+        Returns:
+            list[dict[int, Any] | int]: List of indices and index dicts representing the merged query
+        """
        result = [query_list[0]]
        for path in query_list[1:]:
-            for partial_path in result:
-                result = self._merge_query(partial_path, path)
+            result = self._merge_query(result, path)
        return result

-    def _merge_query(
-        self,
-        query_1: dict[int, Any] | list[int],
-        query_2: dict[int, Any] | list[int]
-    ) -> list[int | dict[int, Any]] | list[dict[int, Any]] | list[int]:
-        if isinstance(query_1, dict) and isinstance(query_2, dict):
-            for key in query_1.keys():
-                if (key in query_2):
-                    return [{key: self._merge_query(query_1[key], query_2[key])}]
-            return [query_1, query_2]
-        elif isinstance(query_1, list) and isinstance(query_2, dict):
-            return query_1 + [query_2]
-        elif isinstance(query_1, dict) and isinstance(query_2, list):
-            return [query_1] + query_2
-        elif isinstance(query_1, list) and isinstance(query_2, list):
-            return query_1 + query_2
-        raise ValueError("Invalid query input")
+    def _merge_query(
+        self,
+        query_1: dict[int, Any] | list[int | dict[int, Any]] | int,
+        query_2: dict[int, Any] | list[int | dict[int, Any]] | int
+    ) -> list[dict[int, Any] | int] | list[dict[int, Any]] | list[int]:
+        """Merge two queries, removing redundancy.
+
+        Returns:
+            list[dict[int, Any] | int] | list[dict[int, Any]]: merged query
+        """
+        # Case where both queries are dicts:
+        # If they share keys --> merge values
+        # If dicts are equal --> return one dict
+        # Else: return both dicts in list
+        if (isinstance(query_1, dict) and isinstance(query_2, dict)):
+            for key in query_1.keys():
+                if (key in query_2):
+                    return [{key: self._merge_query(query_1[key], query_2[key])}]
+            if query_1 == query_2:
+                return [query_1]
+            return [query_1, query_2]
+
+        elif isinstance(query_1, (int, dict)) and isinstance(query_2, (int, dict)):
+            if query_1 == query_2:
+                return [query_1]
+            return [query_1, query_2]
+
+        # Cases where one query is a list and the other is a dict or int
+        # If one query is already contained in the other, return only the more general query
+        # Else: return list with both queries
+        elif (
+            (isinstance(query_1, list) and isinstance(query_2, dict))
+            or (isinstance(query_1, list) and isinstance(query_2, int))
+        ):
+            if query_2 in query_1:
+                return query_1
+            return query_1 + [query_2]
+
+        elif (
+            (isinstance(query_1, dict) and isinstance(query_2, list))
+            or (isinstance(query_1, int) and isinstance(query_2, list))
+        ):
+            if query_1 in query_2:
+                return query_2
+            return [query_1] + query_2
+
+        # Case where both queries are lists
+        # Merge lists, keeping only unique items
+        elif isinstance(query_1, list) and isinstance(query_2, list):
+            unique_query_1 = [path for path in query_1 if path not in query_2]
+            return unique_query_1 + query_2
+
+        raise ValueError("Invalid query input")
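+    # A quick, illustrative trace of the two merge/nesting helpers (the graph
+    # indices are hypothetical; real ones come from the schema graph at runtime).
+    # Two return fields that share parent index 1 collapse into one subtree:
+    #
+    #   self._merge_query({1: [{2: [3]}]}, {1: [{4: [5]}]})
+    #   -> [{1: [{2: [3]}, {4: [5]}]}]
+    #
+    # while queries with disjoint roots are kept side by side:
+    #
+    #   self._merge_query({1: [2]}, {6: [7]})
+    #   -> [{1: [2]}, {6: [7]}]
+    #
+    # The nesting helper below turns an index path plus autopopulated leaves
+    # into the nested form the merge operates on:
+    #
+    #   self._idxs_to_idx_dict(idx_list=[5, 9], autopopulated_fields=[20, 21])
+    #   -> {5: {9: [20, 21]}}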
"""Construct a query with correct nesting of dicts/lists + + Args: + idx_list (list[int]): list of indices to a return_date_list field + autopopulated_fields (list[int | dict[int, Any]]): fields underneath return_data_list field (can be empty) + partial_query (dict[Any, Any] | None, optional): the query as it gets constructed by recursion. Defaults to None. + + Returns: + dict[int, Any] | list[int] | list[dict[int, Any] | int]: query dict/list with nesting + """ if partial_query is None: partial_query = {} # Base case @@ -936,7 +1039,7 @@ def _idxs_to_idx_dict( if not autopopulated_fields: return [idx_list[0]] return {idx_list[0]: autopopulated_fields} - # Create a query with correct nesting + # Add level of nesting else: return {idx_list[0]: self._idxs_to_idx_dict(idx_list[1:], autopopulated_fields=autopopulated_fields)} @@ -945,7 +1048,7 @@ def _idx_dict_to_name_dict( idx_fields: list[dict[int, Any] | int] | dict[int, Any] | int, query_args: dict[str, Any] ) -> dict[str, Any] | list[str] | str: - """Turn dictionary of indices to dictionary of field names and add arguments if applicable.""" + """Convert dictionary of indices to dictionary of field names and add arguments if applicable.""" query_dict = {} if isinstance(idx_fields, dict): for field_idx, subfield in idx_fields.items(): @@ -966,23 +1069,26 @@ def add_field_args(self, field_name: str, args: list[dict[str, Any]], query_args """Add arguments to a field, returning the fieldname and args as a formatted string. Args: - args (list[dict[str, Any]]): _description_ - query_args (dict[str, Any]): _description_ + args (list[dict[str, Any]]): args of a field, retrieved from the GraphQL schema/FieldNode object + query_args (dict[str, Any]): dictionary where keys are argument name and values are user input Returns: - str: _description_ + str: field name or field name with corresponding arguments """ + # Check FieldNode argument names and see if user has passed in corresponding values formatted_args = [] for arg in args: arg_name = arg["name"] if arg_name in query_args: formatted_args.append(self._format_args(arg, query_args[arg_name])) + if formatted_args: return f"{field_name}{str(tuple(formatted_args)).replace("'", "")}" else: return field_name - def query_dict_to_graphql_string(self, first_line: str, query_body: dict[str, Any]) -> str: + def _query_dict_to_graphql_string(self, first_line: str, query_body: dict[str, Any]) -> str: + """Turn query dictionary into a string in GraphQL syntax""" formatted_query_body = ( # format the dict as a GraphQL query # TODO: bit janky, change? @@ -1004,11 +1110,8 @@ def get_input_id_dict(self, input_type: str) -> dict[str, str]: Args: input_type (str): GraphQL input_type (ex: alignments) - Raises: - ValueError: _description_ - Returns: - dict[str, str]: _description_ + dict[str, str]: dictionary where keys are argument names and values are descriptions """ if input_type not in self._root_dict: error_msg = "Not a valid input_type, no available input_id dictionary" diff --git a/tests/test_seq_query.py b/tests/test_seq_query.py index 9f3ac12..e48557e 100644 --- a/tests/test_seq_query.py +++ b/tests/test_seq_query.py @@ -190,6 +190,95 @@ def testGroupAnnotationsSummary(self) -> None: except Exception as error: self.fail(f"Failed unexpectedly: {error}") + def testDocExamples(self) -> None: + # These examples come from: + # https://sequence-coordinates.rcsb.org/#examples + + with self.subTest(msg="1. 
diff --git a/tests/test_seq_query.py b/tests/test_seq_query.py
index 9f3ac12..e48557e 100644
--- a/tests/test_seq_query.py
+++ b/tests/test_seq_query.py
@@ -190,6 +190,95 @@ def testGroupAnnotationsSummary(self) -> None:
             except Exception as error:
                 self.fail(f"Failed unexpectedly: {error}")
 
+    def testDocExamples(self) -> None:
+        # These examples come from:
+        # https://sequence-coordinates.rcsb.org/#examples
+
+        with self.subTest(msg="1. UniProt - PDB Entity alignment"):
+            try:
+                query_obj = Alignments(
+                    from_="UNIPROT",
+                    to="PDB_ENTITY",
+                    queryId="P01112",
+                    # TODO: This errors because "target_alignments" contains "aligned_regions" fields.
+                    # Can I fix it so that both sets of fields remain in the query without repetition?
+                    return_data_list=["query_sequence", "target_alignments", "aligned_regions"]
+                )
+                query_obj.exec()
+            except Exception as error:
+                self.fail(f"Failed unexpectedly: {error}")
+
+        with self.subTest(msg="2. Computed Structure Model - NCBI protein alignment"):
+            try:
+                query_obj = Alignments(
+                    from_="PDB_ENTITY",
+                    to="NCBI_PROTEIN",
+                    queryId="AF_AFP68871F1_1",
+                    return_data_list=["query_sequence", "target_alignments", "aligned_regions"]
+                )
+                query_obj.exec()
+            except Exception as error:
+                self.fail(f"Failed unexpectedly: {error}")
+
+        with self.subTest(msg="3. Mapping UniProt annotations to a PDB Instance"):
+            try:
+                query_obj = Annotations(  # type: ignore
+                    reference="PDB_INSTANCE",
+                    sources=["UNIPROT"],
+                    queryId="2UZI.C",
+                    return_data_list=["target_id", "features"]
+                )
+                query_obj.exec()
+            except Exception as error:
+                self.fail(f"Failed unexpectedly: {error}")
+
+        with self.subTest(msg="4. Human Chromosome 1 - PDB Entity alignment"):
+            try:
+                query_obj = Alignments(
+                    from_="NCBI_GENOME",
+                    to="PDB_ENTITY",
+                    queryId="NC_000001",
+                    return_data_list=[
+                        "target_alignments.target_id",
+                        "target_alignments.orientation",
+                        "target_alignments.aligned_regions"
+                    ]
+                )
+                query_obj.exec()
+            except Exception as error:
+                self.fail(f"Failed unexpectedly: {error}")
+
+        with self.subTest(msg="5. Mapping PDB Instance ligand binding sites to Human Chromosome 1"):
+            try:
+                query_obj = Annotations(  # type: ignore
+                    reference="NCBI_GENOME",
+                    sources=["PDB_INSTANCE"],
+                    queryId="NC_000001",
+                    filters=[
+                        AnnotationFilterInput(
+                            field="TYPE",
+                            operation="EQUALS",
+                            values=["BINDING_SITE"],
+                        )
+                    ],
+                    return_data_list=["target_id", "features"]
+                )
+                query_obj.exec()
+            except Exception as error:
+                self.fail(f"Failed unexpectedly: {error}")
+
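Outside the test harness, example 5 reads naturally as library usage. A sketch, assuming the class names are importable from rcsbapi.sequence as in this test module, and that the response follows the usual GraphQL {"data": ...} envelope:

    from rcsbapi.sequence import Annotations, AnnotationFilterInput

    result = Annotations(
        reference="NCBI_GENOME",
        sources=["PDB_INSTANCE"],
        queryId="NC_000001",
        filters=[
            AnnotationFilterInput(field="TYPE", operation="EQUALS", values=["BINDING_SITE"])
        ],
        return_data_list=["target_id", "features"],
    ).exec()
    for annotation in result["data"]["annotations"]:  # assumed response layout
        print(annotation["target_id"])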
+        with self.subTest(msg="6. Mapping a PDB Instance to NCBI RefSeq proteins"):
+            try:
+                query_obj = Alignments(
+                    from_="PDB_INSTANCE",
+                    to="NCBI_PROTEIN",
+                    queryId="4Z36.A",
+                    return_data_list=["query_sequence", "target_alignments"]
+                )
+                query_obj.exec()
+            except Exception as error:
+                self.fail(f"Failed unexpectedly: {error}")
+
 
 def buildQuery() -> unittest.TestSuite:
     suiteSelect = unittest.TestSuite()
@@ -198,6 +287,7 @@ def buildQuery() -> unittest.TestSuite:
     suiteSelect.addTest(SeqTests("testGroupAlignments"))
     suiteSelect.addTest(SeqTests("testGroupAnnotations"))
     suiteSelect.addTest(SeqTests("testGroupAnnotationsSummary"))
+    suiteSelect.addTest(SeqTests("testDocExamples"))
     return suiteSelect
 

From 44e9a5c73135aff8b8411648348146b52b0c7237 Mon Sep 17 00:00:00 2001
From: Ivana Truong
Date: Thu, 19 Dec 2024 16:36:57 -0600
Subject: [PATCH 11/12] merge_query fix

---
 rcsbapi/sequence/seq_schema.py | 125 ++++++++++++++++-----------------
 1 file changed, 62 insertions(+), 63 deletions(-)

diff --git a/rcsbapi/sequence/seq_schema.py b/rcsbapi/sequence/seq_schema.py
index de10818..c1f629e 100644
--- a/rcsbapi/sequence/seq_schema.py
+++ b/rcsbapi/sequence/seq_schema.py
@@ -906,12 +906,11 @@ def _construct_query_rustworkx(
 
         # Merge all the queries in return_data_query_list so there are no redundant paths
         idx_query_body = self._merge_query_list(return_data_query_list)
-        # print(f"return_data_query_list: {return_data_query_list}")
         name_query_body = self._idx_dict_to_name_dict(idx_query_body, query_args)
         query = self._query_dict_to_graphql_string(first_line, name_query_body)
         return {"query": query}
 
-    def _merge_query_list(self, query_list: list[dict[int, Any] | int]) -> list[dict[int, Any] | int]:
+    def _merge_query_list(self, query_list: list[dict[int, Any] | list[int]]) -> list[dict[int, Any] | int]:
         """Merge a list of query dicts, returning a merged query with unique indices/index dictionaries.
 
         Args:
@@ -921,96 +920,96 @@ def _merge_query_list(self, query_list: list[dict[int, Any] | int]) -> list[dict[int, Any] | int]:
         Returns:
             list[dict[int, Any] | int]: List of indices and index dicts representing the merged query
         """
-        print(f"query_list: {query_list}")
+        # print(f"query_list: {query_list}")
+        if isinstance(query_list[0], list):
+            result = query_list[0]
         result = [query_list[0]]
+
         for path in query_list[1:]:
-            result = self._merge_query(result, path)
-        print(f"FINAL: {result}")
+            for i, result_path in enumerate(result):
+                merged_query = self._merge_query(result_path, path)
+                if merged_query:
+                    result.pop(i)
+                    result.extend(merged_query)
+                    break
+                if not merged_query and isinstance(path, dict):
+                    result.append(path)
+            # print(f"result: {result}")
+
+        # print(f"FINAL: {result}")
         return result
 
-    def _merge_query_2(
+    def _merge_query(
         self,
-        query_1: dict[int, Any] | list[int | dict[int, Any]] | int,
-        query_2: dict[int, Any] | list[int | dict[int, Any]] | int
+        query_1: dict[int, Any] | list[int | dict[int, Any]] | list[int] | int,
+        query_2: dict[int, Any] | list[int | dict[int, Any]] | list[int] | int
     ) -> list[dict[int, Any] | int] | list[dict[int, Any]] | list[int]:
         """Merge two queries if possible, else return empty list
 
-        Args:
-            query_1 (dict[int, Any] | list[int | dict[int, Any]] | int): first query dict, list, or index to merge
-            query_2 (dict[int, Any] | list[int | dict[int, Any]] | int): second query dict, list, or index to merge
-
-        Raises:
-            ValueError: if the queries are not a supported combination of dicts, lists, and ints
-
         Returns:
             list[dict[int, Any] | int] | list[dict[int, Any]] | list[int]: merged query
         """
-        if isinstance(query_1, dict) and isinstance(query_2, dict):
-            for key in query_1.keys():
-                if (key in query_2):
-                    return [{key: self._merge_query(query_1[key], query_2[key])}]
-            return [query_1, query_2]
-        elif isinstance(query_1, list) and isinstance(query_2, dict):
-            return query_1 + [query_2]
-        elif isinstance(query_1, dict) and isinstance(query_2, list):
-            return [query_1] + query_2
-        elif isinstance(query_1, list) and isinstance(query_2, list):
-            return query_1 + query_2
-        raise ValueError("Invalid query input")
-
-    def _merge_query(
-        self,
-        query_1: dict[int, Any] | list[int | dict[int, Any]] | int,
-        query_2: dict[int, Any] | list[int | dict[int, Any]] | int
-    ) -> list[dict[int, Any] | int] | list[dict[int, Any]] | list[int]:
-        """Merge two queries, removing redundancy.
-
-        Returns:
-            list[dict[int, Any] | int] | list[dict[int, Any]]: merged query
-        """
         # Case where both queries are dicts:
         # If share keys --> merge values
         # If dicts are equal --> return one dict
         # Else: return both dicts in list
-        if (isinstance(query_1, dict) and isinstance(query_2, dict)):
+        if isinstance(query_1, dict) and isinstance(query_2, dict):
             for key in query_1.keys():
                 if (key in query_2):
-                    return [{key: self._merge_query(query_1[key], query_2[key])}]
-            if query_1 == query_2:
-                return [query_1]
-            return [query_1, query_2]
-
-        elif isinstance(query_1, (int, dict)) and isinstance(query_2, (int, dict)):
-            if query_1 == query_2:
-                return [query_1]
-            return [query_1, query_2]
-
+                    return [{key: self._merge_dicts(query_1[key], query_2[key])}]
         # Cases where one query is a list and one is a dict
         # If query is already in the other query, return only more general query
         # Else: return list with both queries
-        elif (
-            (isinstance(query_1, list) and isinstance(query_2, dict))
-            or (isinstance(query_1, list) and isinstance(query_2, int))
-        ):
+        elif isinstance(query_1, list) and isinstance(query_2, dict):
             if query_2 in query_1:
                 return query_1
-            return query_1 + [query_2]
-
-        elif (
-            (isinstance(query_1, dict) and isinstance(query_2, list))
-            or (isinstance(query_1, int) and isinstance(query_2, list))
-        ):
+        elif isinstance(query_1, dict) and isinstance(query_2, list):
             if query_1 in query_2:
                 return query_2
-            return [query_1] + query_2
-
         # Case where both queries are lists
         # Merge lists, checking if items are unique
         elif isinstance(query_1, list) and isinstance(query_2, list):
             unique_query_1 = [path for path in query_1 if path not in query_2]
-            return unique_query_1 + query_2
-
-        raise ValueError("Invalid query input")
+            if unique_query_1 != query_1:
+                return unique_query_1 + query_2
+        return []
+
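In this version an empty list is the "no merge possible" signal that the loop above keys off of. A hypothetical trace (made-up indices, schema being a CoordSchema instance):

    schema._merge_query({1: [2]}, {1: [3]})   # -> [{1: [2, 3]}]  shared key, merged
    schema._merge_query({1: [2]}, {4: [5]})   # -> []             no shared key: caller keeps both
    schema._merge_query([2], [3])             # -> []             disjoint lists: no merge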
+    def _merge_dicts(
+        self,
+        dict_1: dict[int, Any] | list[int | dict[int, Any]] | int,
+        dict_2: dict[int, Any] | list[int | dict[int, Any]] | int,
+    ) -> list[dict[int, Any] | int] | list[dict[int, Any]] | list[int]:
+        """Merge two dictionaries without overwriting values.
+        TODO: This is very similar to _merge_query in logic, but I had to make it separate to recursively
+        merge dictionaries (the function above returns an empty list if no merge can be made). Can this be cleaned up?
+
+        Args:
+            dict_1 (dict[int, Any] | list[int | dict[int, Any]] | int): first query dict, list, or index to merge
+            dict_2 (dict[int, Any] | list[int | dict[int, Any]] | int): second query dict, list, or index to merge
+
+        Raises:
+            ValueError: if the arguments are not a supported combination of dicts, lists, and ints
+
+        Returns:
+            list[dict[int, Any] | int] | list[dict[int, Any]] | list[int]: merged query
+        """
+        if isinstance(dict_1, dict) and isinstance(dict_2, dict):
+            for key in dict_1.keys():
+                if (key in dict_2):
+                    return [{key: self._merge_dicts(dict_1[key], dict_2[key])}]
+            return [dict_1, dict_2]
+        elif isinstance(dict_1, list) and isinstance(dict_2, dict):
+            if dict_2 in dict_1:
+                return dict_1
+            return dict_1 + [dict_2]
+        elif isinstance(dict_1, dict) and isinstance(dict_2, list):
+            if dict_1 in dict_2:
+                return dict_2
+            return [dict_1] + dict_2
+        elif isinstance(dict_1, list) and isinstance(dict_2, list):
+            unique_query_1 = [path for path in dict_1 if path not in dict_2]
+            return unique_query_1 + dict_2
+        raise ValueError("Invalid dictionary input")
 
     def _idxs_to_idx_dict(
         self,
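The split between _merge_query and _merge_dicts matters because the recursive step must always produce a merged structure, while the top level must be able to say "no merge". A hypothetical contrast (made-up indices):

    schema._merge_query([2], [3])    # -> []      signals "keep both paths separate"
    schema._merge_dicts([2], [3])    # -> [2, 3]  always combines
    schema._merge_dicts({11: [12]}, {11: [13]})
    # -> [{11: [12, 13]}]  recursive merge under the shared key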
From 3c9144866292c13b07a0a64c68be468aaf0d1d9d Mon Sep 17 00:00:00 2001
From: Ivana Truong
Date: Fri, 20 Dec 2024 11:43:36 -0600
Subject: [PATCH 12/12] delete to simplify merging

---
 rcsbapi/sequence/seq_schema.py | 72 +++++++++++++---------------------
 1 file changed, 28 insertions(+), 44 deletions(-)

diff --git a/rcsbapi/sequence/seq_schema.py b/rcsbapi/sequence/seq_schema.py
index c1f629e..c2a5fd9 100644
--- a/rcsbapi/sequence/seq_schema.py
+++ b/rcsbapi/sequence/seq_schema.py
@@ -910,7 +910,7 @@ def _construct_query_rustworkx(
         query = self._query_dict_to_graphql_string(first_line, name_query_body)
         return {"query": query}
 
-    def _merge_query_list(self, query_list: list[dict[int, Any] | list[int]]) -> list[dict[int, Any] | int]:
+    def _merge_query_list(self, query_list: list[dict[int, Any] | list[int]]) -> list[dict[int, Any] | list[int]]:
         """Merge a list of query dicts, returning a merged query with unique indices/index dictionaries.
 
         Args:
@@ -922,66 +922,39 @@ def _merge_query_list(self, query_list: list[dict[int, Any] | list[int]]) -> list[dict[int, Any] | list[int]]:
         """
         # print(f"query_list: {query_list}")
         if isinstance(query_list[0], list):
-            result = query_list[0]
+            result: list[dict[int, Any] | list[int]] | list[int] = query_list[0]
         result = [query_list[0]]
 
         for path in query_list[1:]:
             for i, result_path in enumerate(result):
-                merged_query = self._merge_query(result_path, path)
-                if merged_query:
+                merged_query = self._merge_queries(result_path, path)
+                # TODO: Rather than checking if merged_query is truthy and having to use two separate functions,
+                # maybe check if len(merged_query) == len(result_path) + len(path) (with some additional logic, obviously)
+                path_len = 1
+                if isinstance(path, list):
+                    path_len = len(path)
+                result_path_len = 1
+                if isinstance(result_path, list):
+                    result_path_len = len(result_path)
+
+                if len(merged_query) < (path_len + result_path_len):
                     result.pop(i)
                     result.extend(merged_query)
+                    # print(f"merging: {path} and {result_path}")
                     break
-                if not merged_query and isinstance(path, dict):
+                if len(merged_query) == (path_len + result_path_len):
                     result.append(path)
             # print(f"result: {result}")
 
         # print(f"FINAL: {result}")
         return result
 
-    def _merge_query(
-        self,
-        query_1: dict[int, Any] | list[int | dict[int, Any]] | list[int] | int,
-        query_2: dict[int, Any] | list[int | dict[int, Any]] | list[int] | int
-    ) -> list[dict[int, Any] | int] | list[dict[int, Any]] | list[int]:
-        """Merge two queries if possible, else return empty list
-
-        Returns:
-            list[dict[int, Any] | int] | list[dict[int, Any]] | list[int]: merged query
-        """
-        # Case where both queries are dicts:
-        # If share keys --> merge values
-        # If dicts are equal --> return one dict
-        # Else: return both dicts in list
-        if isinstance(query_1, dict) and isinstance(query_2, dict):
-            for key in query_1.keys():
-                if (key in query_2):
-                    return [{key: self._merge_dicts(query_1[key], query_2[key])}]
-        # Cases where one query is a list and one is a dict
-        # If query is already in the other query, return only more general query
-        # Else: return list with both queries
-        elif isinstance(query_1, list) and isinstance(query_2, dict):
-            if query_2 in query_1:
-                return query_1
-        elif isinstance(query_1, dict) and isinstance(query_2, list):
-            if query_1 in query_2:
-                return query_2
-        # Case where both queries are lists
-        # Merge lists, checking if items are unique
-        elif isinstance(query_1, list) and isinstance(query_2, list):
-            unique_query_1 = [path for path in query_1 if path not in query_2]
-            if unique_query_1 != query_1:
-                return unique_query_1 + query_2
-        return []
-
-    def _merge_dicts(
+    def _merge_queries(
         self,
         dict_1: dict[int, Any] | list[int | dict[int, Any]] | int,
         dict_2: dict[int, Any] | list[int | dict[int, Any]] | int,
     ) -> list[dict[int, Any] | int] | list[dict[int, Any]] | list[int]:
         """Merge two dictionaries without overwriting values.
-        TODO: This is very similar to _merge_query in logic, but I had to make it separate to recursively
-        merge dictionaries (the function above returns an empty list if no merge can be made). Can this be cleaned up?
 
         Args:
             dict_1 (dict[int, Any] | list[int | dict[int, Any]] | int): first query dict, list, or index to merge
             dict_2 (dict[int, Any] | list[int | dict[int, Any]] | int): second query dict, list, or index to merge
 
         Raises:
             ValueError: if the arguments are not a supported combination of dicts, lists, and ints
 
         Returns:
             list[dict[int, Any] | int] | list[dict[int, Any]] | list[int]: merged query
         """
+        # Case where both queries are dicts:
+        # If share keys --> merge values
+        # If dicts are equal --> return one dict
+        # Else: return both dicts in list
         if isinstance(dict_1, dict) and isinstance(dict_2, dict):
             for key in dict_1.keys():
                 if (key in dict_2):
-                    return [{key: self._merge_dicts(dict_1[key], dict_2[key])}]
+                    return [{key: self._merge_queries(dict_1[key], dict_2[key])}]
             return [dict_1, dict_2]
+
+        # Cases where one query is a list and one is a dict
+        # If query is already in the other query, return only more general query
+        # Else: return list with both queries
         elif isinstance(dict_1, list) and isinstance(dict_2, dict):
             if dict_2 in dict_1:
                 return dict_1
             return dict_1 + [dict_2]
         elif isinstance(dict_1, dict) and isinstance(dict_2, list):
             if dict_1 in dict_2:
                 return dict_2
             return [dict_1] + dict_2
+
+        # Case where both queries are lists
+        # Merge lists, checking if items are unique
         elif isinstance(dict_1, list) and isinstance(dict_2, list):
             unique_query_1 = [path for path in dict_1 if path not in dict_2]
             return unique_query_1 + dict_2