diff --git a/viewer/target_loader.py b/viewer/target_loader.py index 67fca327..89b5e791 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -1,12 +1,16 @@ +import functools import hashlib import logging +import math import os import tarfile +import traceback import uuid -from dataclasses import dataclass +from dataclasses import dataclass, field +from enum import Enum from pathlib import Path from tempfile import TemporaryDirectory -from typing import Any +from typing import Any, Iterable, Optional, TypeVar import yaml from celery import Task @@ -14,6 +18,8 @@ from django.contrib.auth import get_user_model from django.core.exceptions import MultipleObjectsReturned from django.db import IntegrityError, transaction +from django.db.models import Model +from django.db.models.base import ModelBase from django.utils import timezone from fragalysis.settings import TARGET_LOADER_MEDIA_DIRECTORY @@ -35,36 +41,286 @@ XtalformSite, ) -# from hypothesis.definitions import VectTypes +logger = logging.getLogger(__name__) -# from hypothesis.models import Vector3D -# from hypothesis.models import Vector +# data that goes to tables are in the following files +# assemblies and xtalforms +XTALFORMS_FILE = "crystalforms.yaml" -# from frag.network.decorate import get_3d_vects_for_mol +# target name, nothing else +CONFIG_FILE = "config*.yaml" +# everything else +METADATA_FILE = "meta_aligner.yaml" -logger = logging.getLogger(__name__) -# data that goes to tables are in the following files -METADATA_FILE = "meta_aligner.yaml" -ASSEMBLIES_FILE = "assemblies.yaml" -XTALFORMS_FILE = "xtalforms.yaml" -ASSIGNED_XTALFORMS_FILE = "assigned_xtalforms.yaml" -CONFIG_FILE = "config*.yaml" +# type hint for Djanog model instance +ModelInstance = TypeVar("ModelInstance", bound=Model) + + +class UploadState(str, Enum): + """Target loader progress state. + + PROCESSING - all good, upload in progress + REPORTING - upload failed, loader in reporting mode for diagnostics + SUCCESS - processing complete, all good + FAILED - processing complete, failed + """ + + PROCESSING = "PROCESSING" + REPORTING = "REPORTING" + SUCCESS = "SUCCESS" + FAILED = "FAILED" + + +class Level(str, Enum): + INFO = "INFO" + WARNING = "WARNING" + FATAL = "FATAL" + + +@dataclass +class MetadataObject: + """Data structure to store freshly created model instances. + + data blocks from from meta_aligner.yaml are processed into + dictionaries: { some_id: MetadataObjects, ...} + + Reason being, quite often I need to refer to these by some + alternative ID. With the dataclass, I'm able to create temporary + dicts with key that are needed. + """ + + instance: ModelInstance + index_data: dict = field(default_factory=dict) + + +# type hint for wrapped yaml block processors +MetDict = TypeVar("MetDict", bound=dict[int | str, MetadataObject]) + + +@dataclass +class ProcessedObject: + """Data structure for creating model instances. + + Returned from methods that process yaml blocks to dictionaries + that can be sent to django's model's get_or_create methods. + """ + + model_class: ModelBase + fields: dict + key: str + defaults: dict = field(default_factory=dict) + index_data: dict = field(default_factory=dict) + identifier: Optional[str] = "" -# data blocks from from meta_aligner.yaml are processed into dictionaries: -# { some_id: MetadataObjects, ...} -# reason being, quite often I need to refer to these by -# some alternative id. with the dataclass, I'm able -# to create temporary dicts with key that's needed atm. -# 'new' as a flag for newly created objects (as opposed to fetched -# from the database) @dataclass -class MetadataObjects: - instance: Any - data: Any = None - new: bool = True +class UploadReportEntry: + message: str + level: Level | None = None + + def __str__(self): + return ": ".join([k for k in (self.level, self.message) if k]) + + +@dataclass +class UploadReport: + task: Task | None + stack: list[UploadReportEntry] = field(default_factory=list) + upload_state: UploadState = UploadState.PROCESSING + failed: bool = False + + def __post_init__(self) -> None: + self.task_id = f"task {self.task.request.id}: " if self.task else "" + + def log(self, level: Level, message: str) -> None: + msg = f"{self.task_id}{message}" + if level == Level.FATAL: + self.failed = True + self.upload_state = UploadState.REPORTING + logger.error(msg) + elif level == Level.WARNING: + logger.warning(msg) + else: + # must be info + logger.info(msg) + + self.stack.append(UploadReportEntry(level=level, message=message)) + self._update_task(message) + + def final(self, archive_name): + if self.upload_state == UploadState.PROCESSING: + self.upload_state = UploadState.SUCCESS + message = f"{archive_name} uploaded successfully." + else: + self.upload_state = UploadState.FAILED + message = f"Uploading {archive_name} failed." + + self.stack.append(UploadReportEntry(message=message)) + self._update_task(self.json()) + + def json(self): + return [str(k) for k in self.stack] + + def _update_task(self, message: str | list): + try: + self.task.update_state( + state=self.upload_state, + meta={ + "description": message, + }, + ) + except AttributeError: + # no task passed to method, nothing to do + pass + + +def _flatten_dict_gen(d: dict, parent_key: tuple | str | int, depth: int): + for k, v in d.items(): + if parent_key: + if isinstance(parent_key, tuple): + new_key = (*parent_key, k) + else: + new_key = (parent_key, k) + else: + new_key = k + + try: + deep_enough = any([isinstance(x, dict) for x in v.values()]) + except AttributeError: + continue + + if deep_enough and depth > 1: + yield from flatten_dict(v, new_key, depth - 1) + else: + if isinstance(new_key, str): + yield new_key, v + else: + yield *new_key, v + + +def flatten_dict(d: dict, parent_key: tuple | int | str = "", depth: int = 1): + """Flatten nested dict to specified depth.""" + return _flatten_dict_gen(d, parent_key, depth) + + +def set_directory_permissions(path, permissions): + for root, dirs, files in os.walk(path): + # Set permissions for directories + for directory in dirs: + dir_path = os.path.join(root, directory) + os.chmod(dir_path, permissions) + + # Set permissions for files + for file in files: + file_path = os.path.join(root, file) + os.chmod(file_path, permissions) + + +# borrowed from SO +def calculate_sha256(filepath): + sha256_hash = hashlib.sha256() + with open(filepath, "rb") as f: + # Read the file in chunks of 4096 bytes + for chunk in iter(lambda: f.read(4096), b""): + sha256_hash.update(chunk) + return sha256_hash.hexdigest() + + +def create_objects(func=None, *, depth=math.inf): + """Wrapper function for saving database objects. + + Handles common part of saving model instances, actual saving, + logging, reporting and error handling. + + Inner functions are yaml data processing functions that return + the model class and the data to pass to model's get_or_create + function. + + """ + if func is None: + return functools.partial(create_objects, depth=depth) + + @functools.wraps(func) + def wrapper_create_objects( + self, *args, yaml_data: dict, **kwargs + ) -> dict[int | str, MetadataObject]: + logger.debug("+ wrapper_service_query") + # logger.debug("args passed: %s", args) + logger.debug("kwargs passed: %s", kwargs) + + flattened_data = flatten_dict(yaml_data, depth=depth) + result = {} + created, existing, failed = 0, 0, 0 + for item in flattened_data: + logger.debug("flattened data item: %s", item) + instance_data = func(self, *args, item_data=item, **kwargs) + logger.debug("Instance data returned: %s", instance_data) + if not instance_data: + continue + + obj = None + try: + obj, new = instance_data.model_class.filter_manager.by_target( + self.target + ).get_or_create( + **instance_data.fields, + defaults=instance_data.defaults, + ) + logger.debug( + "%s object %s created", + instance_data.model_class._meta.object_name, # pylint: disable=protected-access + obj, + ) + if new: + created = created + 1 + else: + existing = existing + 1 + except MultipleObjectsReturned: + msg = "{}.get_or_create returned multiple objects for {}".format( + instance_data.model_class._meta.object_name, # pylint: disable=protected-access + instance_data.fields, + ) + self.report.log(Level.FATAL, msg) + failed = failed + 1 + except IntegrityError: + msg = "{} object {} failed to save".format( + instance_data.model_class._meta.object_name, # pylint: disable=protected-access + instance_data.key, + ) + self.report.log(Level.FATAL, msg) + failed = failed + 1 + + if not obj: + # create fake object so I can just push the upload + # through and compile report for user feedback + obj = instance_data.model_class( + **instance_data.fields | instance_data.defaults + ) + logger.warning( + "Fake %s object created: %s", + instance_data.model_class._meta.object_name, # pylint: disable=protected-access + obj, + ) + + m = MetadataObject(instance=obj, index_data=instance_data.index_data) + # index data here probs + result[instance_data.key] = m + + msg = "{} {} objects processed, {} created, {} fetched from database".format( + created + existing + failed, + next( # pylint: disable=protected-access + iter(result.values()) + ).instance._meta.model._meta.object_name, # pylint: disable=protected-access + created, + existing, + ) # pylint: disable=protected-access + self.report.log(Level.INFO, msg) + + return result + + return wrapper_create_objects class TargetLoader: @@ -74,7 +330,7 @@ def __init__( proposal_ref: str, tempdir: str, user_id=None, - task=None, + task: Task | None = None, ): self.data_bundle = Path(data_bundle).name self.bundle_name = Path(data_bundle).stem @@ -88,6 +344,8 @@ def __init__( self.previous_version_dirs = None self.user_id = user_id + self.report = UploadReport(task=task) + self.raw_data.mkdir() # create exp upload object @@ -125,7 +383,7 @@ def __init__( # to be used in logging messages, if no task, means invoked # directly, likely from management command - self.task_id = f"task {task.request.id}: " if task else "" + # self.task_id = f"task {task.request.id}: " if task else "" # these will be filled later self.target_name = None @@ -133,10 +391,6 @@ def __init__( self.target = None self.project = None - @property - def target_root(self) -> Path: - return self._target_root - @property def final_path(self) -> Path: return self._final_path @@ -145,99 +399,238 @@ def final_path(self) -> Path: def abs_final_path(self) -> Path: return self._abs_final_path - def _load_yaml(self, yaml_file: Path) -> dict: - try: - with open(yaml_file, "r", encoding="utf-8") as file: - contents = yaml.safe_load(file) - except FileNotFoundError as exc: - msg = f"{yaml_file.stem} file not found in data archive!" - logger.error("%s%s", self.task_id, msg) - raise FileNotFoundError(msg) from exc + def validate_files( + self, + obj_identifier: str, + file_struct: dict, + required: Iterable[str] = (), + recommended: Iterable[str] = (), + ) -> list[str | None]: + """Check if file exists and if sha256 hash matches (if given). - return contents + file struct can come in 2 configurations: + {file_key: {file: , sha265: [smiles: ]}, ...} + or simply + {file_key: } + Detect which one and take appropriate action. - def _get_meta_blocks(self, yaml_file, blocks) -> list: - validation_errors = [] - error_text = "'{}' section missing in input file" - result = [] - for block in blocks: + Once the filename is extracted, check if it exists and if + sha256 hash is given, calculate the hash and compare to the + one in file. + + params: + - file_struct: dictionary read from yaml file + - required: mandatory filename keys + - recommended: optional filename keys + - protein_name: experiment_identifier (used for logging) + + return: + - list of all file paths required + + Checks for 4 possible errors: + - file is expected by the db schema but not referenced in METADATA_FILE + - file is referenced METADATA_FILE but not present in uploaded archive + - calculated hash doesn't match with the one in METADATA_FILE + - dictionary in unexpected format, unable to extract filename + + """ + + def logfunc(key, message): + if key in required: + self.report.log(Level.FATAL, message) + else: + self.report.log(Level.WARNING, message) + + result = {} + for key, value in file_struct.items(): + if key not in required and key not in recommended: + # schema isn't looking for this file, ignore + continue + + filename, file_hash = None, None + + # sort out the filename + if isinstance(value, dict): + file_hash = value.get("sha256", None) + try: + filename = value["file"] + except KeyError: + # this is rather unexpected, haven't seen it yet + logfunc( + key, + "{}: malformed dict, key 'file' missing".format(obj_identifier), + ) + + # unable to extract file from dict, no point to + # continue with hash checking + continue + + elif isinstance(value, str): + filename = value + else: + # this is probably the list of panddas event files, don't + # need them here + # although.. should i validate them here nevertheless? + # i'd have to do this on copy otherwise.. + continue + + # file key should go to result dict no matter what + result[key] = filename + logger.debug("Adding key %s: %s", key, filename) + + # filename resolved, check if exists and if given, hash + file_path = self.raw_data.joinpath(filename) + if file_path.is_file(): + if file_hash and file_hash != calculate_sha256(file_path): + logfunc(key, "Invalid hash for file {}".format(filename)) + else: + logfunc( + key, + "{} referenced in {}: {} but not found in archive".format( + key, + METADATA_FILE, + obj_identifier, + ), + ) + + files = [] + for f in list(required) + list(recommended): try: - result.append(yaml_file[block]) + files.append(result[f]) except KeyError: - msg = error_text.format(block) - validation_errors.append(msg) - update_task(self.task, "ERROR", msg) - logger.error("%s%s", self.task_id, msg) + logfunc( + f, + "{}: file {} expected but not found in {} file".format( + obj_identifier, + f, + METADATA_FILE, + ), + ) + files.append(None) + + logger.debug("Returning files: %s", files) + + return files + + @create_objects(depth=1) + def process_experiment( + self, item_data: tuple[str, dict] | None = None, **kwargs + ) -> ProcessedObject | None: + """Extract data from yaml block for creating Experiment instance. + + Incoming data format (relevant bits): + ( + protein_name: , + { + 'type': 'manual', + 'crystallographic_files': { + 'xtal_pdb': { + 'file': 'upload_1/crystallographic_files/5rgs/5rgs.pdb', + 'sha256': sha , + }, + 'xtal_mtz': { + 'file': 'upload_1/crystallographic_files/5rgs/5rgs.mtz', + 'sha256': sha , + } + }, + 'status': 'new', + } + ) - # wonder if it's worth throwing a custom exception here.. easier - # to scan the logs. then again, if errors are passed to user, - # inspecting logs isn't really necessary? - if validation_errors: - raise KeyError(validation_errors) + This is enough to save full instance + """ + del kwargs + logger.debug("incoming data: %s", item_data) + experiment_name, data = item_data - return result + if "aligned_files" not in data.keys(): + return None - def _process_experiment(self, existing_objects=None, protein_name=None, data=None): - """Create Experiment model instance from data. + extract = functools.partial( + self._extract, + data=data, + section_name="crystals", + item_name=experiment_name, + ) - Incoming data format: - type: - last_updated: - crystallographic_files: - xtal_pdb: {file: , sha256: } - xtal_mtz: {file: , sha256: } - ligand_cif: {file: , sha256: , smiles: } - panddas_event_files: - - {file: , sha256: , - model: , chain: , res: , index: , bdc: } - - {file: , sha256: , - model: , chain: , res: , index: , bdc: } - status: - assigned_xtalform: - aligned_files: [...] - - Manages to save all references to other tables - """ - logger.debug("Creating experiment object: %s", protein_name) + ( # pylint: disable=unbalanced-tuple-unpacking + pdb_info, + mtz_info, + cif_info, + ) = self.validate_files( + obj_identifier=experiment_name, + file_struct=data["crystallographic_files"], + required=("xtal_pdb",), + recommended=( + "xtal_mtz", + "ligand_cif", + ), + ) - try: - experiment = existing_objects.get(code=protein_name) - new = False - except Experiment.DoesNotExist: - new = True - files = self._check_file_struct( - self.target_root, data["crystallographic_files"] + dtype = extract(key="type") + + if dtype == "manual": + exp_type = 1 + elif dtype == "model_building": + exp_type = 0 + else: + exp_type = -1 + self.report.log( + Level.FATAL, f"Unexpected 'type' '{dtype}' value for {experiment_name}" ) - experiment = Experiment( - experiment_upload=self.experiment_upload, - code=protein_name, - status=1, - version=1, - type=1 if data["type"] == "manual" else 0, # FIXME - pdb_info=str(self._get_final_path(files["xtal_pdb"])), - mtz_info=str(self._get_final_path(files["xtal_mtz"])), - # this may be missing from the struct - cif_info=str(self._get_final_path(files.get("cif_info", None))), - # this doesn't seem to be present - # pdb_sha256= + + dstatus = extract(key="status") + + if dstatus == "new": + status = 0 + elif dstatus == "deprecated": + status = 1 + elif dstatus == "superseded": + status = 2 + else: + status = -1 + self.report.log( + Level.FATAL, f"Unexpected status '{dstatus}' for {experiment_name}" ) - try: - experiment.save() - except IntegrityError as exc: - msg = f"Failed to save Experiment: {protein_name}" - update_task(self.task, "ERROR", msg) - logger.error("%s%s", self.task_id, msg) - raise IntegrityError(msg) from exc - except ValueError as exc: - update_task(self.task, "ERROR", exc.args[0]) - raise ValueError(exc.args[0]) from exc - - return MetadataObjects( - instance=experiment, data=data.get("aligned_files", None), new=new + + # TODO: unhandled atm + # version int old versions are kept target loader + version = 1 + + fields = { + "experiment_upload": self.experiment_upload, + "code": experiment_name, + } + defaults = { + "status": status, + "version": version, + "type": exp_type, + "pdb_info": str(self._get_final_path(pdb_info)), + "mtz_info": str(self._get_final_path(mtz_info)), + "cif_info": str(self._get_final_path(cif_info)), + # this doesn't seem to be present + # pdb_sha256: + } + + assigned_xtalform = extract(key="assigned_xtalform") + + index_fields = { + "xtalform": assigned_xtalform, + } + + return ProcessedObject( + model_class=Experiment, + fields=fields, + key=experiment_name, + defaults=defaults, + index_data=index_fields, ) - def _process_compound(self, existing_objects=None, protein_name=None, data=None): - """Create Compound model instance from data. + @create_objects(depth=1) + def process_compound( + self, item_data: tuple[str, dict] | None = None, **kwargs + ) -> ProcessedObject | None: + """Extract data from yaml block for creating Compound instance. Incoming data format: xtal_pdb: {file: , sha256: } @@ -248,39 +641,48 @@ def _process_compound(self, existing_objects=None, protein_name=None, data=None) model: , chain: , res: , index: , bdc: } - {file: , sha256: , model: , chain: , res: , index: , bdc: } - """ - # TODO: there's a method, calc_cpd, could I use that? as I - # understand, they don't need to be unique anymore, so I can - # just remove the uniqueness validation? + NB! After creation, many2many with project needs to be populated + """ + del kwargs + logger.debug("incoming data: %s", item_data) + protein_name, data = item_data + if ( + "aligned_files" not in data.keys() + or "crystallographic_files" not in data.keys() + ): + return None - smiles = data["ligand_cif"]["smiles"] try: - compound = existing_objects.get(smiles=smiles) - new = False - except Compound.DoesNotExist: - new = True + smiles = data["crystallographic_files"]["ligand_cif"]["smiles"] + except KeyError as exc: + # just setting the var to something + smiles = ( + "crystallographic_files" + if exc.args[0] == "ligand_cif" + else "ligand_cif" + ) + self.report.log( + Level.FATAL, + "{} missing from {} in '{}' experiment section".format( + exc, smiles, protein_name + ), + ) - compound = Compound(smiles=smiles) + fields = { + "smiles": smiles, + } - try: - compound.save() - except IntegrityError as exc: - msg = f"Failed to save Compound: {protein_name}" - update_task(self.task, "ERROR", msg) - logger.error("%s%s", self.task_id, msg) - raise IntegrityError(msg) from exc - except KeyError as exc: - # this means ligand info missing - raise KeyError from exc - - compound.project_id.add(self.experiment_upload.project) - - # data basically just contains file locations, need to copy them - # later - return MetadataObjects(instance=compound, data=data, new=new) - - def _process_xtalform(self, existing_objects=None, idx=None, data=None): + return ProcessedObject( + model_class=Compound, + fields=fields, + key=protein_name, + ) + + @create_objects(depth=1) + def process_xtalform( + self, item_data: tuple[str, dict] | None = None, **kwargs + ) -> ProcessedObject | None: """Create Xtalform model instance from data. Incoming data format (from meta_aligner.yaml): @@ -299,28 +701,39 @@ def _process_xtalform(self, existing_objects=None, idx=None, data=None): Saves all references to other tables (QuatAssembly and Experiment). """ - logger.debug("Creating Xtalform object: %s", data["xtalform_ref"]) - try: - xtalform = existing_objects.get(name=idx) - new = False - except Xtalform.DoesNotExist: - new = True - xtalform = Xtalform( - name=idx, - space_group=data["xtalform_space_group"], - unit_cell_info=data["xtalform_cell"], - ) - try: - xtalform.save() - except IntegrityError as exc: - msg = f"Failed to save Xtalform: {data['xtalform_ref']}" - update_task(self.task, "ERROR", msg) - logger.error("%s%s", self.task_id, msg) - raise IntegrityError(msg) from exc + del kwargs + # weirdly, none of the fields is mandatory in Xtalform + xtalform_name, data = item_data - return MetadataObjects(instance=xtalform, data=None, new=new) + extract = functools.partial( + self._extract, + data=data, + section_name="xtalforms", + item_name=xtalform_name, + ) + + fields = { + "name": xtalform_name, + } + space_group = extract(key="xtalform_space_group") + unit_cell_info = extract(key="xtalform_cell") - def _process_quat_assembly(self, existing_objects=None, idx=None, data=None): + defaults = { + "space_group": space_group, + "unit_cell_info": unit_cell_info, + } + + return ProcessedObject( + model_class=Xtalform, + fields=fields, + key=xtalform_name, + defaults=defaults, + ) + + @create_objects(depth=1) + def process_quat_assembly( + self, item_data: tuple[str, dict] | None = None, **kwargs + ) -> ProcessedObject | None: """Create QuatAssemblylform model instance from data. Incoming data format: @@ -331,34 +744,134 @@ def _process_quat_assembly(self, existing_objects=None, idx=None, data=None): No references to other models. """ - logger.debug("Creating QuatAssembly object: %d", idx) - try: - quat_assembly = existing_objects.get(chains=data["chains"]) - new = False - except QuatAssembly.DoesNotExist: - new = True - - quat_assembly = QuatAssembly( - chains=data["chains"], - name=idx, - ) - try: - quat_assembly.save() - except IntegrityError as exc: - msg = f"Failed to save QuatAssembly: {idx}" - update_task(self.task, "ERROR", msg) - logger.error("%s%s", self.task_id, msg) - raise IntegrityError(msg) from exc - - return MetadataObjects( - instance=quat_assembly, - data=None, - new=new, + del kwargs + assembly_name, data = item_data + + extract = functools.partial( + self._extract, + data=data, + section_name="assemblies", + item_name=assembly_name, ) - def _process_canon_site_conf( - self, existing_objects=None, canon_site=None, idx=None, data=None - ): + chains = extract(key="chains", level=Level.WARNING) + + fields = { + "name": assembly_name, + "chains": chains, + } + + return ProcessedObject( + model_class=QuatAssembly, + fields=fields, + key=assembly_name, + ) + + @create_objects(depth=3) + def process_xtalform_quatassembly( + self, + xtalforms: dict[int | str, MetadataObject], + quat_assemblies: dict[int | str, MetadataObject], + item_data: tuple[str, str, int, dict] | None = None, + **kwargs, + ) -> ProcessedObject | None: + """Create XtalformQuatAssembly model instance from data. + + Incoming data format: + : + assembly: + chains: + + """ + del kwargs + xtalform_id, _, _, data = item_data + + # hm.. doesn't reflect the fact that it's from a different + # file.. and the message should perhaps be a bit different + extract = functools.partial( + self._extract, + data=data, + section_name="xtalforms", + item_name=xtalform_id, + ) + + xtalform = xtalforms[xtalform_id].instance + + assembly_id = extract(key="assembly") + + # TODO: need to key check these as well.. + assembly = quat_assemblies[assembly_id].instance + + fields = { + "assembly_id": assembly_id, + "xtalform": xtalform, + "quat_assembly": assembly, + "chains": data["chains"], + } + + return ProcessedObject( + model_class=XtalformQuatAssembly, + fields=fields, + key=xtalform_id, + ) + + @create_objects(depth=1) + def process_canon_site( + self, item_data: tuple[str, dict] | None = None, **kwargs + ) -> ProcessedObject | None: + """Create CanonSite model instance from data. + + Incoming data format: + : + conformer_site_ids: + global_reference_dtag: + reference_conformer_site_id: + residues: + + Unable to add references to: + - CanonSiteConf (ref_conf_site) + + """ + del kwargs + canon_site_id, data = item_data + + extract = functools.partial( + self._extract, + data=data, + section_name="canon_sites", + item_name=canon_site_id, + ) + + residues = extract(key="residues", return_type=list) + + fields = { + "name": canon_site_id, + "residues": residues, + } + + conf_sites_ids = extract(key="conformer_site_ids", return_type=list) + ref_conf_site_id = extract(key="reference_conformer_site_id") + + index_data = { + "ref_conf_site": ref_conf_site_id, + "conformer_site_ids": conf_sites_ids, + "reference_conformer_site_id": ref_conf_site_id, + } + + return ProcessedObject( + model_class=CanonSite, + fields=fields, + index_data=index_data, + key=canon_site_id, + ) + + @create_objects(depth=1) + def process_canon_site_conf( + self, + canon_sites: dict[str, ModelInstance], + item_data: tuple[str, dict] | None = None, + **kwargs, + ) -> ProcessedObject | None: """Create Xtalform model instance from data. Incoming data format: @@ -370,265 +883,230 @@ def _process_canon_site_conf( Unable to add references to: - SiteObservation (ref_site_observation) """ - logger.debug("Creating CanonSiteConf object: %s", idx) - try: - canon_site_conf = existing_objects.get(name=idx) - new = False - except CanonSiteConf.DoesNotExist: - new = True - - canon_site_conf = CanonSiteConf( - name=idx, - residues=data["residues"], - canon_site=canon_site, - ) + del kwargs + conf_site_name, data = item_data + canon_site = canon_sites[conf_site_name] - try: - canon_site_conf.save() - except IntegrityError as exc: - msg = f"Failed to save CanonSiteConf: {data['name']}" - update_task(self.task, "ERROR", msg) - logger.error("%s%s", self.task_id, msg) - raise IntegrityError(msg) from exc - - return MetadataObjects( - instance=canon_site_conf, - data=data["reference_ligand_id"], - new=new, + extract = functools.partial( + self._extract, + data=data, + section_name="conformer_sites", + item_name=conf_site_name, + return_type=list, ) - def _process_site_observation( - self, - existing_objects=None, - experiment=None, - compound=None, - xtalform_site=None, - canon_site_conf=None, - chain=None, - ligand=None, - idx=None, - data=None, - ): - """Create SiteObservation model instance from data. + residues = extract(key="residues") - Incoming data format: - : { - structure: , - artefacts: , - event_map: , - x_map: , - pdb_apo: , - pdb_apo_solv: , - pdb_apo_desolv: , - ligand_mol: , - ligand_pdb: , - ligand_smiles: , + fields = { + "name": conf_site_name, + "residues": residues, + "canon_site": canon_site, } - """ - code = f"{experiment.code}_{chain}_{str(ligand)}_{str(idx)}" - logger.debug("Creating SiteObservation object: %s", code) - try: - site_observation = existing_objects.get(code=code) - new = False - except SiteObservation.DoesNotExist: - new = True - files = self._check_file_struct(self.target_root, data) + # members = extract(key="members") + ref_ligands = extract(key="reference_ligand_id") - mol_data = None - with open( - self.target_root.joinpath(files["ligand_mol"]), - "r", - encoding="utf-8", - ) as f: - mol_data = f.read() + index_fields = { + # "members": members, + "reference_ligands": ref_ligands, + } - try: - site_observation = SiteObservation( - # Code for this protein (e.g. Mpro_Nterm-x0029_A_501_0) - code=code, - experiment=experiment, - cmpd=compound, - xtalform_site=xtalform_site, - canon_site_conf=canon_site_conf, - bound_file=str(self._get_final_path(files["structure"])), - apo_solv_file=str(self._get_final_path(files["pdb_apo_solv"])), - apo_desolv_file=str(self._get_final_path(files["pdb_apo_desolv"])), - apo_file=str(self._get_final_path(files["pdb_apo"])), - xmap_2fofc_file=str(self._get_final_path(files["2Fo-Fc_map"])), - xmap_fofc_file=str(self._get_final_path(files["Fo-Fc_map"])), - event_file=str(self._get_final_path(files["event_map"])), - # artefacts file currently missing, hence the get - artefacts_file=str( - self._get_final_path(files.get("artefacts", None)) - ), - pdb_header_file="currently missing", - smiles=data["ligand_smiles"], - seq_id=ligand, - chain_id=chain, - ligand_mol_file=mol_data, - ) - except KeyError as exc: - logger.debug("exc: %s", exc) - msg = ( - f"Reference to {exc} file missing from aligned_files/" - + f"{experiment.code}/{chain}/{str(ligand)}/{str(idx)}" - ) - update_task(self.task, "ERROR", msg) - logger.error("%s%s", self.task_id, msg) - raise KeyError(msg) from exc + return ProcessedObject( + model_class=CanonSiteConf, + fields=fields, + index_data=index_fields, + key=conf_site_name, + ) - try: - site_observation.save() - except IntegrityError as exc: - msg = f"Failed to save SiteObservation: {site_observation.code}" - update_task(self.task, "ERROR", msg) - logger.error("%s%s", self.task_id, msg) - raise IntegrityError(msg) from exc - - # TODO: may have to implement KeyError, in case file is - # missing. but that's only when it's guaranteed that it - # should exist. - - return MetadataObjects( - instance=site_observation, - data=None, - new=new, + @create_objects(depth=1) + def process_xtalform_site( + self, + xtalforms: dict[int | str, MetadataObject], + canon_sites: dict[str, ModelInstance], + item_data: tuple[str, dict] | None = None, + **kwargs, + ) -> ProcessedObject | None: + """Create Xtalform model instance from data. + + Incoming data format: + : + xtalform_id: + canonical_site_id: + crystallographic_chain: A + members: + + Saves references to all other tables (Xtalform and CanonSite). + """ + del kwargs + xtalform_site_name, data = item_data + + extract = functools.partial( + self._extract, + data=data, + section_name="xtalform_sites", + item_name=xtalform_site_name, ) - def _process_canon_site(self, existing_objects=None, idx=None, data=None): - """Create CanonSite model instance from data. + xtalform_id = extract(key="xtalform_id") + + canon_site_id = extract(key="canonical_site_id") + + xtalform = xtalforms[xtalform_id].instance + canon_site = canon_sites[canon_site_id] - Incoming data format: - : - conformer_site_ids: - global_reference_dtag: - reference_conformer_site_id: - residues: + lig_chain = extract(key="crystallographic_chain") + residues = extract(key="members", return_type=list) - Unable to add references to: - - CanonSiteConf (ref_conf_site) + fields = { + "xtalform_site_id": xtalform_site_name, + "xtalform": xtalform, + "canon_site": canon_site, + } - """ - logger.debug("Creating CanonSite object: %d", idx) - try: - canon_site = existing_objects.get(name=idx) - new = False - except CanonSite.DoesNotExist: - new = True - - canon_site = CanonSite( - name=idx, - residues=data["residues"], - ) - try: - canon_site.save() - except IntegrityError as exc: - msg = f"Failed to save CanonSite: {idx}" - update_task(self.task, "ERROR", msg) - logger.error("%s%s", self.task_id, msg) - raise IntegrityError(msg) from exc - - return MetadataObjects( - instance=canon_site, - data=data, - new=new, + defaults = { + "lig_chain": lig_chain, + "residues": residues, + } + + return ProcessedObject( + model_class=XtalformSite, + fields=fields, + defaults=defaults, + key=xtalform_site_name, ) - def _process_xtalform_quatassembly( + @create_objects(depth=5) + def process_site_observation( self, - existing_objects=None, - xtalform=None, - quat_assembly=None, - idx=None, - data=None, - ): - """Create XtalformQuatAssembly model instance from data. + experiments: dict[int | str, MetadataObject], + compounds: dict[int | str, MetadataObject], + xtalform_sites: dict[str, ModelInstance], + canon_site_confs: dict[int | str, MetadataObject], + item_data: tuple[str, str, str, int | str, int | str, dict] | None = None, + # chain: str, + # ligand: str, + # idx: int | str, + # data: dict, + **kwargs, + ) -> ProcessedObject | None: + """Create SiteObservation model instance from data. Incoming data format: - : - assembly: - chains: - + : { + structure: , + artefacts: , + event_map: , + x_map: , + pdb_apo: , + pdb_apo_solv: , + pdb_apo_desolv: , + ligand_mol: , + ligand_pdb: , + ligand_smiles: , + } """ - logger.debug("Creating XtalformQuatAssembly object: %d", idx) + del kwargs try: - xtal_quat = existing_objects.get( - xtalform=xtalform, - quat_assembly=quat_assembly, - assembly_id=idx, - ) - new = False - except XtalformQuatAssembly.DoesNotExist: - new = True - - xtal_quat = XtalformQuatAssembly( - xtalform=xtalform, - quat_assembly=quat_assembly, - chains=data["chains"], - assembly_id=idx, - ) - try: - xtal_quat.save() - except IntegrityError as exc: - msg = f"Failed to save XtalformQuatAssembly: {idx}" - update_task(self.task, "ERROR", msg) - logger.error("%s%s", self.task_id, msg) - raise IntegrityError(msg) from exc - - return MetadataObjects( - instance=xtal_quat, + experiment_id, _, chain, ligand, idx, data = item_data + except ValueError: + # wrong data item + return None + + extract = functools.partial( + self._extract, data=data, - new=new, + section_name="crystals", + item_name=experiment_id, + level=Level.WARNING, ) - def _process_xtalform_site( - self, existing_objects=None, xtalform=None, canon_site=None, idx=None, data=None - ): - """Create Xtalform model instance from data. + experiment = experiments[experiment_id].instance - Incoming data format: - : - xtalform_id: - canonical_site_id: - crystallographic_chain: A - members: + code = f"{experiment.code}_{chain}_{str(ligand)}_{str(idx)}" + key = f"{experiment.code}/{chain}/{str(ligand)}" + + compound = compounds[experiment_id].instance + canon_site_conf = canon_site_confs[idx].instance + xtalform_site = xtalform_sites[key] + + ( # pylint: disable=unbalanced-tuple-unpacking + bound_file, + apo_solv_file, + apo_desolv_file, + apo_file, + artefacts_file, + ligand_mol, + xmap_2fofc_file, + xmap_fofc_file, + event_file, + ) = self.validate_files( + obj_identifier=experiment_id, + file_struct=data, + required=( + "structure", + "pdb_apo_solv", + "pdb_apo_desolv", + "pdb_apo", + ), + recommended=( + "artefacts", + "ligand_mol", + "2Fo-Fc_map", + "Fo-Fc_map", + "event_map", + ), + ) - Saves references to all other tables (Xtalform and CanonSite). - """ - logger.debug("Creating XtalformSite object: %d", idx) + mol_data = None try: - xtalform_site = existing_objects.get(xtalform_site_id=idx) - new = False - except XtalformSite.DoesNotExist: - new = True - - xtalform_site = XtalformSite( - xtalform=xtalform, - canon_site=canon_site, - lig_chain=data["crystallographic_chain"], - residues=data["members"], - xtalform_site_id=idx, - ) + with open( + self.raw_data.joinpath(ligand_mol), + "r", + encoding="utf-8", + ) as f: + mol_data = f.read() + except TypeError: + # this site observation doesn't have a ligand. perfectly + # legitimate case + pass + + smiles = extract(key="ligand_smiles") + + fields = { + # Code for this protein (e.g. Mpro_Nterm-x0029_A_501_0) + "code": code, + "experiment": experiment, + "cmpd": compound, + "xtalform_site": xtalform_site, + "canon_site_conf": canon_site_conf, + "smiles": smiles, + "seq_id": ligand, + "chain_id": chain, + } - try: - xtalform_site.save() - except IntegrityError as exc: - msg = f"Failed to save Xtalform: {idx}" - update_task(self.task, "ERROR", msg) - logger.error("%s%s", self.task_id, msg) - raise IntegrityError(msg) from exc - - return MetadataObjects( - instance=xtalform_site, - data=data["members"], - new=new, + defaults = { + "bound_file": str(self._get_final_path(bound_file)), + "apo_solv_file": str(self._get_final_path(apo_solv_file)), + "apo_desolv_file": str(self._get_final_path(apo_desolv_file)), + "apo_file": str(self._get_final_path(apo_file)), + "xmap_2fofc_file": str(self._get_final_path(xmap_2fofc_file)), + "xmap_fofc_file": str(self._get_final_path(xmap_fofc_file)), + "event_file": str(self._get_final_path(event_file)), + "artefacts_file": str(self._get_final_path(artefacts_file)), + "pdb_header_file": "currently missing", + "ligand_mol_file": mol_data, + } + + return ProcessedObject( + model_class=SiteObservation, + fields=fields, + defaults=defaults, + key=key, ) def process_metadata( self, - upload_root: Path = None, - task: Task = None, + upload_root: Path, ): """Extract model instances from metadata file and save them to db.""" # TODO: this method is quite long and should perhaps be broken @@ -636,8 +1114,8 @@ def process_metadata( # create model instances, so logically it's just doing the # same thing. - update_task(task, "PROCESSING", "Processing metadata") - logger.info("%sProcessing %s", self.task_id, upload_root) + # update_task(task, "PROCESSING", "Processing metadata") + logger.info("%sProcessing %s", self.report.task_id, upload_root) # moved this bit from init self.target, target_created = Target.objects.get_or_create( @@ -659,7 +1137,9 @@ def process_metadata( if self._is_already_uploaded(target_created, project_created): # remove uploaded file Path(self.bundle_path).unlink() - raise FileExistsError(f"{self.bundle_name} already uploaded, skipping.") + msg = f"{self.bundle_name} already uploaded, skipping." + self.report.log(Level.INFO, msg) + raise FileExistsError(msg) if project_created and committer.pk == settings.ANONYMOUS_USER: self.project.open_to_public = True @@ -673,284 +1153,123 @@ def process_metadata( self.experiment_upload.committer = committer self.experiment_upload.save() - assemblies = self._load_yaml(Path(upload_root).joinpath(ASSEMBLIES_FILE)) - xtalform_assemblies = self._load_yaml( - Path(upload_root).joinpath(XTALFORMS_FILE) + ( # pylint: disable=unbalanced-tuple-unpacking + assemblies, + xtalform_assemblies, + ) = self._get_yaml_blocks( + yaml_data=self._load_yaml(Path(upload_root).joinpath(XTALFORMS_FILE)), + blocks=("assemblies", "xtalforms"), ) + meta = self._load_yaml(Path(upload_root).joinpath(METADATA_FILE)) - assigned_xtalforms = self._load_yaml( - Path(upload_root).joinpath(ASSIGNED_XTALFORMS_FILE) - ) # collect top level info self.version_number = meta["version_number"] self.version_dir = meta["version_dir"] self.previous_version_dirs = meta["previous_version_dirs"] - blocks = [ - "crystals", - "xtalforms", - "canon_sites", - "conformer_sites", - "xtalform_sites", - ] - try: - ( # pylint: disable=unbalanced-tuple-unpacking - crystals, - xtalforms, - canon_sites, - conformer_sites, - xtalform_sites, - ) = self._get_meta_blocks(meta, blocks) - except FileNotFoundError as exc: - raise FileNotFoundError(exc.args[0]) from exc - except KeyError as exc: - raise KeyError(exc.args[0]) from exc - - result = [] - - # memo to self: the order of saving objects is dictated by db - # relations - when handling version 2 upload, I need to - # occasionally check if the object already exists and if yes, - # fetch it from the db. cannot do this without quering target - # (or visit?) so need to save objects along the relationships - - # fetch existing objects - old_experiments = Experiment.filter_manager.by_target(self.target) - logger.debug("%s existing Experiment objects found", old_experiments.count()) - - old_compounds = Compound.filter_manager.by_target(self.target) - logger.debug("%s existing Compound objects found", old_compounds.count()) - - old_xtalforms = Xtalform.filter_manager.by_target(self.target) - logger.debug("%s existing Xtalform objects found", old_xtalforms.count()) - - old_xtalquatasm = XtalformQuatAssembly.filter_manager.by_target(self.target) - logger.debug( - "%s existing XtalformQuatAssembly objects found", old_xtalquatasm.count() - ) - - old_quatassemblies = QuatAssembly.filter_manager.by_target(self.target) - logger.debug( - "%s existing QuatAssembly objects found", old_quatassemblies.count() - ) - - old_xtalformsites = XtalformSite.filter_manager.by_target(self.target) - logger.debug( - "%s existing XtalformSite objects found", old_xtalformsites.count() + ( # pylint: disable=unbalanced-tuple-unpacking + crystals, + xtalforms, + canon_sites, + conformer_sites, + xtalform_sites, + ) = self._get_yaml_blocks( + yaml_data=meta, + blocks=( + "crystals", + "xtalforms", + "canon_sites", + "conformer_sites", + "xtalform_sites", + ), ) - old_canonsites = CanonSite.filter_manager.by_target(self.target) - logger.debug("%s existing CanonSite objects found", old_canonsites.count()) - - old_canonsiteconfs = CanonSiteConf.filter_manager.by_target(self.target) - logger.debug( - "%s existing CanonSiteConf objects found", old_canonsiteconfs.count() - ) - - old_siteobservations = SiteObservation.filter_manager.by_target(self.target) - logger.debug( - "%s existing SiteObservaton objects found", old_siteobservations.count() - ) - - experiment_objects = {} - compound_objects = {} - for prot_name, prot_data in crystals.items(): - # TODO: unclear if I need to save experiment that doesn't have - # aligned crystal files section or should I move this beyond - # continue as well. - try: - experiment_objects[prot_name] = self._process_experiment( - existing_objects=old_experiments, - protein_name=prot_name, - data=prot_data, - ) - except IntegrityError as exc: - raise IntegrityError(exc.args[0]) from exc - - cmpd_data = prot_data["crystallographic_files"] - try: - compound_objects[prot_name] = self._process_compound( - existing_objects=old_compounds, - protein_name=prot_name, - data=cmpd_data, - ) - except IntegrityError as exc: - raise IntegrityError(exc.args[0]) from exc - except KeyError: - # this particular block doesn't have compound info - # continue with the loop, nothing to do - continue - - result.append(self._log_msg(experiment_objects)) - result.append(self._log_msg(compound_objects)) + experiment_objects = self.process_experiment(yaml_data=crystals) + compound_objects = self.process_compound(yaml_data=crystals) # save components manytomany to experiment - # TODO: is it 1:1 relationship? looking at the meta_align it seems to be, - # but why the m2m then? - for comp_code, comp_meta in compound_objects.items(): + # TODO: is it 1:1 relationship? looking at the meta_align it + # seems to be, but why the m2m then? + for ( + comp_code, + comp_meta, + ) in compound_objects.items(): # pylint: disable=no-member experiment = experiment_objects[comp_code].instance experiment.compounds.add(comp_meta.instance) - xtalform_objects = {} - for idx, obj in xtalforms.items(): - try: - xtalform_objects[idx] = self._process_xtalform( - existing_objects=old_xtalforms, - idx=idx, - data=obj, - ) - except IntegrityError as exc: - raise IntegrityError(exc.args[0]) from exc - - result.append(self._log_msg(xtalform_objects)) + xtalform_objects = self.process_xtalform(yaml_data=xtalforms) # add xtalform fk to experiment - for _, obj in experiment_objects.items(): + for _, obj in experiment_objects.items(): # pylint: disable=no-member try: obj.instance.xtalform = xtalform_objects[ - assigned_xtalforms[obj.instance.code] + obj.index_data["xtalform"] ].instance obj.instance.save() except KeyError: - # TODO: don't know at this point whether ignoring this - # error is a right thing to do + # TODO: message may need tweaking msg = f"xtalform {obj.instance.code} undefined for {obj}" - result.append(msg) logger.warning(msg) - quat_assembly_objects = {} - for idx, obj in assemblies.items(): - try: - quat_assembly_objects[idx] = self._process_quat_assembly( - existing_objects=old_quatassemblies, - idx=idx, - data=obj, - ) - except IntegrityError as exc: - raise IntegrityError(exc.args[0]) from exc - - result.append(self._log_msg(quat_assembly_objects)) - - # this is used just for logging, no other function - xtalform_quat_assembly_objects = {} - for xtalform_id, data in xtalform_assemblies.items(): - xtalform = xtalform_objects[xtalform_id].instance - for idx, obj in data["assemblies"].items(): - quat_assembly = quat_assembly_objects[obj["assembly"]].instance - key = f"{xtalform_id} {quat_assembly.id} {idx}" - xtalform_quat_assembly_objects[ - key - ] = self._process_xtalform_quatassembly( - existing_objects=old_xtalquatasm, - xtalform=xtalform, - quat_assembly=quat_assembly, - idx=idx, - data=obj, - ) + quat_assembly_objects = self.process_quat_assembly(yaml_data=assemblies) - result.append(self._log_msg(xtalform_quat_assembly_objects)) + _ = self.process_xtalform_quatassembly( + yaml_data=xtalform_assemblies, + xtalforms=xtalform_objects, + quat_assemblies=quat_assembly_objects, + ) - canon_site_objects = {} - for idx, obj in canon_sites.items(): - try: - canon_site_objects[idx] = self._process_canon_site( - existing_objects=old_canonsites, - idx=idx, - data=obj, - ) - except IntegrityError as exc: - raise IntegrityError(exc.args[0]) from exc + canon_site_objects = self.process_canon_site(yaml_data=canon_sites) # NB! missing fk's: # - ref_conf_site # - quat_assembly - result.append(self._log_msg(canon_site_objects)) - # reindex canon sites by canon_sites_conf_sites # NB! this is also used below for ref_conf_site in canon_site canon_sites_by_conf_sites = { conf: obj.instance - for obj in canon_site_objects.values() - for conf in obj.data["conformer_site_ids"] + for obj in canon_site_objects.values() # pylint: disable=no-member + for conf in obj.index_data["conformer_site_ids"] } - canon_site_conf_objects = {} - for idx, obj in conformer_sites.items(): - try: - canon_site_conf_objects[idx] = self._process_canon_site_conf( - existing_objects=old_canonsiteconfs, - canon_site=canon_sites_by_conf_sites[idx], - idx=idx, - data=obj, - ) - except IntegrityError as exc: - raise IntegrityError(exc.args[0]) from exc + canon_site_conf_objects = self.process_canon_site_conf( + yaml_data=conformer_sites, canon_sites=canon_sites_by_conf_sites + ) # NB! missing fk's: # - site_observation - result.append(self._log_msg(canon_site_conf_objects)) - - xtalform_sites_objects = {} - for idx, obj in xtalform_sites.items(): - try: - xtalform_sites_objects[idx] = self._process_xtalform_site( - existing_objects=old_xtalformsites, - xtalform=xtalform_objects[obj["xtalform_id"]].instance, - canon_site=canon_site_objects[obj["canonical_site_id"]].instance, - idx=idx, - data=obj, - ) - except IntegrityError as exc: - raise IntegrityError(exc.args[0]) from exc - - result.append(self._log_msg(xtalform_sites_objects)) + xtalform_sites_objects = self.process_xtalform_site( + yaml_data=xtalform_sites, + canon_sites=canon_sites_by_conf_sites, + xtalforms=xtalform_objects, + ) # now can update CanonSite with ref_conf_site - for val in canon_site_objects.values(): + for val in canon_site_objects.values(): # pylint: disable=no-member val.instance.ref_conf_site = canon_site_conf_objects[ - val.data["reference_conformer_site_id"] + val.index_data["reference_conformer_site_id"] ].instance val.instance.save() # canon site instances are now complete # still missing fk to site_observation in canon_site_conf - # reindex xtalform site to grab for site observation + # reindex xtalform site to grab for site observation I don't + # need this anywhere else, why won't i just give the correct + # key for xtal sites objects? xtalform_site_by_tag = {} - for val in xtalform_sites_objects.values(): - for k in val.data: + for val in xtalform_sites_objects.values(): # pylint: disable=no-member + for k in val.instance.residues: xtalform_site_by_tag[k] = val.instance - site_observation_objects = {} - # TODO: would be nice to get rid of quadruple for - for experiment_meta in experiment_objects.values(): - if experiment_meta.data is None: - continue - for chain, ligand in experiment_meta.data.items(): - for ligand, ligand_data in ligand.items(): - for idx, obj in ligand_data.items(): - key = f"{experiment_meta.instance.code}/{chain}/{ligand}" - try: - site_observation_objects[ - key - ] = self._process_site_observation( - existing_objects=old_siteobservations, - experiment=experiment_meta.instance, - compound=compound_objects[ - experiment_meta.instance.code - ].instance, - xtalform_site=xtalform_site_by_tag[key], - canon_site_conf=canon_site_conf_objects[idx].instance, - chain=chain, - ligand=ligand, - idx=idx, - data=obj, - ) - except IntegrityError as exc: - raise IntegrityError(exc.args[0]) from exc - - result.append(self._log_msg(site_observation_objects)) + site_observation_objects = self.process_site_observation( + yaml_data=crystals, + experiments=experiment_objects, + compounds=compound_objects, + xtalform_sites=xtalform_site_by_tag, + canon_site_confs=canon_site_conf_objects, + ) tag_categories = ( "ConformerSites", @@ -964,18 +1283,116 @@ def process_metadata( self._tag_site_observations(site_observation_objects, cat) # final remaining fk, attach reference site observation to canon_site_conf - for val in canon_site_conf_objects.values(): + for val in canon_site_conf_objects.values(): # pylint: disable=no-member val.instance.ref_site_observation = site_observation_objects[ - val.data + val.index_data["reference_ligands"] ].instance val.instance.save() - result.append( - f"{self.bundle_name} {upload_root.name}: " - + f"User {self.experiment_upload.committer} " - + f"uploaded target {self.target}" + def process_bundle(self): + """Resolves subdirs in uploaded data bundle. + + If called from task, takes task as a parameter for status updates. + """ + + # by now I should have archive unpacked, get target name from + # config.yaml + up_iter = self.raw_data.glob("upload_*") + try: + upload_dir = next(up_iter) + except StopIteration as exc: + msg = "Upload directory missing from uploaded file" + self.report.log(Level.FATAL, msg) + # what do you mean unused?! + raise StopIteration( + msg + ) from exc # pylint: disable=# pylint: disable=protected-access + + try: + upload_dir = next(up_iter) + self.report.log(Level.WARNING, "Multiple upload directories in archive") + except StopIteration: + # just a warning, ignoring the second one + pass + + # now that target name is not included in path, I don't need + # it here, need it just before creating target object. Also, + # there's probably no need to throw a fatal here, I can + # reasonably well deduce it from meta (I think) + config_it = upload_dir.glob(CONFIG_FILE) + try: + config_file = next(config_it) + except StopIteration as exc: + msg = f"config file missing from {str(upload_dir)}" + self.report.log(Level.FATAL, msg) + raise StopIteration() from exc + + config = self._load_yaml(config_file) + logger.debug("config: %s", config) + + try: + self.target_name = config["target_name"] + except KeyError as exc: + raise KeyError("target_name missing in config file") from exc + + self.process_metadata( + upload_root=upload_dir, ) + def _load_yaml(self, yaml_file: Path) -> dict: + try: + with open(yaml_file, "r", encoding="utf-8") as file: + contents = yaml.safe_load(file) + except FileNotFoundError as exc: + msg = f"{yaml_file.stem} file not found in data archive" + # logger.error("%s%s", self.task_id, msg) + self.report.log(Level.FATAL, msg) + raise FileNotFoundError(msg) from exc + + return contents + + # TODOL error handling. what's the correct response when + # something's missing? push through and compile report? + def _get_yaml_blocks(self, yaml_data: dict, blocks: Iterable) -> list[dict]: + error_text = "'{}' section missing in input file" + result = [] + for block in blocks: + try: + result.append(yaml_data[block]) + except KeyError: + msg = error_text.format(block) + self.report.log(Level.FATAL, msg) + + return result + + def _extract( + self, + data: dict, + key: str | int, + section_name: str, + item_name: str, + level: Level = Level.FATAL, + return_type: type = str, + ) -> Any: + try: + result = data[key] + except KeyError as exc: + if level == Level.INFO: + result = "" + else: + result = "missing" + if return_type == list: + result = [result] + + self.report.log( + level, + "{} missing from {}: {} section".format( + exc, + section_name, + item_name, + ), + ) + return result def _tag_site_observations(self, site_observation_objects, category): @@ -1081,7 +1498,7 @@ def _is_already_uploaded(self, target_created, project_created): return self.data_bundle in uploaded_files - def _get_final_path(self, path: Path): + def _get_final_path(self, path: str | None) -> Path | None: """Update relative path to final storage path NB! this returns a relative path that can be used in queries @@ -1089,188 +1506,11 @@ def _get_final_path(self, path: Path): database tables. """ try: - return self.final_path.joinpath(self.target_name).joinpath(path) + return self.final_path.joinpath(path) except TypeError: # received invalid path return None - def process_bundle(self, task=None): - """Resolves subdirs in uploaded data bundle. - - If called from task, takes task as a parameter for status updates. - """ - # result is final report, list of messages passed back to user - result = [] - - # by now I should have archive unpacked, get target name from - # config.yaml - - # this a bit of an chicken and an egg problem. I need to get - # to the config file in upload_N directory, but the path to - # that goes through another, which is usually the same as - # target. But.. I don't know if I can trust that, which is why - # I grab it from the config file where says this is the target - it = self.raw_data.iterdir() - try: - target_dir = next(it) - except StopIteration as exc: - raise StopIteration("Target directory missing from uploaded file!") from exc - - logger.debug("target_dir: %s", target_dir) - - # a quick sanity check, assuming only one target per upload - if sum(1 for _ in it) != 0: - tgd = "; ".join([str(p) for p in it]) - raise AssertionError( - f"More than one target directory in uploaded file: {tgd}" - ) - - target_path = self.raw_data.joinpath(target_dir) - it = target_path.iterdir() - try: - upload_dir = next(it) - except StopIteration as exc: - raise StopIteration("Upload directory missing from uploaded file!") from exc - - # technically, what I could do here is the same validation as - # with targed dir above, there should be only on upload_N - # directory under target. but it doesn't matter if there's - # more, I'll just use the first one - - config_it = upload_dir.glob(CONFIG_FILE) - try: - config_file = next(config_it) - except StopIteration as exc: - raise StopIteration(f"config file missing from {upload_dir}") from exc - - config = self._load_yaml(config_file) - logger.debug("config: %s", config) - - try: - self.target_name = config["target_name"] - except KeyError as exc: - raise KeyError("target_name missing in config file!") from exc - - self._target_root = self.raw_data.joinpath(self.target_name) - - # as mentioned above, there should be only one upload_N - # directory, so I suppose I could get rid of the loop - # here. doesn't hurt though, so I'll leave it for now - for path in Path(self.target_root).iterdir(): - if path.is_dir(): - logger.info("Found upload dir: %s", str(path)) - try: - upload_report = self.process_metadata( - upload_root=path, - task=task, - ) - except FileNotFoundError as exc: - result.append(exc.args[0]) - raise FileNotFoundError(exc.args[0]) from exc - except IntegrityError as exc: - result.append(exc.args[0]) - raise IntegrityError(exc.args[0]) from exc - except ValueError as exc: - result.append(exc.args[0]) - raise ValueError(exc.args[0]) from exc - except FileExistsError as exc: - # this target has been uploaded at- least once and - # this upload already exists. skip - result.append(exc.args[0]) - raise FileExistsError(exc.args[0]) from exc - - result.extend(upload_report) - - return result - - # standardized logging when processing metadata file - def _log_msg(self, obj_dict): - new_obj = sum([1 for k in obj_dict.values() if k.new]) - item = next(iter(obj_dict.values())) - msg = f"{len(obj_dict.keys())} {item.instance._meta.model} objects processed, {new_obj} created" # pylint: disable=protected-access - update_task(self.task, "PROCESSING", msg) - logger.info("%s%s", self.task_id, msg) - return f"{self.bundle_name} {self.version_dir}: {msg}" - - @staticmethod - def _check_file_struct(upload_root, file_struct): - """Check if file exists and if sha256 hash matches (if given). - - file struct can come in 2 configurations: - {file_key: {file: , sha265: [smiles: ]}, ...} - or simply - {file_key: } - Detect which one and take appropriate action. - """ - result = {} - for key, value in file_struct.items(): - if isinstance(value, dict): - try: - filename = value["file"] - - # I guess I don't care if it's not given? - file_hash = value.get("sha256", None) - - # TODO: error handling. don't know how to - # implement this because data structure/metadata - # not at final structure yet - if TargetLoader._check_file( - upload_root.joinpath(filename), file_hash - ): - result[key] = str(filename) - - except KeyError: - logger.warning("%s file info missing in %s!", key, METADATA_FILE) - except ValueError as exc: - logger.error("Invalid hash for file %s!", filename) - raise ValueError(f"Invalid hash for file {filename}!") from exc - - elif isinstance(value, str) and not key == "ligand_smiles": - # this is a bit of a workaround but ligand_smiles - # happens to be specified on the same level as files - if TargetLoader._check_file(upload_root.joinpath(value)): - result[key] = str(value) - else: - # wait, I think I should actually raise exp here.. - logger.warning( - "%s referenced in %s but not found in archive", - value, - METADATA_FILE, - ) - else: - # this is probably the list of panddas event files, don't - # need them here - # although.. should i validate them here nevertheless? - # i'd have to do this on copy otherwise.. - pass - - return result - - @staticmethod - def _check_file(file_path: Path, file_hash=None): - """Check if file exist and compare with hash.""" - if file_path.is_file(): - if file_hash: - if file_hash == TargetLoader._calculate_sha256(file_path): - return True - else: - # not logging error here because don't have - # correct file path - raise ValueError - else: - return True - return False - - # borrowed from SO - @staticmethod - def _calculate_sha256(filepath): - sha256_hash = hashlib.sha256() - with open(filepath, "rb") as f: - # Read the file in chunks of 4096 bytes - for chunk in iter(lambda: f.read(4096), b""): - sha256_hash.update(chunk) - return sha256_hash.hexdigest() - def load_target( data_bundle, @@ -1279,10 +1519,8 @@ def load_target( user_id=None, task=None, ): - # Unused args - del contact_email - # TODO: do I need to sniff out correct archive format? + del contact_email with TemporaryDirectory(dir=settings.MEDIA_ROOT) as tempdir: target_loader = TargetLoader( data_bundle, proposal_ref, tempdir, user_id=user_id, task=task @@ -1291,39 +1529,39 @@ def load_target( # archive is first extracted to temporary dir and moved later with tarfile.open(target_loader.bundle_path, "r") as archive: msg = f"Extracting bundle: {data_bundle}" - logger.info("%s%s", target_loader.task_id, msg) - update_task(task, "PROCESSING", msg) + logger.info("%s%s", target_loader.report.task_id, msg) + # update_task(task, "PROCESSING", msg) archive.extractall(target_loader.raw_data) msg = f"Data extraction complete: {data_bundle}" - logger.info("%s%s", target_loader.task_id, msg) + logger.info("%s%s", target_loader.report.task_id, msg) except FileNotFoundError as exc: - msg = f"{data_bundle} file does not exist!" - logger.exception("%s%s", target_loader.task_id, msg) + msg = f"{data_bundle} file does not exist" + logger.exception("%s%s", target_loader.report.task_id, msg) target_loader.experiment_upload.message = exc.args[0] raise FileNotFoundError(msg) from exc try: with transaction.atomic(): - upload_report = target_loader.process_bundle(task=task) - except FileNotFoundError as exc: - logger.error(exc.args[0]) - target_loader.experiment_upload.message = exc.args[0] - raise FileNotFoundError(exc.args[0]) from exc + target_loader.process_bundle() + if target_loader.report.failed: + # need to trigger transaction failure + raise IntegrityError( + f"Uploading {target_loader.data_bundle} failed" + ) except IntegrityError as exc: logger.error(exc, exc_info=True) - target_loader.experiment_upload.message = exc.args[0] - raise IntegrityError(exc.args[0]) from exc - except ValueError as exc: + target_loader.report.final(target_loader.data_bundle) + target_loader.experiment_upload.message = target_loader.report.json() + raise IntegrityError from exc + + except (FileExistsError, FileNotFoundError, StopIteration) as exc: + raise Exception from exc + + except Exception as exc: + # catching and logging any other error logger.error(exc, exc_info=True) - raise IntegrityError(exc.args[0]) from exc - except FileExistsError as exc: - logger.error(exc.args[0]) - target_loader.experiment_upload.message = exc.args[0] - raise FileExistsError(exc.args[0]) from exc - except AssertionError as exc: - logger.error(exc.args[0]) - target_loader.experiment_upload.message = exc.args[0] - raise AssertionError(exc.args[0]) from exc + target_loader.report.log(Level.FATAL, traceback.format_exc()) + raise Exception from exc # move to final location target_loader.abs_final_path.mkdir(parents=True) @@ -1334,35 +1572,9 @@ def load_target( set_directory_permissions(target_loader.abs_final_path, 0o755) - update_task(task, "SUCCESS", upload_report) - target_loader.experiment_upload.message = upload_report + target_loader.report.final(target_loader.data_bundle) + target_loader.experiment_upload.message = target_loader.report.json() - logger.debug("%s", upload_report) + # logger.debug("%s", upload_report) target_loader.experiment_upload.save() - - -def update_task(task, state, message): - try: - task.update_state( - state=state, - meta={ - "description": message, - }, - ) - except AttributeError: - # no task passed to method, nothing to do - pass - - -def set_directory_permissions(path, permissions): - for root, dirs, files in os.walk(path): - # Set permissions for directories - for directory in dirs: - dir_path = os.path.join(root, directory) - os.chmod(dir_path, permissions) - - # Set permissions for files - for file in files: - file_path = os.path.join(root, file) - os.chmod(file_path, permissions) diff --git a/viewer/tasks.py b/viewer/tasks.py index 053fa1a3..3ca82116 100644 --- a/viewer/tasks.py +++ b/viewer/tasks.py @@ -536,62 +536,10 @@ def task_load_target( user_id=user_id, task=self, ) - except KeyError as err: - logger.error('KeyError: %s', err, exc_info=True) - self.update_state( - state="ERROR", - meta={ - "description": err.args, - }, - ) - except IntegrityError as err: - logger.error('IntegrityError: %s', err, exc_info=True) - self.update_state( - state="ERROR", - meta={ - "description": err.args[0], - }, - ) - except ValueError as err: - logger.error('ValueError: %s', err, exc_info=True) - self.update_state( - state="ERROR", - meta={ - "description": err.args[0], - }, - ) - except FileNotFoundError as err: - logger.error('FileNotFoundError: %s', err, exc_info=True) - self.update_state( - state="ERROR", - meta={ - "description": err.args[0], - }, - ) - except FileExistsError as err: - logger.error('FileExistsError: %s', err, exc_info=True) - self.update_state( - state="ERROR", - meta={ - "description": err.args[0], - }, - ) - except AssertionError as err: - logger.error('AssertionError: %s', err, exc_info=True) - self.update_state( - state="ERROR", - meta={ - "description": err.args[0], - }, - ) - except OSError as err: - logger.error('OSError: %s', err, exc_info=True) - self.update_state( - state="ERROR", - meta={ - "description": err.args[1], - }, - ) + + except (IntegrityError, Exception): + # everything regarding logging and reporting has already been done + pass logger.info( 'TASK %s load_target completed, target_zip=%s', self.request.id, data_bundle diff --git a/viewer/tests/test_file_validation.py b/viewer/tests/test_file_validation.py index be3e9699..c572b8b0 100644 --- a/viewer/tests/test_file_validation.py +++ b/viewer/tests/test_file_validation.py @@ -1,16 +1,17 @@ -import copy from pathlib import Path from unittest import TestCase -from django.test import tag +# from viewer.target_loader import TargetLoader +from viewer.target_loader import calculate_sha256 + +# import copy -from viewer.target_loader import TargetLoader test_file1_path = Path(__file__).absolute().parent.joinpath("hash_test_file1.txt") -test_file1_hash = "35360b39bff52650367f7d2c08e29fa00a63a374c777b4055c75e4f97b173271" +test_file1_hash = "b3d5a81a0ab1b2cc5248c0ab1a27606c3e5aa23a765cc3cb93946e8870bcba36" test_file2_path = Path(__file__).absolute().parent.joinpath("hash_test_file2.txt") -test_file2_hash = "f2b6d99b58c4b32966779533b0987a769c11f0cfd9be4f20520a97032e41cb23" +test_file2_hash = "fca99022dfca9783570b8d1407b54df089febf02682a5acdff4a63fcac4ad8c7" file_struct_flat = { @@ -31,13 +32,8 @@ class FileValidationTests(TestCase): - @tag("broken") - def test__calculate_sha256_positive(self): - calculated_hash = ( - TargetLoader._calculate_sha256( # pylint: disable=protected-access - test_file1_path - ) - ) # pylint: disable=protected-access + def test_calculate_sha256_positive(self): + calculated_hash = calculate_sha256(test_file1_path) self.assertEqual( calculated_hash, @@ -45,13 +41,9 @@ def test__calculate_sha256_positive(self): "Hashes do not match for the positive test case.", ) - def test__calculate_sha256_negative(self): + def test_calculate_sha256_negative(self): incorrect_hash = "imagine if this were the actual hash" - calculated_hash = ( - TargetLoader._calculate_sha256( # pylint: disable=protected-access - test_file1_path - ) - ) # pylint: disable=protected-access + calculated_hash = calculate_sha256(test_file1_path) self.assertNotEqual( calculated_hash, @@ -59,77 +51,73 @@ def test__calculate_sha256_negative(self): "Hashes should not match for the negative test case.", ) - def test__check_file_existence(self): - file_path = Path(test_file1_path) - result = TargetLoader._check_file(file_path) # pylint: disable=protected-access - self.assertTrue( - result, "File existence check failed for the positive test case." - ) - - def test__check_nonexistent_file(self): - invalid_file_path = Path("path/to/nonexistent/file.txt") - result = TargetLoader._check_file( # pylint: disable=protected-access - invalid_file_path - ) # pylint: disable=protected-access - self.assertFalse( - result, "File existence check succeeded for the nonexistent file case." - ) - - def test__check_file_struct_flat_positive(self): - result = TargetLoader._check_file_struct( # pylint: disable=protected-access - Path(test_file1_path.root), file_struct_flat - ) - - self.assertEqual( - result, - file_struct_flat, - "File structure check failed for the positive test case.", - ) - - def test__check_file_struct_flat_incomplete_positive(self): - file_struct = copy.deepcopy(file_struct_flat) - del file_struct["file2"] - - result = TargetLoader._check_file_struct( # pylint: disable=protected-access - Path(test_file1_path.root), file_struct - ) - - expected_result = { - "file1": str(test_file1_path), - } - - self.assertEqual( - result, - expected_result, - "File structure check failed for the positive test case.", - ) - - @tag("broken") - def test_check_file_struct_nested_positive(self): - result = TargetLoader._check_file_struct( # pylint: disable=protected-access - Path(test_file1_path.root), file_struct_nested - ) - - self.assertEqual( - result, - file_struct_flat, - "File structure check failed for the positive test case.", - ) - - @tag("broken") - def test_check_file_struct_nested_incomplete_positive(self): - file_struct = copy.deepcopy(file_struct_nested) - file_struct["file2"]["sha256"] = "incorrect hash" - - expected_result = copy.deepcopy(file_struct_flat) - del expected_result["file2"] - - result = TargetLoader._check_file_struct( # pylint: disable=protected-access - Path(test_file1_path.root), file_struct - ) - - self.assertEqual( - result, - expected_result, - "File structure check failed for the positive test case.", - ) + # def test__check_file_existence(self): + # file_path = Path(test_file1_path) + # result = TargetLoader._check_file(file_path) # pylint: disable=protected-access + # self.assertTrue( + # result, "File existence check failed for the positive test case." + # ) + + # def test__check_nonexistent_file(self): + # invalid_file_path = Path("path/to/nonexistent/file.txt") + # result = TargetLoader._check_file(invalid_file_path) # pylint: disable=protected-access + # self.assertFalse( + # result, "File existence check succeeded for the nonexistent file case." + # ) + + # def test__check_file_struct_flat_positive(self): + # result = TargetLoader._check_file_struct( # pylint: disable=protected-access + # Path(test_file1_path.root), file_struct_flat + # ) + + # self.assertEqual( + # result, + # file_struct_flat, + # "File structure check failed for the positive test case.", + # ) + + # def test__check_file_struct_flat_incomplete_positive(self): + # file_struct = copy.deepcopy(file_struct_flat) + # del file_struct["file2"] + + # result = TargetLoader._check_file_struct( # pylint: disable=protected-access + # Path(test_file1_path.root), file_struct + # ) + + # expected_result = { + # "file1": str(test_file1_path), + # } + + # self.assertEqual( + # result, + # expected_result, + # "File structure check failed for the positive test case.", + # ) + + # def test_check_file_struct_nested_positive(self): + # result = TargetLoader._check_file_struct( # pylint: disable=protected-access + # Path(test_file1_path.root), file_struct_nested + # ) + + # self.assertEqual( + # result, + # file_struct_flat, + # "File structure check failed for the positive test case.", + # ) + + # def test_check_file_struct_nested_incomplete_positive(self): + # file_struct = copy.deepcopy(file_struct_nested) + # file_struct["file2"]["sha256"] = "incorrect hash" + + # expected_result = copy.deepcopy(file_struct_flat) + # del expected_result["file2"] + + # result = TargetLoader._check_file_struct( # pylint: disable=protected-access + # Path(test_file1_path.root), file_struct + # ) + + # self.assertEqual( + # result, + # expected_result, + # "File structure check failed for the positive test case.", + # ) diff --git a/viewer/tests/test_loader.py b/viewer/tests/test_loader.py index 9d4dc0f1..f9189acd 100644 --- a/viewer/tests/test_loader.py +++ b/viewer/tests/test_loader.py @@ -1,70 +1,68 @@ -import tarfile from pathlib import Path -from tempfile import TemporaryDirectory - -from django.conf import settings # from django.db import IntegrityError -from django.test import TestCase, tag +from django.test import TestCase + +# from tempfile import TemporaryDirectory +# import tarfile + + +# from django.conf import settings -from viewer.models import QuatAssembly -from viewer.target_loader import ASSEMBLIES_FILE, MetadataObjects, TargetLoader # from viewer import target_loader +# from viewer.target_loader import TargetLoader +# from viewer.target_loader import MetadataObject + -test_mpro_v1 = str(Path(__file__).absolute().parent.joinpath("Mpro-v1-zero.tgz")) -test_mpro_v2 = str(Path(__file__).absolute().parent.joinpath("Mpro-v2-zero.tgz")) +# from viewer.models import QuatAssembly + + +test_mpro_v1 = Path(__file__).absolute().parent.joinpath("Mpro-v1-zero.tgz") +test_mpro_v2 = Path(__file__).absolute().parent.joinpath("Mpro-v2-zero.tgz") class LoaderTests(TestCase): tempdir = None target_loader = None - @classmethod - def setUpTestData(cls): - cls.tempdir = TemporaryDirectory(dir=settings.MEDIA_ROOT) - - # set up target loader object - # I'm not calling .name in live code, how is it working there?? - cls.target_loader = TargetLoader(test_mpro_v1, "lb-test", cls.tempdir.name) - - with tarfile.open(cls.target_loader.bundle_path, "r") as archive: - archive.extractall(cls.target_loader.raw_data) - - # because I know where it is - upload_root = Path(cls.target_loader.target_root).joinpath("upload_1") - - cls.assemblies = ( - cls.target_loader._load_yaml( # pylint: disable=protected-access - upload_root.joinpath(ASSEMBLIES_FILE) - ) - ) # pylint: disable=protected-access - - @classmethod - def tearDownClass(cls): - if cls.tempdir: - cls.tempdir.cleanup() - super().tearDownClass() - - @tag("broken") - def test__process_quat_assembly_positive(self): - idx = next(iter(self.assemblies)) - data = self.assemblies[idx] - - assert self.target_loader - result = self.target_loader._process_quat_assembly( # pylint: disable=protected-access - QuatAssembly.objects.none(), idx, data - ) # pylint: disable=protected-access - - self.assertIsInstance( - result, MetadataObjects, "Returned object is not of MetadataObjects type." - ) - self.assertIsInstance( - result.instance, - QuatAssembly, - "Returned instance is not of QuatAssembly type.", - ) - - # result.instance.pk = None - # self.assertEqual(result.instance, quat_assembly, "Returned instance does not match the mock instance.") + # @classmethod + # def setUpTestData(cls): + + # cls.tempdir = TemporaryDirectory(dir=settings.MEDIA_ROOT) + + # # set up target loader object + # # I'm not calling .name in live code, how is it working there?? + # cls.target_loader = TargetLoader(test_mpro_v1, "lb-test", cls.tempdir.name) + + # with tarfile.open(cls.target_loader.bundle_path, "r") as archive: + # archive.extractall(cls.target_loader.raw_data) + + # # because I know where it is + # upload_root = Path(cls.target_loader.target_root).joinpath("upload_1") + + # cls.assemblies = cls.target_loader._load_yaml(upload_root.joinpath(ASSEMBLIES_FILE)) # pylint: disable=protected-access + + # @classmethod + # def tearDownClass(cls): + # cls.tempdir.cleanup() + # super().tearDownClass() + + # def test__process_quat_assembly_positive(self): + + # idx = next(iter(self.assemblies)) + # data = self.assemblies[idx] + + # quat_assembly = QuatAssembly( + # chains=data["chains"], + # name=idx, + # ) + + # result = self.target_loader._process_quat_assembly(QuatAssembly.objects.none(), idx, data) # pylint: disable=protected-access + + # self.assertIsInstance(result, MetadataObject, "Returned object is not of MetadataObjects type.") + # self.assertIsInstance(result.instance, QuatAssembly, "Returned instance is not of QuatAssembly type.") + + # # result.instance.pk = None + # # self.assertEqual(result.instance, quat_assembly, "Returned instance does not match the mock instance.")