From 7c30a223c85fe5a1a2d3f21dda86eec1c2cb8f59 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Fri, 20 Sep 2024 14:51:27 +0100 Subject: [PATCH 01/17] feat: allow multiple target uploads to different projects Main difference - remove m2m between target and project and add fk to project in target model. Quite a lot of implications to security and target object retrieval. Locally operations know to me (LHS and RHS upload, download) work. Next step, test in my stack --- api/security.py | 7 +- fragalysis/settings.py | 1 + hotspots/views.py | 2 +- hypothesis/views.py | 6 +- media_serve/views.py | 44 +-- scoring/views.py | 4 +- viewer/compound_set_upload.py | 130 --------- viewer/cset_upload.py | 18 +- viewer/forms.py | 32 +-- viewer/migrations/0065_auto_20240919_1502.py | 24 ++ viewer/migrations/0066_auto_20240919_1505.py | 45 +++ .../0067_remove_target_project_id.py | 17 ++ ...68_rename_project_rename_target_project.py | 18 ++ .../migrations/0069_alter_target_project.py | 19 ++ .../migrations/0070_alter_target_project.py | 19 ++ .../0071_target_unique_target_in_project.py | 17 ++ viewer/models.py | 20 +- viewer/permissions.py | 2 +- viewer/serializers.py | 14 +- viewer/tags.py | 10 + viewer/target_loader.py | 14 +- viewer/target_set_upload.py | 4 +- viewer/tasks.py | 4 + viewer/urls.py | 1 - viewer/utils.py | 4 +- viewer/views.py | 256 +++++++++--------- 26 files changed, 361 insertions(+), 371 deletions(-) delete mode 100644 viewer/compound_set_upload.py create mode 100644 viewer/migrations/0065_auto_20240919_1502.py create mode 100644 viewer/migrations/0066_auto_20240919_1505.py create mode 100644 viewer/migrations/0067_remove_target_project_id.py create mode 100644 viewer/migrations/0068_rename_project_rename_target_project.py create mode 100644 viewer/migrations/0069_alter_target_project.py create mode 100644 viewer/migrations/0070_alter_target_project.py create mode 100644 viewer/migrations/0071_target_unique_target_in_project.py diff --git a/api/security.py 
b/api/security.py index 12cc555d..f0c8bfcd 100644 --- a/api/security.py +++ b/api/security.py @@ -373,17 +373,16 @@ def user_is_member_of_target( """ Returns true if the user has access to any proposal the target belongs to. """ - target_proposals = [p.title for p in target.project_id.all()] user_proposals = self.get_proposals_for_user( user, restrict_public_to_membership=restrict_public_to_membership ) - is_member = any(proposal in user_proposals for proposal in target_proposals) + is_member = target.project.title in user_proposals if not is_member: logger.warning( "Failed membership check user='%s' target='%s' target_proposals=%s", user.username, - target.title, - target_proposals, + target.pk, + target.project.title, ) return is_member diff --git a/fragalysis/settings.py b/fragalysis/settings.py index 3e44409c..8c8ac067 100644 --- a/fragalysis/settings.py +++ b/fragalysis/settings.py @@ -638,6 +638,7 @@ # To simplify error messages when the match fails you can also # add an error message. 
TAS_REGEX: str = os.environ.get("TAS_REGEX", r"^(lb\d{5})(-(\d+)){0,1}$") + TAS_REGEX_ERROR_MSG: str = os.environ.get( "TAS_REGEX_ERROR_MSG", "Must begin 'lb' followed by 5 digits, optionally followed by a hyphen and a number.", diff --git a/hotspots/views.py b/hotspots/views.py index b7e5b762..f70de9f9 100644 --- a/hotspots/views.py +++ b/hotspots/views.py @@ -7,4 +7,4 @@ class HotspotView(ISPyBSafeQuerySet): queryset = HotspotMap.objects.all() serializer_class = HotspotMapSerializer filterset_fields = ("map_type", "target", "site_observation") - filter_permissions = "target__project_id" + filter_permissions = "target__project" diff --git a/hypothesis/views.py b/hypothesis/views.py index 1872bfa6..14c590de 100644 --- a/hypothesis/views.py +++ b/hypothesis/views.py @@ -21,18 +21,18 @@ class InteractionView(ISPyBSafeQuerySet): "prot_smarts", "mol_smarts", ) - filter_permissions = "interaction_point__targ_res__target_id__project_id" + filter_permissions = "interaction_point__targ_res__target_id__project" class InteractionPointView(ISPyBSafeQuerySet): queryset = InteractionPoint.objects.all() serializer_class = InteractionPointSerializer filterset_fields = ("site_observation", "protein_atom_name", "molecule_atom_name") - filter_permissions = "targ_res__target_id__project_id" + filter_permissions = "targ_res__target_id__project" class TargetResidueView(ISPyBSafeQuerySet): queryset = TargetResidue.objects.all() serializer_class = TargetResidueSerialzier filterset_fields = ("target_id", "res_name", "res_num", "chain_id") - filter_permissions = "target_id__project_id" + filter_permissions = "target_id__project" diff --git a/media_serve/views.py b/media_serve/views.py index ce599aeb..91227155 100644 --- a/media_serve/views.py +++ b/media_serve/views.py @@ -5,28 +5,6 @@ logger = logging.getLogger(__name__) -# def prot_download(request, file_path): -# """ -# Download a protein by nginx redirect -# :param request: the initial request -# :param file_path: the file path we're 
getting from the static -# :return: the response (a redirect to nginx internal) -# """ -# logger.info("+ Received file_download file path: %s", file_path) -# ispy_b_static = ISpyBSafeStaticFiles2() -# ispy_b_static.model = SiteObservation -# ispy_b_static.request = request -# # ispy_b_static.permission_string = "target_id__project_id" -# # ispy_b_static.permission_string = "target__project_id" -# ispy_b_static.permission_string = "experiment__experiment_upload__target__project_id" -# # ispy_b_static.field_name = "pdb_info" -# ispy_b_static.field_name = "apo_file" -# ispy_b_static.content_type = "application/x-pilot" -# ispy_b_static.prefix = "/pdbs/" -# # ispy_b_static.prefix = "/target_loader_data/" -# ispy_b_static.input_string = file_path -# return ispy_b_static.get_response() - def file_download(request, file_path): """ @@ -40,11 +18,8 @@ def file_download(request, file_path): # ispy_b_static = ISpyBSafeStaticFiles() ispy_b_static.model = SiteObservation ispy_b_static.request = request - # ispy_b_static.permission_string = "target_id__project_id" # the following 2 aren't used atm - ispy_b_static.permission_string = ( - "experiment__experiment_upload__target__project_id" - ) + ispy_b_static.permission_string = "experiment__experiment_upload__target__project" # ispy_b_static.field_name = "pdb_info" ispy_b_static.field_name = "apo_file" ispy_b_static.content_type = "application/x-pilot" @@ -68,11 +43,8 @@ def tld_download(request, file_path): # ispy_b_static = ISpyBSafeStaticFiles() ispy_b_static.model = SiteObservation ispy_b_static.request = request - # ispy_b_static.permission_string = "target_id__project_id" # the following 2 aren't used atm - ispy_b_static.permission_string = ( - "experiment__experiment_upload__target__project_id" - ) + ispy_b_static.permission_string = "experiment__experiment_upload__target__project" ispy_b_static.field_name = "apo_file" ispy_b_static.content_type = "application/x-pilot" ispy_b_static.prefix = "/target_loader_data/" @@ -92,9 
+64,7 @@ def cspdb_download(request, file_path): ispy_b_static.model = SiteObservation ispy_b_static.request = request # the following 2 aren't used atm - ispy_b_static.permission_string = ( - "experiment__experiment_upload__target__project_id" - ) + ispy_b_static.permission_string = "experiment__experiment_upload__target__project" ispy_b_static.field_name = "apo_file" ispy_b_static.content_type = "application/x-pilot" ispy_b_static.prefix = "/computed_set_data/" @@ -112,7 +82,7 @@ def bound_download(request, file_path): ispy_b_static = ISPyBSafeStaticFiles() ispy_b_static.model = SiteObservation ispy_b_static.request = request - ispy_b_static.permission_string = "target_id__project_id" + ispy_b_static.permission_string = "target_id__project" ispy_b_static.field_name = "bound_info" ispy_b_static.content_type = "application/x-pilot" ispy_b_static.prefix = "/bound/" @@ -130,7 +100,7 @@ def map_download(request, file_path): ispy_b_static = ISPyBSafeStaticFiles() ispy_b_static.model = SiteObservation ispy_b_static.request = request - ispy_b_static.permission_string = "target_id__project_id" + ispy_b_static.permission_string = "target_id__project" substrings = file_path.split('.')[0].split('_') substring = [x for x in substrings if x in ['2fofc', 'fofc', 'event']] @@ -166,7 +136,7 @@ def metadata_download(request, file_path): ispy_b_static = ISPyBSafeStaticFiles() ispy_b_static.model = Target ispy_b_static.request = request - ispy_b_static.permission_string = "project_id" + ispy_b_static.permission_string = "project" ispy_b_static.field_name = "metadata" ispy_b_static.content_type = "application/x-pilot" ispy_b_static.prefix = "/metadata/" @@ -184,7 +154,7 @@ def archive_download(request, file_path): ispy_b_static = ISPyBSafeStaticFiles() ispy_b_static.model = Target ispy_b_static.request = request - ispy_b_static.permission_string = "project_id" + ispy_b_static.permission_string = "project" ispy_b_static.field_name = "zip_archive" ispy_b_static.content_type = 
"application/zip" ispy_b_static.prefix = "/targets/" diff --git a/scoring/views.py b/scoring/views.py index 1706b0a8..4fc25e0e 100644 --- a/scoring/views.py +++ b/scoring/views.py @@ -34,7 +34,7 @@ class ViewSceneView( # filter_backends = (filters.DjangoFilterBackend,) serializer_class = ViewSceneSerializer filterset_fields = ("user_id", "uuid") - filter_permissions = "snapshot__session_project__target__project_id" + filter_permissions = "snapshot__session_project__target__project" permission_classes = [IsObjectProposalMember] def put(self, request, *args, **kwargs): @@ -113,7 +113,7 @@ class SiteObservationGroupView( queryset = SiteObservationGroup.objects.all() serializer_class = SiteObservationGroupSerializer filterset_fields = ("group_type", "site_observation", "target", "description") - filter_permissions = "target__project_id" + filter_permissions = "target__project" permission_classes = [IsObjectProposalMember] diff --git a/viewer/compound_set_upload.py b/viewer/compound_set_upload.py deleted file mode 100644 index d1ac566c..00000000 --- a/viewer/compound_set_upload.py +++ /dev/null @@ -1,130 +0,0 @@ -import datetime -import os - -os.environ.setdefault("DJANGO_SETTINGS_MODULE", "fragalysis.settings") -import django - -django.setup() -import os.path - -from django.conf import settings -from rdkit import Chem - -from viewer.models import ( - ComputedSetSubmitter, - ScoreDescription, - SiteObservation, - Target, -) - - -def get_inspiration_frags(cpd, compound_set): - # Don't need... 
- del cpd - del compound_set - - -def process_pdb(pdb_code, target, zfile, zfile_hashvals): - # pdb_fn = zfile[pdb_code].split('/')[-1] - - if zfile_hashvals: - for key in zfile_hashvals.keys(): - if key == pdb_code: - pdb_code = f'{pdb_code}-{zfile_hashvals[pdb_code]}' - - pdb_fp = zfile[pdb_code] - pdb_fn = zfile[pdb_code].split('/')[-1] - - # Move and save the protein pdb from tmp to pdbs folder - # pdb may have already been moved and Protein object created - - # prot_objs = Protein.objects.filter(code=pdb_code) - # if len(prot_objs) > 0: - # raise Exception(f'Something went wrong with pdb zip upload: {[c.code for c in prot_objs]}') - - ## THIS BIT ISN'T MOVING THE FILES PROPERLY - new_filename = settings.MEDIA_ROOT + 'pdbs/' + pdb_fn - old_filename = settings.MEDIA_ROOT + pdb_fp - os.renames(old_filename, new_filename) - - # Create Protein object - site_obvs = SiteObservation() - site_obvs.code = pdb_code - target_obj = Target.objects.get(title=target) - site_obvs.target_id = target_obj - site_obvs.pdb_info = 'pdbs/' + pdb_fn - site_obvs.save() - - # Get Protein object - prot_obj = SiteObservation.objects.get(code=pdb_code) - - return prot_obj - - -# use zfile object for pdb files uploaded in zip -def get_prot(mol, target, compound_set, zfile, zfile_hashvals=None): - pdb_fn = mol.GetProp('ref_pdb').split('/')[-1] - - if zfile: - pdb_code = pdb_fn.replace('.pdb', '') - site_obvs = process_pdb( - pdb_code=pdb_code, target=target, zfile=zfile, zfile_hashvals=zfile_hashvals - ) - field = site_obvs.pdb_info - - else: - name = compound_set.target.title + '-' + pdb_fn - site_obvs = SiteObservation.objects.get( - code__contains=name.split(':')[0].split('_')[0] - ) - field = site_obvs.pdb_info - - return field - - -def get_submission_info(description_mol): - y_m_d = description_mol.GetProp('generation_date').split('-') - - submitter = ComputedSetSubmitter.objects.get_or_create( - name=description_mol.GetProp('submitter_name'), - method=description_mol.GetProp('method'), 
- email=description_mol.GetProp('submitter_email'), - institution=description_mol.GetProp('submitter_institution'), - generation_date=datetime.date(int(y_m_d[0]), int(y_m_d[1]), int(y_m_d[2])), - )[0] - - return submitter - - -def get_additional_mols(filename, compound_set): - suppl = Chem.SDMolSupplier(str(filename)) - mols = [] - - for i in range(0, len(suppl)): - mols.append(suppl[i]) - - descriptions_list = list( - set( - [ - item - for sublist in [list(m.GetPropsAsDict().keys()) for m in mols] - for item in sublist - ] - ) - ) - - missing = [] - - for desc in descriptions_list: - existing = ScoreDescription.objects.filter(computed_set=compound_set, name=desc) - if len(existing) == 0 and desc not in [ - 'original SMILES', - 'ref_mols', - 'ref_pdb', - ]: - missing.append(desc) - - if len(missing) > 0: - return f"Missing score descriptions for: {', '.join(missing)}, please re-upload" - - return mols diff --git a/viewer/cset_upload.py b/viewer/cset_upload.py index cad958b1..4db5f1a6 100644 --- a/viewer/cset_upload.py +++ b/viewer/cset_upload.py @@ -140,6 +140,7 @@ def __init__( zfile, zfile_hashvals, computed_set_name, + project_name, ): self.user_id = user_id self.sdf_filename = sdf_filename @@ -150,6 +151,7 @@ def __init__( self.zfile = zfile self.zfile_hashvals = zfile_hashvals self.computed_set_name = computed_set_name + self.project_name = project_name def process_pdb(self, pdb_code, zfile, zfile_hashvals) -> str | None: for key in zfile_hashvals.keys(): @@ -293,17 +295,7 @@ def create_mol(self, inchi, target, name=None) -> Compound: cpd.save() # This is a new compound. # We must now set relationships to the Proposal that it applies to. - # We do this by copying the relationships from the Target. 
- num_target_proposals = len(target.project_id.all()) - assert num_target_proposals > 0 - if num_target_proposals > 1: - logger.warning( - 'Compound Target %s has more than one Proposal (%d)', - target.title, - num_target_proposals, - ) - for project in target.project_id.all(): - cpd.project_id.add(project) + cpd.project_id.add(target.project) except MultipleObjectsReturned as exc: # NB! when processing new uploads, Compound is always # fetched by inchi_key, so this shouldn't ever create @@ -628,7 +620,9 @@ def task(self) -> ComputedSet: today: datetime.date = datetime.date.today() new_ordinal: int = 1 try: - target = Target.objects.get(title=self.target) + target = Target.objects.get( + title=self.target, project=self.project_name + ) except Target.DoesNotExist as exc: # probably wrong target name supplied logger.error('Target %s does not exist', self.target) diff --git a/viewer/forms.py b/viewer/forms.py index 086b2994..2e26f4f8 100644 --- a/viewer/forms.py +++ b/viewer/forms.py @@ -1,5 +1,4 @@ from django import forms -from django.conf import settings CSET_CHOICES = [ ('V', 'Validate'), @@ -16,6 +15,7 @@ class CSetForm(forms.Form): """Used for uploading Computed sets at viewer/upload_cset""" + proposal_ref = forms.CharField(required=False, label='Proposal', max_length=200) target_name = forms.CharField(label='Target', max_length=100, required=False) sdf_file = forms.FileField(label='All compounds sdf (.sdf)', required=False) pdb_zip = forms.FileField(label='PDB files (.zip)', required=False) @@ -23,33 +23,3 @@ class CSetForm(forms.Form): # The upload_key was removed as part of development if issue-859, # the phase 2 Job launcher, # The user-specific 'upload_key' is/was key generated by `viewer.views.cset_key` - - -class CSetUpdateForm(forms.Form): - """Used for updating Computed sets at viewer/update_cset""" - - target_name = forms.CharField(label='Target', max_length=100) - sdf_file = forms.FileField(label='All compounds sdf (.sdf)') - pdb_zip = 
forms.FileField(required=False, label='PDB files (.zip)') - - -class TSetForm(forms.Form): - """Used for uploading Target sets at viewer/upload_tset""" - - target_name = forms.CharField(label='Target', max_length=100) - target_zip = forms.FileField(required=True, label='Target data (.zip)') - submit_choice = forms.CharField(widget=forms.RadioSelect(choices=TSET_CHOICES)) - - # For Diamond, a proposal is always required. - # For other implementations, it may be optional or omitted. - if settings.PROPOSAL_SUPPORTED and settings.PROPOSAL_REQUIRED: - proposal_ref = forms.CharField(required=True, label='Proposal', max_length=200) - elif settings.PROPOSAL_SUPPORTED: - proposal_ref = forms.CharField(required=False, label='Proposal', max_length=200) - else: - proposal_ref = '' - - contact_email = forms.EmailField( - widget=forms.TextInput(attrs={'class': 'form-control', 'autocomplete': 'off'}), - required=False, - ) diff --git a/viewer/migrations/0065_auto_20240919_1502.py b/viewer/migrations/0065_auto_20240919_1502.py new file mode 100644 index 00000000..3837e091 --- /dev/null +++ b/viewer/migrations/0065_auto_20240919_1502.py @@ -0,0 +1,24 @@ +# Generated by Django 3.2.25 on 2024-09-19 15:02 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('viewer', '0064_auto_20240918_1256'), + ] + + operations = [ + migrations.AddField( + model_name='target', + name='project_rename', + field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.CASCADE, related_name='target_rename', to='viewer.project'), + ), + migrations.AlterField( + model_name='target', + name='title', + field=models.CharField(help_text='A title, i.e. 
Mpro', max_length=200), + ), + ] diff --git a/viewer/migrations/0066_auto_20240919_1505.py b/viewer/migrations/0066_auto_20240919_1505.py new file mode 100644 index 00000000..5001eef2 --- /dev/null +++ b/viewer/migrations/0066_auto_20240919_1505.py @@ -0,0 +1,45 @@ +# Generated by Django 3.2.25 on 2024-09-19 15:05 + + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('viewer', '0065_auto_20240919_1502'), + ] + + def restruct_projects(apps, schema_editor): + Project = apps.get_model('viewer', 'Project') + Target = apps.get_model('viewer', 'Target') + + # atm, target has m2m to project + # added foreign key to project to target model + # go through projects and set the correct key + + # NB! this schema allows target to be in multiple + # projects. with new schema this is not possible + + for target in Target.objects.all(): + projects = target.project_id.all() + if projects.count() > 1: + # this shouldn't exist, but if it for some reason + # does, make some noise + raise ValueError( + f'Target id={target.pk} is under multiple projects!' 
+ + ' Resolve conflict before re-running migration.', + ) + else: + target.project_rename = projects[0] + target.save() + + + + # This is unlikely to be possible and even less likely ever required + def destruct_projects(apps, schema_editor): + pass + + operations = [ + migrations.RunPython(restruct_projects, destruct_projects) + ] diff --git a/viewer/migrations/0067_remove_target_project_id.py b/viewer/migrations/0067_remove_target_project_id.py new file mode 100644 index 00000000..94dcaf40 --- /dev/null +++ b/viewer/migrations/0067_remove_target_project_id.py @@ -0,0 +1,17 @@ +# Generated by Django 3.2.25 on 2024-09-19 15:26 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('viewer', '0066_auto_20240919_1505'), + ] + + operations = [ + migrations.RemoveField( + model_name='target', + name='project_id', + ), + ] diff --git a/viewer/migrations/0068_rename_project_rename_target_project.py b/viewer/migrations/0068_rename_project_rename_target_project.py new file mode 100644 index 00000000..f251a42f --- /dev/null +++ b/viewer/migrations/0068_rename_project_rename_target_project.py @@ -0,0 +1,18 @@ +# Generated by Django 3.2.25 on 2024-09-19 15:30 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('viewer', '0067_remove_target_project_id'), + ] + + operations = [ + migrations.RenameField( + model_name='target', + old_name='project_rename', + new_name='project', + ), + ] diff --git a/viewer/migrations/0069_alter_target_project.py b/viewer/migrations/0069_alter_target_project.py new file mode 100644 index 00000000..a8d943cf --- /dev/null +++ b/viewer/migrations/0069_alter_target_project.py @@ -0,0 +1,19 @@ +# Generated by Django 3.2.25 on 2024-09-19 15:33 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('viewer', '0068_rename_project_rename_target_project'), + ] + + 
operations = [ + migrations.AlterField( + model_name='target', + name='project', + field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.CASCADE, to='viewer.project'), + ), + ] diff --git a/viewer/migrations/0070_alter_target_project.py b/viewer/migrations/0070_alter_target_project.py new file mode 100644 index 00000000..376dc956 --- /dev/null +++ b/viewer/migrations/0070_alter_target_project.py @@ -0,0 +1,19 @@ +# Generated by Django 3.2.25 on 2024-09-19 15:41 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('viewer', '0069_alter_target_project'), + ] + + operations = [ + migrations.AlterField( + model_name='target', + name='project', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='viewer.project'), + ), + ] diff --git a/viewer/migrations/0071_target_unique_target_in_project.py b/viewer/migrations/0071_target_unique_target_in_project.py new file mode 100644 index 00000000..40bba019 --- /dev/null +++ b/viewer/migrations/0071_target_unique_target_in_project.py @@ -0,0 +1,17 @@ +# Generated by Django 3.2.25 on 2024-09-20 10:38 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('viewer', '0070_alter_target_project'), + ] + + operations = [ + migrations.AddConstraint( + model_name='target', + constraint=models.UniqueConstraint(fields=('title', 'project'), name='unique_target_in_project'), + ), + ] diff --git a/viewer/models.py b/viewer/models.py index e104399a..68626b7e 100644 --- a/viewer/models.py +++ b/viewer/models.py @@ -77,13 +77,12 @@ class Target(models.Model): (REVOKED, 'REVOKED'), ) - title = models.CharField( - unique=True, max_length=200, help_text="A title, i.e. Mpro" - ) + title = models.CharField(max_length=200, help_text="A title, i.e. 
Mpro") display_name = models.TextField(null=False, blank=True) init_date = models.DateTimeField(auto_now_add=True) - project_id = models.ManyToManyField(Project) + project = models.ForeignKey(Project, on_delete=models.CASCADE) + uniprot_id = models.CharField( max_length=100, null=True, @@ -131,9 +130,20 @@ def __repr__(self) -> str: self.id, self.title, self.display_name, - self.project_id, + self.project, ) + class Meta: + constraints = [ + models.UniqueConstraint( + fields=[ + "title", + "project", + ], + name="unique_target_in_project", + ), + ] + class ExperimentUpload(models.Model): LOADING = "LOADING" diff --git a/viewer/permissions.py b/viewer/permissions.py index b9541013..d105a6fb 100644 --- a/viewer/permissions.py +++ b/viewer/permissions.py @@ -34,7 +34,7 @@ def has_object_permission(self, request, view, obj): ) # The object's proposal records (one or many) can be obtained via # the view's 'filter_permissions' property. A standard - # django property reference, e.g. 'target__project_id'. + # django property reference, e.g. 'target__project'. object_proposals = [] attr_value = getattr(obj, view.filter_permissions) diff --git a/viewer/serializers.py b/viewer/serializers.py index 5d070565..2154f562 100644 --- a/viewer/serializers.py +++ b/viewer/serializers.py @@ -83,12 +83,7 @@ def validate(self, data): raise serializers.ValidationError(msg) from exc assert project_obj # Now get the proposals from the Project(s)... - if project_obj.__class__.__name__ == "ManyRelatedManager": - # Potential for many proposals... - object_proposals = [p.title for p in project_obj.all()] - else: - # Only one proposal... 
- object_proposals = [project_obj.title] + object_proposals = project_obj.values_list('title', flat=True) if not object_proposals: raise PermissionDenied( detail="Authority cannot be granted - the object is not a part of any Project" @@ -216,7 +211,7 @@ class Meta: "id", "title", "display_name", - "project_id", + "project", "default_squonk_project", "template_protein", "metadata", @@ -226,7 +221,7 @@ class Meta: extra_kwargs = { "id": {"read_only": True}, "title": {"read_only": True}, - "project_id": {"read_only": True}, + "project": {"read_only": True}, "default_squonk_project": {"read_only": True}, "template_protein": {"read_only": True}, "metadata": {"read_only": True}, @@ -781,6 +776,9 @@ class Meta: class DownloadStructuresSerializer(serializers.Serializer): target_name = serializers.CharField(max_length=200, default=None, allow_blank=True) + target_access_string = serializers.CharField( + max_length=200, default=None, allow_blank=True + ) proteins = serializers.CharField(max_length=5000, default='', allow_blank=True) all_aligned_structures = serializers.BooleanField(default=False) pdb_info = serializers.BooleanField(default=False) diff --git a/viewer/tags.py b/viewer/tags.py index c0e30b69..086ebd0f 100644 --- a/viewer/tags.py +++ b/viewer/tags.py @@ -298,6 +298,16 @@ def load_tags_from_file(filename: str, target: Target) -> list[str]: # type: ig logger.debug('so_df %s', so_df) so_qs = qs.filter(longcode__in=so_df) logger.debug('pose so queryset: %s', so_qs) + + if not so_qs.exists(): + msg = ( + f'No observations found for pose {pose_name}.' + + ' Most likely upload data is incompatible with target data.' 
+ ) + logger.error(msg) + errors.append(msg) + continue + try: main_so = so_qs.get(code=pose_name) except SiteObservation.DoesNotExist: diff --git a/viewer/target_loader.py b/viewer/target_loader.py index fb61054c..dfea7039 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -1502,9 +1502,16 @@ def process_bundle(self): self.report.log(logging.ERROR, msg) raise KeyError(msg) from exc + # project needs to be created before target + # TODO: original target loader's function get_create_projects + # seems to handle more cases. adopt or copy + visit = self.proposal_ref.split()[0] + self.project, project_created = Project.objects.get_or_create(title=visit) + self.target, target_created = Target.objects.get_or_create( title=self.target_name, display_name=self.target_name, + project=self.project, ) if target_created: @@ -1522,11 +1529,6 @@ def process_bundle(self): self._final_path = self._final_path.joinpath(target_dir) self._abs_final_path = self._abs_final_path.joinpath(target_dir) - # TODO: original target loader's function get_create_projects - # seems to handle more cases. adopt or copy - visit = self.proposal_ref.split()[0] - self.project, project_created = Project.objects.get_or_create(title=visit) - try: committer = get_user_model().objects.get(pk=self.user_id) except get_user_model().DoesNotExist: @@ -1552,9 +1554,7 @@ def process_bundle(self): self.project.open_to_public = True self.project.save() - # populate m2m field assert self.target - self.target.project_id.add(self.project) # check transformation matrix files ( # pylint: disable=unbalanced-tuple-unpacking diff --git a/viewer/target_set_upload.py b/viewer/target_set_upload.py index bc4d9cc1..adec9866 100644 --- a/viewer/target_set_upload.py +++ b/viewer/target_set_upload.py @@ -397,8 +397,8 @@ def get_create_projects(target, proposal_ref, proposal_code='lb'): # If not open then delete users for the project and re-add them based on supplied fed-ids. 
delete_users(project) - # Update project_id on target. - target.project_id.add(project) + # Update project on target. + target.project = project # The remaining words in proposal_ref (if any) # are expected to be fedid's (user IDs) which are used to find user information. diff --git a/viewer/tasks.py b/viewer/tasks.py index 2274fdd4..d5a238ed 100644 --- a/viewer/tasks.py +++ b/viewer/tasks.py @@ -102,6 +102,7 @@ def process_compound_set(validate_output): zfile=zfile, zfile_hashvals=zfile_hashvals, computed_set_name=computed_set_name, + project_name=params['proposal_ref'], ) compound_set = save_mols.task() @@ -158,6 +159,7 @@ def validate_compound_set(task_params): user_id = task_params['user_id'] sdf_file = task_params['sdf_file'] target = task_params['target'] + proposal_ref = task_params['proposal_ref'] # Optional params zfile = task_params.get('zfile') update = task_params.get('update') @@ -166,6 +168,7 @@ def validate_compound_set(task_params): logger.info('validate_compound_set() user_id=%s', user_id) logger.info('validate_compound_set() sdf_file=%s', sdf_file) logger.info('validate_compound_set() target=%s', target) + logger.info('validate_compound_set() proposal_ref=%s', proposal_ref) logger.info('validate_compound_set() update=%s', update) validated = True @@ -187,6 +190,7 @@ def validate_compound_set(task_params): 'user_id': user_id, 'sdf': sdf_file, 'target': target, + 'proposal_ref': proposal_ref, 'pdb_zip': zfile, 'update': update, } diff --git a/viewer/urls.py b/viewer/urls.py index fe7e909f..62ace351 100644 --- a/viewer/urls.py +++ b/viewer/urls.py @@ -25,7 +25,6 @@ path("img_from_smiles/", views.img_from_smiles, name="img_from_smiles"), path("highlight_mol_diff/", views.highlight_mol_diff, name="highlight_mol_diff"), path("open_targets/", views.get_open_targets, name="get_open_targets"), - path("compound_set//", views.computed_set_download, name="compound_set"), path("upload_designs/", views.DesignSetUploadView.as_view(), name="upload_designs"), 
path("job_access/", views.JobAccessView.as_view(), name="job_access"), path( diff --git a/viewer/utils.py b/viewer/utils.py index 748f5530..e915f452 100644 --- a/viewer/utils.py +++ b/viewer/utils.py @@ -302,7 +302,7 @@ def restore_curated_tags(filename: str) -> None: ) for data in so_group_data: try: - target = targets.get(title=data['ann_target_name']) + target = targets.get(pk=data['target_id']) except Target.DoesNotExist: logger.warning( 'Tried to restore SiteObservationGroup for target that does not exist: %s', @@ -324,7 +324,7 @@ def restore_curated_tags(filename: str) -> None: ) for data in so_tag_data: try: - target = targets.get(title=data['ann_target_name']) + target = targets.get(pk=data['target_id']) except Target.DoesNotExist: logger.warning( 'Tried to restore SiteObservationTag for target that does not exist: %s', diff --git a/viewer/views.py b/viewer/views.py index b59cfc49..9c1a43a3 100644 --- a/viewer/views.py +++ b/viewer/views.py @@ -179,47 +179,16 @@ def get_open_targets(request): open_proposals: set = _ISPYB_SAFE_QUERY_SET.get_open_proposals() for t in targets: - for p in t.project_id.all(): - if p.title in open_proposals: - target_names.append(t.title) - target_ids.append(t.id) - break + if t.project.title in open_proposals: + target_names.append(t.title) + target_ids.append(t.id) + break return HttpResponse( json.dumps({'target_names': target_names, 'target_ids': target_ids}) ) -def computed_set_download(request, name): - """View to download an SDF file of a ComputedSet by name - (viewer/compound_set/()). - """ - # Unused arguments - del request - - computed_set = models.ComputedSet.objects.get(name=name) - if not computed_set: - return HttpResponse(status=status.HTTP_404_NOT_FOUND) - # Is the computed set available to the user? 
- if not _ISPYB_SAFE_QUERY_SET.user_is_member_of_target( - request.user, computed_set.target - ): - return HttpResponse( - "You are not a member of the CompoundSet's Target Proposal", - status=status.HTTP_403_FORBIDDEN, - ) - - written_filename = computed_set.written_sdf_filename - with open(written_filename, 'r', encoding='utf-8') as wf: - data = wf.read() - response = HttpResponse(content_type='text/plain') - response[ - 'Content-Disposition' - ] = f'attachment; filename={name}.sdf' # force browser to download file - response.write(data) - return response - - # ----------------- # CLASS-BASED VIEWS # ----------------- @@ -250,7 +219,7 @@ class VectorsView(ISPyBSafeQuerySet): queryset = models.SiteObservation.filter_manager.filter_qs() serializer_class = serializers.VectorsSerializer filterset_class = filters.VectorFilter - filter_permissions = "experiment__experiment_upload__target__project_id" + filter_permissions = "experiment__experiment_upload__target__project" class MolecularPropertiesView(ISPyBSafeQuerySet): @@ -259,7 +228,7 @@ class MolecularPropertiesView(ISPyBSafeQuerySet): queryset = models.Compound.filter_manager.filter_qs() serializer_class = serializers.MolpropsSerializer filterset_class = filters.MolpropsFilter - filter_permissions = "experiment__experiment_upload__target__project_id" + filter_permissions = "experiment__experiment_upload__target__project" class GraphView(ISPyBSafeQuerySet): @@ -268,7 +237,7 @@ class GraphView(ISPyBSafeQuerySet): queryset = models.SiteObservation.filter_manager.filter_qs() serializer_class = serializers.GraphSerializer filterset_class = filters.GraphFilter - filter_permissions = "experiment__experiment_upload__target__project_id" + filter_permissions = "experiment__experiment_upload__target__project" class MolImageView(ISPyBSafeQuerySet): @@ -277,7 +246,7 @@ class MolImageView(ISPyBSafeQuerySet): queryset = models.SiteObservation.objects.filter() serializer_class = serializers.MolImageSerializer filterset_class = 
filters.MolImgFilter - filter_permissions = "experiment__experiment_upload__target__project_id" + filter_permissions = "experiment__experiment_upload__target__project" class CompoundImageView(ISPyBSafeQuerySet): @@ -294,7 +263,7 @@ class ProteinMapInfoView(ISPyBSafeQuerySet): queryset = models.SiteObservation.objects.all() serializer_class = serializers.ProtMapInfoSerializer - filter_permissions = "experiment__experiment_upload__target__project_id" + filter_permissions = "experiment__experiment_upload__target__project" filterset_fields = ( "code", "experiment__experiment_upload__target", @@ -307,7 +276,7 @@ class ProteinPDBInfoView(ISPyBSafeQuerySet): queryset = models.SiteObservation.objects.all() serializer_class = serializers.ProtPDBInfoSerializer - filter_permissions = "experiment__experiment_upload__target__project_id" + filter_permissions = "experiment__experiment_upload__target__project" filterset_fields = ( "code", "experiment__experiment_upload__target", @@ -320,7 +289,7 @@ class ProteinPDBBoundInfoView(ISPyBSafeQuerySet): queryset = models.SiteObservation.filter_manager.filter_qs() serializer_class = serializers.ProtPDBBoundInfoSerializer - filter_permissions = "experiment__experiment_upload__target__project_id" + filter_permissions = "experiment__experiment_upload__target__project" filterset_fields = ( "code", "experiment__experiment_upload__target", @@ -340,7 +309,7 @@ class ProjectView(ISPyBSafeQuerySet): class TargetView(mixins.UpdateModelMixin, ISPyBSafeQuerySet): queryset = models.Target.objects.filter() serializer_class = serializers.TargetSerializer - filter_permissions = "project_id" + filter_permissions = "project" filterset_fields = ("title",) permission_classes = [IsObjectProposalMember] @@ -431,7 +400,8 @@ def post(self, request): choice = request.POST['submit_choice'] # The 'sdf_file' and 'target_name' are only required for upload/update sdf_file = request.FILES.get('sdf_file') - target = request.POST.get('target_name') + target_name = 
request.POST.get('target_name') + proposal_ref = request.POST.get('proposal_ref') update_set = request.POST.get('update_set') user = self.request.user @@ -439,7 +409,7 @@ def post(self, request): '+ UploadComputedSetView POST user.id=%s choice="%s" target="%s" update_set="%s"', user.id, choice, - target, + target_name, update_set, ) @@ -458,17 +428,18 @@ def post(self, request): ) return redirect('viewer:upload_cset') - # If validating or uploading we need a Target and SDF file. + # If validating or uploading we need a Target, and SDF file. # If updating or deleting we need an update set (that's not 'None') if choice in ['V', 'U']: - if sdf_file is None or target is None: - request.session[_SESSION_ERROR] = ( + if sdf_file is None or target_name is None or not proposal_ref: + err_msg = ( 'To Validate or Upload' - ' you must provide a Target and SDF file' + + ' you must provide a Target, Proposal ref. and SDF file' ) + request.session[_SESSION_ERROR] = err_msg logger.warning( '- UploadComputedSetView POST error_msg="%s"', - request.session[_SESSION_ERROR], + err_msg, ) return redirect('viewer:upload_cset') elif choice in ['D']: @@ -482,6 +453,36 @@ def post(self, request): ) return redirect('viewer:upload_cset') + try: + project = models.Project.objects.get(title=proposal_ref) + except models.Project.DoesNotExist: + request.session[_SESSION_ERROR] = f'Proposal {proposal_ref} not found' + logger.warning( + '- UploadComputedSetView POST error_msg="%s"', + request.session[_SESSION_ERROR], + ) + return redirect('viewer:upload_cset') + + if ( + settings.AUTHENTICATE_UPLOAD + and project.title + not in _ISPYB_SAFE_QUERY_SET.get_proposals_for_user(request.user) + ): + context = { + 'error_message': f"You cannot load compound sets for '{target_name}'. 
You are not a member of any of its Proposals" + } + return render(request, 'viewer/upload-cset.html', context) + + try: + target = models.Target.objects.get(title=target_name, project=project) + except models.Target.DoesNotExist: + request.session[_SESSION_ERROR] = f'Target {target_name} not found' + logger.warning( + '- UploadComputedSetView POST error_msg="%s"', + request.session[_SESSION_ERROR], + ) + return redirect('viewer:upload_cset') + # If uploading (updating) or deleting # the set owner cannot be anonymous # and the user needs to be the owner @@ -502,25 +503,6 @@ def post(self, request): request.session[_SESSION_ERROR], ) return redirect('viewer:upload_cset') - # You cannot validate or upload a set - # unless the user is part of the Target's project (proposal) - # even if the target is 'open'. - if choice in ['V', 'U'] and settings.AUTHENTICATE_UPLOAD: - assert target - context = {} - # The target must be part of a proposal that the user is a member of. - target_record = models.Target.objects.filter(title=target).first() - if not target_record: - context['error_message'] = f'Unknown Target ({target})' - return render(request, 'viewer/upload-cset.html', context) - # What proposals is the user a member of? - if not _ISPYB_SAFE_QUERY_SET.user_is_member_of_target( - user, target_record - ): - context[ - 'error_message' - ] = f"You cannot load compound sets for '{target}'. 
You are not a member of any of its Proposals" - return render(request, 'viewer/upload-cset.html', context) # Save uploaded sdf and zip to tmp storage tmp_pdb_file = None @@ -547,6 +529,7 @@ def post(self, request): 'user_id': user.id, 'sdf_file': tmp_sdf_file, 'target': target, + 'proposal_ref': proposal_ref, } if tmp_pdb_file: task_params['zfile'] = tmp_pdb_file @@ -579,6 +562,7 @@ def post(self, request): 'user_id': user.id, 'sdf_file': tmp_sdf_file, 'target': target, + 'proposal_ref': proposal_ref, } if tmp_pdb_file: task_params['zfile'] = tmp_pdb_file @@ -716,22 +700,25 @@ def get(self, request, validate_task_id): # [3] comes from task in tasks.py, 4th element in task payload tuple task_data = results[3] - if isinstance(task_data, dict) and 'target' in task_data.keys(): - target_name = task_data['target'] + if isinstance(task_data, dict) and 'proposal_ref' in task_data.keys(): + project_name = task_data['proposal_ref'] try: - target = models.Target.objects.get(title=target_name) - if not _ISPYB_SAFE_QUERY_SET.user_is_member_of_target( - request.user, target - ): - return Response( - {'error': "You are not a member of the Target's proposal"}, - status=status.HTTP_403_FORBIDDEN, - ) - except models.Target.DoesNotExist: - # the name is filled from db, so this not existing would be extraordinary - logger.error('Target %s not found', target_name) + project = models.Project.objects.get(title=project_name) + + except models.Project.DoesNotExist: + logger.error('Project %s not found', project_name) + return Response( + {'error': f'Project {project_name} not found'}, + status=status.HTTP_403_FORBIDDEN, + ) + + if project.title not in _ISPYB_SAFE_QUERY_SET.get_proposals_for_user( + request.user + ): return Response( - {'error': f'Target {target_name} not found'}, + { + 'error': "You are not a member of the proposal {project_name}" + }, status=status.HTTP_403_FORBIDDEN, ) @@ -918,7 +905,7 @@ class SessionProjectView( """ queryset = models.SessionProject.objects.filter() - 
filter_permissions = "target__project_id" + filter_permissions = "target__project" filterset_fields = '__all__' def get_serializer_class(self): @@ -939,7 +926,7 @@ class SessionActionsView( """ queryset = models.SessionActions.filter_manager.filter_qs() - filter_permissions = "session_project__target__project_id" + filter_permissions = "session_project__target__project" serializer_class = serializers.SessionActionsSerializer # Note: jsonField for Actions will need specific queries - can introduce if needed. @@ -957,7 +944,7 @@ class SnapshotView( """ queryset = models.Snapshot.filter_manager.filter_qs() - filter_permissions = "session_project__target__project_id" + filter_permissions = "session_project__target__project" filterset_class = filters.SnapshotFilter def get_serializer_class(self): @@ -978,7 +965,7 @@ class SnapshotActionsView( """ queryset = models.SnapshotActions.filter_manager.filter_qs() - filter_permissions = "snapshot__session_project__target__project_id" + filter_permissions = "snapshot__session_project__target__project" serializer_class = serializers.SnapshotActionsSerializer # Note: jsonField for Actions will need specific queries - can introduce if needed. 
@@ -1063,7 +1050,7 @@ class ComputedSetView( queryset = models.ComputedSet.objects.filter() serializer_class = serializers.ComputedSetSerializer - filter_permissions = "target__project_id" + filter_permissions = "target__project" filterset_fields = ('target', 'target__title') permission_classes = [IsObjectProposalMember] @@ -1115,7 +1102,7 @@ class CompoundScoresView(ISPyBSafeQuerySet): queryset = models.ScoreDescription.objects.all() serializer_class = serializers.ScoreDescriptionSerializer - filter_permissions = "computed_set__target__project_id" + filter_permissions = "computed_set__target__project" filterset_fields = ('computed_set', 'name') @@ -1300,7 +1287,7 @@ class SiteObservationTagView( """Set up/retrieve information about tags relating to Molecules (api/molecule_tag)""" queryset = models.SiteObservationTag.objects.all() - filter_permissions = "target__project_id" + filter_permissions = "target__project" serializer_class = serializers.SiteObservationTagSerializer filterset_fields = ( 'id', @@ -1335,7 +1322,7 @@ class SessionProjectTagView( """Set up/retrieve information about tags relating to Session Projects.""" queryset = models.SessionProjectTag.objects.all() - filter_permissions = "target__project_id" + filter_permissions = "target__project" serializer_class = serializers.SessionProjectTagSerializer filterset_fields = ('id', 'tag', 'category', 'target', 'session_projects') @@ -1353,7 +1340,7 @@ class DownloadStructuresView( queryset = models.Target.objects.all() serializer_class = serializers.DownloadStructuresSerializer - filter_permissions = "project_id" + filter_permissions = "project" filterset_fields = ('title', 'id') def list(self, request): @@ -1416,6 +1403,12 @@ def create(self, request): return Response(content, status=status.HTTP_400_BAD_REQUEST) # Dynamic files + if 'target_access_string' not in request.data: + content = { + 'message': 'If no file_url, a target_access_string must be provided' + } + return Response(content, 
status=status.HTTP_400_BAD_REQUEST) + if 'target_name' not in request.data: content = { 'message': 'If no file_url, a target_name (title) must be provided' @@ -1423,28 +1416,36 @@ def create(self, request): return Response(content, status=status.HTTP_400_BAD_REQUEST) target_name = request.data['target_name'] + project_name = request.data['target_access_string'] target = None logger.info('Given target_name "%s"', target_name) # Check target_name is valid: try: - target = self.queryset.get(title=target_name) - except models.Target.DoesNotExist: - msg = f'Either the Target "{target_name}" is not present or you are not permitted access it' + project = models.Project.objects.get(title=project_name) + except models.Project.DoesNotExist: + msg = f'Project "{project_name}" does not exist' logger.warning(msg) content = {'message': msg} return Response(content, status=status.HTTP_404_NOT_FOUND) - logger.info('Found Target record %r', target) - # Is the user part of the target's proposal? - # (or is it a public target?) 
- if not _ISPYB_SAFE_QUERY_SET.user_is_member_of_target( - request.user, target, restrict_public_to_membership=False + if project.title not in _ISPYB_SAFE_QUERY_SET.get_proposals_for_user( + request.user ): - msg = 'You have not been given access to this Target' + msg = f'User "{request.user.username}" is not a member of {project_name}' logger.warning(msg) content = {'message': msg} - return Response(content, status=status.HTTP_403_FORBIDDEN) + return Response(content, status=status.HTTP_404_NOT_FOUND) + + try: + target = self.queryset.get(title=target_name, project=project) + except models.Target.DoesNotExist: + msg = f'Target "{target_name}" does not exist under project {project.title}' + logger.warning(msg) + content = {'message': msg} + return Response(content, status=status.HTTP_404_NOT_FOUND) + + logger.info('Found Target record %r', target) proteins_list = [ p.strip() for p in request.data.get('proteins', '').split(',') if p @@ -1652,15 +1653,15 @@ def create(self, request, *args, **kwargs): # To permit a download the user must be a member of the target's proposal # (or the proposal must be open) project: models.Project = serializer.validated_data['project'] - if not _ISPYB_SAFE_QUERY_SET.user_is_member_of_any_given_proposals( - request.user, [project.title], restrict_public_to_membership=False + if project.title not in _ISPYB_SAFE_QUERY_SET.get_proposals_for_user( + request.user, restrict_public_to_membership=False ): return Response( {'error': "You have no access to the Project"}, status=status.HTTP_403_FORBIDDEN, ) target: models.Target = serializer.validated_data['target'] - if project not in target.project_id.all(): + if project != target.project.title: return Response( {'error': "The Target is not part of the given Project"}, status=status.HTTP_403_FORBIDDEN, @@ -1719,7 +1720,7 @@ class ExperimentUploadView(ISPyBSafeQuerySet): serializer_class = serializers.TargetExperimentReadSerializer permission_class = [permissions.IsAuthenticated] filterset_fields 
= ("target", "project") - filter_permissions = "target__project_id" + filter_permissions = "target__project" http_method_names = ('get',) @@ -1766,7 +1767,7 @@ class JobFileTransferView(viewsets.ModelViewSet): """Squonk Job file transfer (api/job_file_transfer)""" queryset = models.JobFileTransfer.objects.filter() - filter_permissions = "target__project_id" + filter_permissions = "target__project" filterset_fields = ( 'id', 'snapshot', @@ -2559,26 +2560,31 @@ def create(self, request, *args, **kwargs): target_name = serializer.validated_data['target'] filename = serializer.validated_data['filename'] - if settings.AUTHENTICATE_UPLOAD: - user = self.request.user - if not user.is_authenticated: - return redirect(settings.LOGIN_URL) - else: - if target_access_string not in self.get_proposals_for_user( - user, restrict_public_to_membership=True - ): - return Response( - { - "target_access_string": [ - f"You are not authorized to upload metadatadata to '{target_access_string}'" - ] - }, - status=status.HTTP_403_FORBIDDEN, - ) + try: + project = models.Project.objects.get(title=target_access_string) + except models.Project.DoesNotExist: + return Response( + { + "target_access_string": [ + f"Project {target_access_string} does not exist" + ] + }, + status=status.HTTP_400_BAD_REQUEST, + ) + + if project.title not in _ISPYB_SAFE_QUERY_SET.get_proposals_for_user( + request.user + ): + msg = f'User "{request.user.username}" is not a member of {target_access_string}' + logger.warning(msg) + content = {'message': msg} + return Response(content, status=status.HTTP_404_NOT_FOUND) + + if settings.AUTHENTICATE_UPLOAD and not self.request.user.is_authenticated: + return redirect(settings.LOGIN_URL) - # TODO: add TAS to lookup (when ready) try: - target = models.Target.objects.get(title=target_name) + target = models.Target.objects.get(title=target_name, project=project) except models.Target.DoesNotExist: return Response( {"target": [f"Target {target_name} does not exist"]}, From 
79f6e493fe24f664015e30c8349969148a82ac95 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Thu, 26 Sep 2024 08:16:46 +0100 Subject: [PATCH 02/17] stashing --- viewer/cset_upload.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/viewer/cset_upload.py b/viewer/cset_upload.py index cad958b1..043f8dcd 100644 --- a/viewer/cset_upload.py +++ b/viewer/cset_upload.py @@ -1,4 +1,5 @@ import ast +import copy import datetime import logging import os @@ -354,6 +355,10 @@ def set_mol( inchi = Chem.inchi.MolToInchi(mol) molecule_name = mol.GetProp('_Name') + flattened_copy = copy.deepcopy(mol) + Chem.RemoveStereochemistry(mol) + flat_inchi = Chem.inchi.MolToInchi(flattened_copy) + compound: Compound = self.create_mol( inchi, compound_set.target, name=molecule_name ) @@ -442,6 +447,11 @@ def set_mol( existing_computed_molecules = [] for k in qs: + if k.compound.inchi_key == flat_inchi: + # existing compound is a flattened copy of the new one, match found + existing_computed_molecules.append(k) + continue + kmol = Chem.MolFromMolBlock(k.sdf_info) if kmol: # find distances between corresponding atoms of the From d190bfe6cd64f566ccf489653ac08d55b3b31b68 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Fri, 27 Sep 2024 08:26:21 +0100 Subject: [PATCH 03/17] stashing --- viewer/cset_upload.py | 16 +++++++--------- viewer/tasks.py | 4 ---- viewer/views.py | 6 ++---- 3 files changed, 9 insertions(+), 17 deletions(-) diff --git a/viewer/cset_upload.py b/viewer/cset_upload.py index 4db5f1a6..fd692c82 100644 --- a/viewer/cset_upload.py +++ b/viewer/cset_upload.py @@ -140,18 +140,16 @@ def __init__( zfile, zfile_hashvals, computed_set_name, - project_name, ): self.user_id = user_id self.sdf_filename = sdf_filename self.submitter_name = submitter_name self.submitter_method = submitter_method - self.target = target + self.target_id = target self.version = version self.zfile = zfile self.zfile_hashvals = zfile_hashvals self.computed_set_name = computed_set_name - 
self.project_name = project_name def process_pdb(self, pdb_code, zfile, zfile_hashvals) -> str | None: for key in zfile_hashvals.keys(): @@ -619,13 +617,13 @@ def task(self) -> ComputedSet: today: datetime.date = datetime.date.today() new_ordinal: int = 1 + try: - target = Target.objects.get( - title=self.target, project=self.project_name - ) + target = Target.objects.get(pk=self.target_id) except Target.DoesNotExist as exc: - # probably wrong target name supplied - logger.error('Target %s does not exist', self.target) + # target's existance should be validated in the view, + # this could hardly happen + logger.error('Target %s does not exist', self.target_id) raise Target.DoesNotExist from exc cs_name: str = ( @@ -677,7 +675,7 @@ def task(self) -> ComputedSet: for i in range(len(mols_to_process)): _ = self.process_mol( mols_to_process[i], - self.target, + self.target_id, computed_set, sdf_filename, self.zfile, diff --git a/viewer/tasks.py b/viewer/tasks.py index d5a238ed..2274fdd4 100644 --- a/viewer/tasks.py +++ b/viewer/tasks.py @@ -102,7 +102,6 @@ def process_compound_set(validate_output): zfile=zfile, zfile_hashvals=zfile_hashvals, computed_set_name=computed_set_name, - project_name=params['proposal_ref'], ) compound_set = save_mols.task() @@ -159,7 +158,6 @@ def validate_compound_set(task_params): user_id = task_params['user_id'] sdf_file = task_params['sdf_file'] target = task_params['target'] - proposal_ref = task_params['proposal_ref'] # Optional params zfile = task_params.get('zfile') update = task_params.get('update') @@ -168,7 +166,6 @@ def validate_compound_set(task_params): logger.info('validate_compound_set() user_id=%s', user_id) logger.info('validate_compound_set() sdf_file=%s', sdf_file) logger.info('validate_compound_set() target=%s', target) - logger.info('validate_compound_set() proposal_ref=%s', proposal_ref) logger.info('validate_compound_set() update=%s', update) validated = True @@ -190,7 +187,6 @@ def validate_compound_set(task_params): 
'user_id': user_id, 'sdf': sdf_file, 'target': target, - 'proposal_ref': proposal_ref, 'pdb_zip': zfile, 'update': update, } diff --git a/viewer/views.py b/viewer/views.py index 9c1a43a3..16dc232b 100644 --- a/viewer/views.py +++ b/viewer/views.py @@ -528,8 +528,7 @@ def post(self, request): task_params = { 'user_id': user.id, 'sdf_file': tmp_sdf_file, - 'target': target, - 'proposal_ref': proposal_ref, + 'target': target.pk, } if tmp_pdb_file: task_params['zfile'] = tmp_pdb_file @@ -561,8 +560,7 @@ def post(self, request): task_params = { 'user_id': user.id, 'sdf_file': tmp_sdf_file, - 'target': target, - 'proposal_ref': proposal_ref, + 'target': target.pk, } if tmp_pdb_file: task_params['zfile'] = tmp_pdb_file From 781d2c848d08b2387abfca6b97537a997a83fdea Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Wed, 2 Oct 2024 16:54:49 +0100 Subject: [PATCH 04/17] feat: allow uploading same target to multiple projects --- viewer/cset_upload.py | 10 +++++----- viewer/sdf_check.py | 7 ++++--- viewer/target_loader.py | 5 +++-- viewer/tasks.py | 2 +- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/viewer/cset_upload.py b/viewer/cset_upload.py index fd692c82..0bdd82a4 100644 --- a/viewer/cset_upload.py +++ b/viewer/cset_upload.py @@ -213,7 +213,7 @@ def get_site_observation( try: site_obvs = SiteObservation.objects.get( code__contains=name, - experiment__experiment_upload__target__title=target, + experiment__experiment_upload__target__pk=target, ) except SiteObservation.DoesNotExist: # Initial SiteObservation lookup failed. 
@@ -226,7 +226,7 @@ def get_site_observation( # If all else fails then the site_obvs will be 'None' qs = SiteObservation.objects.filter( code__contains=name, - experiment__experiment_upload__target__title=target, + experiment__experiment_upload__target__pk=target, ) if qs.exists(): logger.info( @@ -238,7 +238,7 @@ def get_site_observation( alt_name = name.split(':')[0].split('_')[0] qs = SiteObservation.objects.filter( code__contains=alt_name, - experiment__experiment_upload__target__title=target, + experiment__experiment_upload__target__pk=target, ) if qs.exists(): logger.info( @@ -357,13 +357,13 @@ def set_mol( try: site_obvs = SiteObservation.objects.get( code=str(i), - experiment__experiment_upload__target_id=compound_set.target, + experiment__experiment_upload__target=compound_set.target, ) ref = site_obvs except SiteObservation.DoesNotExist: qs = SiteObservation.objects.filter( code=str(i.split(':')[0].split('_')[0]), - experiment__experiment_upload__target_id=compound_set.target, + experiment__experiment_upload__target=compound_set.target, ) if not qs.exists(): raise Exception( # pylint: disable=raise-missing-from diff --git a/viewer/sdf_check.py b/viewer/sdf_check.py index 411128e4..aa362239 100755 --- a/viewer/sdf_check.py +++ b/viewer/sdf_check.py @@ -10,7 +10,7 @@ import validators from rdkit import Chem -from viewer.models import SiteObservation +from viewer.models import SiteObservation, Target logger = logging.getLogger(__name__) @@ -91,7 +91,7 @@ def check_refmol(mol, validate_dict, target=None): ref_strip = ref_mol.strip() query = SiteObservation.objects.filter( code=ref_strip, - experiment__experiment_upload__target__title=target, + experiment__experiment_upload__target__pk=target, ) if len(query) == 0: msg = f"No SiteObservation code contains '{ref_strip}'" @@ -139,8 +139,9 @@ def check_pdb(mol, validate_dict, target=None, zfile=None): # If anything else given example x1408 if target and not pdb_fn.endswith(".pdb"): + target_name = 
Target.objects.get(pk=target).title query = SiteObservation.objects.filter( - code__contains=str(f'{target}-' + pdb_fn.split(':')[0].split('_')[0]) + code__contains=str(f'{target_name}-' + pdb_fn.split(':')[0].split('_')[0]) ) if len(query) == 0: validate_dict = add_warning( diff --git a/viewer/target_loader.py b/viewer/target_loader.py index dfea7039..b3146162 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -1515,8 +1515,9 @@ def process_bundle(self): ) if target_created: + target_dir = f"{self.target_name}_{self.proposal_ref}" # mypy thinks target and target_name are None - target_dir = sanitize_directory_name(self.target_name, self.abs_final_path) # type: ignore [arg-type] + target_dir = sanitize_directory_name(target_dir, self.abs_final_path) # type: ignore [arg-type] self.target.zip_archive = target_dir # type: ignore [attr-defined] self.target.save() # type: ignore [attr-defined] else: @@ -2314,7 +2315,7 @@ def _move_and_save_target_experiment(target_loader): str(target_loader.abs_final_path), ) Path(target_loader.bundle_path).rename( - target_loader.abs_final_path.parent.joinpath(target_loader.data_bundle) + target_loader.abs_final_path.joinpath(target_loader.data_bundle) ) set_directory_permissions(target_loader.abs_final_path, 0o755) diff --git a/viewer/tasks.py b/viewer/tasks.py index 2274fdd4..ef2471ec 100644 --- a/viewer/tasks.py +++ b/viewer/tasks.py @@ -144,7 +144,7 @@ def validate_compound_set(task_params): - params (dict): - user_id (int): User record ID of user initiating the task - sdf (str): name of the uploaded sdf file - - target (str): name of the target that the computed set is associated with + - target (int): id of the target that the computed set is associated with - zfile: dictionary where key is the name of the file minus extension and path, and value is the filename, which is saved to temporary storage by `viewer.views.UploadCSet` From f7bc293027986fc85724290a22430c8ba3cc753d Mon Sep 17 00:00:00 2001 From: Kalev 
Takkis Date: Thu, 3 Oct 2024 15:38:24 +0100 Subject: [PATCH 05/17] fix: correct ComputedMolecule naming (issue 1524) --- viewer/cset_upload.py | 40 ++++++++++++++++------------------------ 1 file changed, 16 insertions(+), 24 deletions(-) diff --git a/viewer/cset_upload.py b/viewer/cset_upload.py index 043f8dcd..9db5e1ba 100644 --- a/viewer/cset_upload.py +++ b/viewer/cset_upload.py @@ -3,6 +3,7 @@ import datetime import logging import os +import re import uuid import zipfile from pathlib import Path @@ -20,8 +21,7 @@ from django.core.exceptions import MultipleObjectsReturned from django.core.files.base import ContentFile from django.core.files.storage import default_storage -from django.db.models import F, TextField, Value -from django.db.models.expressions import Func +from django.db.models import F from rdkit import Chem from viewer.models import ( @@ -414,34 +414,26 @@ def set_mol( # Check if anything exists already... # I think, realistically, I only need to check compound - # fmt: off + # update: I used to annotate name components, with the new + # format, this is not necessary. or possible fmt: off qs = ComputedMolecule.objects.filter( compound=compound, - ).annotate( - # names come in format: - # target_name-sequential number-sequential letter, - # e.g. 
A71EV2A-1-a, hence grabbing the 3rd column - suffix=Func( - F('name'), - Value('-'), - Value(3), - function='split_part', - output_field=TextField(), - ), - ) + ).order_by('name') if qs.exists(): - suffix = next( - alphanumerator(start_from=qs.order_by('-suffix').first().suffix) - ) + # not actually latest, just last according to sorting above + latest = qs.last() + # regex pattern - split name like 'v1a' + # ('(letters)(digits)(letters)' to components + groups = re.search(r'()(\d+)(\D+)', qs.last().name) + if groups is None or len(groups.groups()) != 3: + # just a quick sanity check + raise ValueError(f'Non-standard ComputedMolecule.name: {latest.name}') + number = groups.groups[1] # type: ignore [index] + suffix = next(alphanumerator(start_from=groups.groups[2])) # type: ignore [index] else: suffix = 'a' - - # distinct is ran on indexed field, so shouldn't be a problem - number = ComputedMolecule.objects.filter( - computed_set__target=compound_set.target, - ).values('id').distinct().count() + 1 - # fmt: on + number = 1 name = f'v{number}{suffix}' From 3e3c770eb6fbe15ad10e164fa6bdb853201f3686 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Mon, 7 Oct 2024 16:08:09 +0100 Subject: [PATCH 06/17] fix: fixed (probably) data loss on metadata.csv uploads The problem was caused by faulty SiteObservation query, cases where observation longcode was the same between multiple targets are affected. 
--- viewer/tags.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/viewer/tags.py b/viewer/tags.py index 729bfde7..95f01007 100644 --- a/viewer/tags.py +++ b/viewer/tags.py @@ -276,7 +276,13 @@ def load_tags_from_file(filename: str, target: Target) -> list[str]: # type: ig target=target, ) - qs = SiteObservation.objects.filter(longcode__in=df['Long code']) + # fmt: off + qs = SiteObservation.filter_manager.by_target( + target=target, + ).filter( + longcode__in=df['Long code'], + ) + # fmt: on alias_update = [] for upload_name, alias in tag_aliases: From 931275d1c2dc6bf56512429195379bd1d45eeec1 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Wed, 9 Oct 2024 16:52:33 +0100 Subject: [PATCH 07/17] feat: versioned auto-assigned upload tags Instead of previous 'New', each upload gets tagname from version (upload_[N]) --- viewer/target_loader.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/viewer/target_loader.py b/viewer/target_loader.py index fb61054c..cb64ce53 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -2028,7 +2028,7 @@ def process_bundle(self): # tag all new observations, so that the curator can find and # re-pose them self._tag_observations( - "New", + self.version_dir, "", TagCategory.objects.get(category="Other"), [ @@ -2036,6 +2036,7 @@ def process_bundle(self): for k in site_observation_objects.values() # pylint: disable=no-member if k.new ], + clean_ids=False, ) def _load_yaml(self, yaml_file: Path) -> dict: @@ -2155,6 +2156,7 @@ def _tag_observations( site_observations: list, hidden: bool = False, short_tag: str | None = None, + clean_ids: bool = True, ) -> None: try: # memo to self: description is set to tag, but there's @@ -2185,9 +2187,10 @@ def _tag_observations( tag = tag if short_tag is None else short_tag short_name = name if short_tag is None else f"{prefix} - {short_tag}" - tag = clean_object_id(tag) - name = clean_object_id(name) - short_name = 
clean_object_id(short_name) + if clean_ids: + tag = clean_object_id(tag) + name = clean_object_id(name) + short_name = clean_object_id(short_name) try: so_tag = SiteObservationTag.objects.get( From efc9cef4af261b080790f0d765696b2660b2952d Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Thu, 10 Oct 2024 14:50:25 +0100 Subject: [PATCH 08/17] fix: correct source paths --- viewer/target_loader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/viewer/target_loader.py b/viewer/target_loader.py index cb64ce53..93265dc7 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -626,13 +626,13 @@ def logfunc(key, message): continue # file key should go to result dict no matter what - result[key] = filename + result[key] = (filename, source_file) logger.debug("Adding key %s: %s", key, filename) files = [] for f in list(required) + list(recommended): try: - files.append((result[f], source_file)) + files.append(result[f]) except KeyError: logfunc( f, From 7f27d654cd751c4f6ec3502c5e26739092a9431d Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Thu, 10 Oct 2024 16:31:34 +0100 Subject: [PATCH 09/17] finalise data migrations for target proposal update --- viewer/migrations/0066_auto_20240919_1505.py | 1 - viewer/migrations/0072_auto_20241010_1437.py | 53 ++++++++++++++++++++ 2 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 viewer/migrations/0072_auto_20241010_1437.py diff --git a/viewer/migrations/0066_auto_20240919_1505.py b/viewer/migrations/0066_auto_20240919_1505.py index 5001eef2..26264216 100644 --- a/viewer/migrations/0066_auto_20240919_1505.py +++ b/viewer/migrations/0066_auto_20240919_1505.py @@ -11,7 +11,6 @@ class Migration(migrations.Migration): ] def restruct_projects(apps, schema_editor): - Project = apps.get_model('viewer', 'Project') Target = apps.get_model('viewer', 'Target') # atm, target has m2m to project diff --git a/viewer/migrations/0072_auto_20241010_1437.py 
b/viewer/migrations/0072_auto_20241010_1437.py new file mode 100644 index 00000000..b15ccc66 --- /dev/null +++ b/viewer/migrations/0072_auto_20241010_1437.py @@ -0,0 +1,53 @@ +# Generated by Django 3.2.25 on 2024-10-10 14:37 + +from pathlib import Path + +from django.db import migrations + +from fragalysis.settings import MEDIA_ROOT, TARGET_LOADER_MEDIA_DIRECTORY + +from viewer.utils import sanitize_directory_name + + +# continuation of the data migration to replace m2m between target and +# project with fk in target to project +# starts in 0066, where target gets the fk to project populated +# migrations in between take care of changing the schema +# this one now moves the data to correct locations + +class Migration(migrations.Migration): + + dependencies = [ + ('viewer', '0071_target_unique_target_in_project'), + ] + + + def move_data(apps, schema_editor): + Target = apps.get_model('viewer', 'Target') + root_path = Path(MEDIA_ROOT).joinpath(TARGET_LOADER_MEDIA_DIRECTORY) + for target in Target.objects.all(): + new_archive_dir = sanitize_directory_name( + f"{target.zip_archive}_{target.project.title}", + root_path, + ) + old_data_path = root_path.joinpath(target.zip_archive) + new_data_path = root_path.joinpath(new_archive_dir) + + old_data_path.rename(new_data_path) + + # move tarballs + for exp_upload in target.experimentupload_set.all(): + tarball_path = root_path.joinpath(exp_upload.file.name) + new_tarball_path = new_data_path.joinpath(exp_upload.file.name) + tarball_path.rename(new_tarball_path) + + target.zip_archive = new_archive_dir + target.save() + + + def unmove_data(apps, schema_editor): + pass + + operations = [ + migrations.RunPython(move_data, unmove_data) + ] From be94d1b3f01720e7ec94fe855fe3dfd687c54867 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Thu, 10 Oct 2024 16:41:21 +0100 Subject: [PATCH 10/17] fix: errors in tarball moving scripts --- viewer/migrations/0072_auto_20241010_1437.py | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/viewer/migrations/0072_auto_20241010_1437.py b/viewer/migrations/0072_auto_20241010_1437.py index b15ccc66..41631522 100644 --- a/viewer/migrations/0072_auto_20241010_1437.py +++ b/viewer/migrations/0072_auto_20241010_1437.py @@ -27,10 +27,10 @@ def move_data(apps, schema_editor): root_path = Path(MEDIA_ROOT).joinpath(TARGET_LOADER_MEDIA_DIRECTORY) for target in Target.objects.all(): new_archive_dir = sanitize_directory_name( - f"{target.zip_archive}_{target.project.title}", + f"{target.zip_archive.name}_{target.project.title}", root_path, ) - old_data_path = root_path.joinpath(target.zip_archive) + old_data_path = root_path.joinpath(target.zip_archive.name) new_data_path = root_path.joinpath(new_archive_dir) old_data_path.rename(new_data_path) From d54e05f864b2552dab513993e69c8be9664a2885 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Tue, 15 Oct 2024 14:31:34 +0100 Subject: [PATCH 11/17] stashing --- api/urls.py | 5 ++++ viewer/cset_upload.py | 4 +-- viewer/serializers.py | 7 +++++ viewer/views.py | 65 ++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 78 insertions(+), 3 deletions(-) diff --git a/api/urls.py b/api/urls.py index c88fb765..d894d0d6 100644 --- a/api/urls.py +++ b/api/urls.py @@ -124,6 +124,11 @@ router.register( "metadata_upload", viewer_views.UploadMetadataView, basename='metadata_upload' ) +router.register( + "computedset_download", + viewer_views.DownloadComputedSetView, + basename='computedset_download', +) # Squonk Jobs router.register( diff --git a/viewer/cset_upload.py b/viewer/cset_upload.py index 998c6bd4..7b505736 100644 --- a/viewer/cset_upload.py +++ b/viewer/cset_upload.py @@ -419,8 +419,8 @@ def set_mol( if groups is None or len(groups.groups()) != 3: # just a quick sanity check raise ValueError(f'Non-standard ComputedMolecule.name: {latest.name}') - number = groups.groups[1] # type: ignore [index] - suffix = next(alphanumerator(start_from=groups.groups[2])) # type: ignore [index] + number = 
groups.groups()[1] # type: ignore [index] + suffix = next(alphanumerator(start_from=groups.groups()[2])) # type: ignore [index] else: suffix = 'a' number = 1 diff --git a/viewer/serializers.py b/viewer/serializers.py index 2154f562..cf374d1e 100644 --- a/viewer/serializers.py +++ b/viewer/serializers.py @@ -635,6 +635,13 @@ class Meta: fields = '__all__' +class ComputedSetDownloadSerializer(serializers.ModelSerializer): + # validation is not called, so no reason to use it + class Meta: + model = models.ComputedSet + fields = ('name',) + + class ComputedMoleculeSerializer(serializers.ModelSerializer): # performance issue # inspiration_frags = MoleculeSerializer(read_only=True, many=True) diff --git a/viewer/views.py b/viewer/views.py index 16dc232b..b21953ab 100644 --- a/viewer/views.py +++ b/viewer/views.py @@ -2594,4 +2594,67 @@ def create(self, request, *args, **kwargs): if errors: return Response({"errors": errors}, status=status.HTTP_400_BAD_REQUEST) else: - return Response({'sucess': True}, status=status.HTTP_200_OK) + return Response({'success': True}, status=status.HTTP_200_OK) + + +class DownloadComputedSetView(ISPyBSafeQuerySet): + queryset = models.ComputedSet.objects.all() + filter_permissions = "target__project" + serializer_class = serializers.ComputedSetDownloadSerializer + permission_class = [permissions.IsAuthenticated] + + def get_view_name(self): + return "Computed set download" + + def post(self, request, *args, **kwargs): + logger.info("+ DownloadComputedSetView.create called") + del args, kwargs + + logger.debug('self: %s', self) + logger.debug('request: %s', request) + logger.debug('data: %s', request.data) + # logger.debug('context: %s', self.context) + + # If done like this, I'm bypassing the validation.. 
+ + # serializer = self.get_serializer_class()(data=request.data, context={'request': request, 'view': self}) + # logger.debug('serializer: %s', serializer) + # if not serializer.is_valid(): + # return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST) + + # logger.debug("Serializer validated_data=%s", serializer.validated_data) + + # computed_set_name = serializer.validated_data['name'] + # computed_set = models.ComputedSet.objects.get(name=computed_set_name) + + computed_set_name = request.data['name'] + try: + computed_set = models.ComputedSet.objects.get(name=computed_set_name) + except models.ComputedSet.DoesNotExist: + return Response( + {'error': f"ComputedSet '{computed_set_name}' not found"}, + status=status.HTTP_404_NOT_FOUND, + ) + + # if computed_set.target.project.title not in _ISPYB_SAFE_QUERY_SET.get_proposals_for_user( + # request.user, restrict_public_to_membership=False + # ): + # return Response( + # {'error': "You have no access to the Project"}, + # status=status.HTTP_403_FORBIDDEN, + # ) + + # so now, get the file, and get the pdbs + sdf_file = Path(computed_set.written_sdf_filename) + if not sdf_file.exists(): + return Response( + {'error': f"Uploaded file '{str(sdf_file.name)}' not found"}, + status=status.HTTP_404_NOT_FOUND, + ) + + # Finally, return the file + wrapper = FileWrapper(open(sdf_file, 'rb')) + response = FileResponse(wrapper, content_type='text/plain') + response['Content-Disposition'] = 'attachment; filename="%s"' % sdf_file.name + response['Content-Length'] = os.path.getsize(sdf_file) + return response From 75b6d5e5edf50a25a493e1b5ce01fd9af3c98870 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Tue, 15 Oct 2024 15:38:36 +0100 Subject: [PATCH 12/17] fix: add code path to handle old uploads (with task_id) --- viewer/migrations/0072_auto_20241010_1437.py | 48 ++++++++++++++------ 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/viewer/migrations/0072_auto_20241010_1437.py 
b/viewer/migrations/0072_auto_20241010_1437.py index 41631522..566a4586 100644 --- a/viewer/migrations/0072_auto_20241010_1437.py +++ b/viewer/migrations/0072_auto_20241010_1437.py @@ -26,20 +26,40 @@ def move_data(apps, schema_editor): Target = apps.get_model('viewer', 'Target') root_path = Path(MEDIA_ROOT).joinpath(TARGET_LOADER_MEDIA_DIRECTORY) for target in Target.objects.all(): - new_archive_dir = sanitize_directory_name( - f"{target.zip_archive.name}_{target.project.title}", - root_path, - ) - old_data_path = root_path.joinpath(target.zip_archive.name) - new_data_path = root_path.joinpath(new_archive_dir) - - old_data_path.rename(new_data_path) - - # move tarballs - for exp_upload in target.experimentupload_set.all(): - tarball_path = root_path.joinpath(exp_upload.file.name) - new_tarball_path = new_data_path.joinpath(exp_upload.file.name) - tarball_path.rename(new_tarball_path) + + + if target.zip_archive: + old_data_path = root_path.joinpath(target.zip_archive.name) + new_archive_dir = sanitize_directory_name( + f"{target.zip_archive.name}_{target.project.title}", + root_path, + ) + new_data_path = root_path.joinpath(new_archive_dir) + old_data_path.rename(new_data_path) + # move tarballs + for exp_upload in target.experimentupload_set.all(): + tarball_path = root_path.joinpath(exp_upload.file.name) + new_tarball_path = new_data_path.joinpath(exp_upload.file.name) + tarball_path.rename(new_tarball_path) + else: + # old system, data path contains task_id + new_archive_dir = sanitize_directory_name( + f"{target.title}_{target.project.title}", + root_path, + ) + new_data_path = root_path.joinpath(new_archive_dir) + new_data_path.mkdir(parents=True, exist_ok=False) + for exp_upload in target.experimentupload_set.all(): + old_data_path = root_path.joinpath(str(exp_upload.task_id)) + # can only be one + target_subdir = [p for p in old_data_path.glob("*") if p.is_dir()][0] + target_tarball = [p for p in old_data_path.glob("*") if p.is_file()][0] + + upload_dir = 
next(target_subdir.glob('upload_*')) + upload_dir.rename(new_data_path.joinpath(upload_dir.parts[-1])) + new_tarball_path = new_data_path.joinpath(target_tarball.name) + target_tarball.rename(new_tarball_path) + target.zip_archive = new_archive_dir target.save() From 49ec42f3c9f23863785d6c3be5c7f49ec6eff7b6 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Wed, 16 Oct 2024 09:51:43 +0100 Subject: [PATCH 13/17] fix: a bug in migration script --- viewer/migrations/0072_auto_20241010_1437.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/viewer/migrations/0072_auto_20241010_1437.py b/viewer/migrations/0072_auto_20241010_1437.py index 566a4586..3958a55d 100644 --- a/viewer/migrations/0072_auto_20241010_1437.py +++ b/viewer/migrations/0072_auto_20241010_1437.py @@ -56,9 +56,12 @@ def move_data(apps, schema_editor): target_tarball = [p for p in old_data_path.glob("*") if p.is_file()][0] upload_dir = next(target_subdir.glob('upload_*')) - upload_dir.rename(new_data_path.joinpath(upload_dir.parts[-1])) + # NB! move tarball first! 
new_tarball_path = new_data_path.joinpath(target_tarball.name) target_tarball.rename(new_tarball_path) + # and then the rest of the data + upload_dir.rename(new_data_path.joinpath(upload_dir.parts[-1])) + target.zip_archive = new_archive_dir From fb568e4768cce1d8344c5a9a6dcbe83f6b55b602 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Wed, 16 Oct 2024 15:19:17 +0100 Subject: [PATCH 14/17] feat: RHS data download Implements 2 points outlined in spec (https://github.com/m2ms/fragalysis-frontend/issues/1527) - original sdf download - custom pdb download TODO: - additional data to sdf --- viewer/cset_upload.py | 12 +++++-- viewer/models.py | 14 ++++++++ viewer/views.py | 74 +++++++++++++++++++++++++++---------------- 3 files changed, 70 insertions(+), 30 deletions(-) diff --git a/viewer/cset_upload.py b/viewer/cset_upload.py index 7b505736..62233697 100644 --- a/viewer/cset_upload.py +++ b/viewer/cset_upload.py @@ -168,8 +168,13 @@ def process_pdb(self, pdb_code, zfile, zfile_hashvals) -> str | None: new_filename = Path(settings.MEDIA_ROOT).joinpath(pdb_field) old_filename = Path(settings.MEDIA_ROOT).joinpath(pdb_fp) - old_filename.rename(new_filename) - os.chmod(new_filename, 0o755) + + # there may be a case where 2 or more molfiles reference the + # same pdb. in this case, the old pdb is already renamed to + # new. 
+ if old_filename.exists() and not new_filename.exists(): + old_filename.rename(new_filename) + os.chmod(new_filename, 0o755) return str(pdb_field) @@ -675,6 +680,9 @@ def task(self) -> ComputedSet: # Process the molecules logger.info('%s mols_to_process=%s', computed_set, len(mols_to_process)) for i in range(len(mols_to_process)): + logger.debug( + 'processing mol %s: %s', i, mols_to_process[i].GetProp('_Name') + ) _ = self.process_mol( mols_to_process[i], self.target_id, diff --git a/viewer/models.py b/viewer/models.py index 68626b7e..d99da650 100644 --- a/viewer/models.py +++ b/viewer/models.py @@ -1078,6 +1078,20 @@ def __repr__(self) -> str: self.site_observation_code, ) + def get_filename(self): + # strip the original filename from the auto-assigned name + # filename is stored in field like: + # computed_set_data/A0486a#c2b8d13c94bb40bb9bf9d244b05516d3.pdb_2e2245e16cca4961919c7f5fdd1d0ece + if self.pdb: + return Path(self.pdb_info.name).name + else: + fname = Path(self.pdb_info.name).name + if fname.find('#') > 0: + name = fname.split('#')[0] + return f'{name}.pdb' + + return fname + class ComputedSetComputedMolecule(models.Model): computed_set = models.ForeignKey(ComputedSet, null=False, on_delete=models.CASCADE) diff --git a/viewer/views.py b/viewer/views.py index b21953ab..35507e18 100644 --- a/viewer/views.py +++ b/viewer/views.py @@ -3,7 +3,9 @@ import os import shlex import shutil +import zipfile from datetime import datetime +from io import BytesIO from pathlib import Path from typing import Any, Dict, List, Optional from wsgiref.util import FileWrapper @@ -2610,23 +2612,9 @@ def post(self, request, *args, **kwargs): logger.info("+ DownloadComputedSetView.create called") del args, kwargs - logger.debug('self: %s', self) - logger.debug('request: %s', request) logger.debug('data: %s', request.data) - # logger.debug('context: %s', self.context) - - # If done like this, I'm bypassing the validation.. 
- - # serializer = self.get_serializer_class()(data=request.data, context={'request': request, 'view': self}) - # logger.debug('serializer: %s', serializer) - # if not serializer.is_valid(): - # return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST) - - # logger.debug("Serializer validated_data=%s", serializer.validated_data) - - # computed_set_name = serializer.validated_data['name'] - # computed_set = models.ComputedSet.objects.get(name=computed_set_name) + # If done like this, it's bypassing the validation.. computed_set_name = request.data['name'] try: computed_set = models.ComputedSet.objects.get(name=computed_set_name) @@ -2636,13 +2624,16 @@ def post(self, request, *args, **kwargs): status=status.HTTP_404_NOT_FOUND, ) - # if computed_set.target.project.title not in _ISPYB_SAFE_QUERY_SET.get_proposals_for_user( - # request.user, restrict_public_to_membership=False - # ): - # return Response( - # {'error': "You have no access to the Project"}, - # status=status.HTTP_403_FORBIDDEN, - # ) + if ( + computed_set.target.project.title + not in _ISPYB_SAFE_QUERY_SET.get_proposals_for_user( + request.user, restrict_public_to_membership=False + ) + ): + return Response( + {'error': "You have no access to the Project"}, + status=status.HTTP_403_FORBIDDEN, + ) # so now, get the file, and get the pdbs sdf_file = Path(computed_set.written_sdf_filename) @@ -2652,9 +2643,36 @@ def post(self, request, *args, **kwargs): status=status.HTTP_404_NOT_FOUND, ) - # Finally, return the file - wrapper = FileWrapper(open(sdf_file, 'rb')) - response = FileResponse(wrapper, content_type='text/plain') - response['Content-Disposition'] = 'attachment; filename="%s"' % sdf_file.name - response['Content-Length'] = os.path.getsize(sdf_file) - return response + pdbs = computed_set.computed_molecules.filter(pdb__isnull=True) + if pdbs.exists(): + # custom pdbs exist, zip all together and return an archive + zip_buffer = BytesIO() + with zipfile.ZipFile(zip_buffer, 'a', 
zipfile.ZIP_DEFLATED) as ziparchive: + with open(sdf_file, 'rb') as contents: + ziparchive.writestr(f'{sdf_file.name}.sdf', contents.read()) + for f in pdbs: + fpath = Path(settings.MEDIA_ROOT).joinpath(f.pdb_info.name) + if fpath.is_file(): + with open(fpath, 'rb') as contents: + ziparchive.writestr(f.get_filename(), contents.read()) + else: + ziparchive.writestr(f'{f.get_filename()}_MISSING', r'') + + response = HttpResponse( + zip_buffer.getvalue(), content_type='application/zip' + ) + response['Content-Disposition'] = ( + 'attachment; filename="%s"' % f'{computed_set.name}.zip' + ) + response['Content-Length'] = zip_buffer.getbuffer().nbytes + return response + + else: + # no custom pdbs, return sdf + wrapper = FileWrapper(open(sdf_file, 'rb')) + response = FileResponse(wrapper, content_type='text/plain') + response['Content-Disposition'] = ( + 'attachment; filename="%s"' % sdf_file.name + ) + response['Content-Length'] = os.path.getsize(sdf_file) + return response From 1a2078374d92b862a612d67f07f27fc2cd69476b Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Mon, 21 Oct 2024 11:02:11 +0100 Subject: [PATCH 15/17] Another fix to data structure migration script. Tested on prod directory tree and confirmed working, safe do deploy to prod. NB! tarball 'A71EV2A_upload_1_2_161024.tgz' seems to be missing! 
--- viewer/migrations/0072_auto_20241010_1437.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/viewer/migrations/0072_auto_20241010_1437.py b/viewer/migrations/0072_auto_20241010_1437.py index 3958a55d..8c55d2e9 100644 --- a/viewer/migrations/0072_auto_20241010_1437.py +++ b/viewer/migrations/0072_auto_20241010_1437.py @@ -39,8 +39,12 @@ def move_data(apps, schema_editor): # move tarballs for exp_upload in target.experimentupload_set.all(): tarball_path = root_path.joinpath(exp_upload.file.name) - new_tarball_path = new_data_path.joinpath(exp_upload.file.name) - tarball_path.rename(new_tarball_path) + if tarball_path.exists(): + new_tarball_path = new_data_path.joinpath(exp_upload.file.name) + tarball_path.rename(new_tarball_path) + else: + # some tarballs really do not exist on prod! + print(f'Tarball {tarball_path.name} does not exist') else: # old system, data path contains task_id new_archive_dir = sanitize_directory_name( From 22cb238002ad2ec4668e26bb1f0f9fd9c9cbb2d8 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Thu, 24 Oct 2024 12:05:10 +0100 Subject: [PATCH 16/17] feat: added timestamp to auto-upload tag --- viewer/target_loader.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/viewer/target_loader.py b/viewer/target_loader.py index d3168834..58823ad0 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -2028,8 +2028,9 @@ def process_bundle(self): # tag all new observations, so that the curator can find and # re-pose them + datestr = timezone.now().date().strftime('%Y-%m-%d') self._tag_observations( - self.version_dir, + f"{self.version_dir} {datestr}", "", TagCategory.objects.get(category="Other"), [ From 63fd36322e75f36788beda5037fa40d71ccd3ed2 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Fri, 25 Oct 2024 10:54:29 +0100 Subject: [PATCH 17/17] fix: single sdf in downloads has correct structure separators --- viewer/download_structures.py | 4 ++++ 1 file changed, 4 insertions(+) diff 
--git a/viewer/download_structures.py b/viewer/download_structures.py index b004835e..acdbadcb 100644 --- a/viewer/download_structures.py +++ b/viewer/download_structures.py @@ -165,6 +165,10 @@ def _read_and_patch_molecule_name(path, molecule_name=None): for next_line in f_in: content += next_line + # add sdf marker, the file read is mol but the combined file is sdf + if Path(path).suffix.lower() != '.sdf': + content += '$$$$\n\n' + return content