From 02f91383c254bf0ab415ce5332b4cd9245b2f3a7 Mon Sep 17 00:00:00 2001 From: patrickcmd Date: Tue, 27 Jul 2021 17:46:53 +0300 Subject: [PATCH] OpenConceptLab/ocl_issues#22 | Retired concepts and/or mappings are now excluded from exports --- core/collections/views.py | 5 +- core/common/mixins.py | 32 +++++++---- core/common/models.py | 11 ++++ core/common/tasks.py | 8 +-- core/common/utils.py | 11 +++- core/integration_tests/tests_collections.py | 64 +++++++++++++++++++-- core/integration_tests/tests_sources.py | 55 ++++++++++++++++-- core/sources/views.py | 5 +- 8 files changed, 162 insertions(+), 29 deletions(-) diff --git a/core/collections/views.py b/core/collections/views.py index d0a75c8a4..a288d9abf 100644 --- a/core/collections/views.py +++ b/core/collections/views.py @@ -33,7 +33,7 @@ from core.collections.utils import is_version_specified from core.common.constants import ( HEAD, RELEASED_PARAM, PROCESSING_PARAM, OK_MESSAGE, - ACCESS_TYPE_NONE) + ACCESS_TYPE_NONE, INCLUDE_RETIRED_PARAM) from core.common.mixins import ( ConceptDictionaryCreateMixin, ListWithHeadersMixin, ConceptDictionaryUpdateMixin, ConceptContainerExportMixin, @@ -584,7 +584,8 @@ class CollectionVersionExportView(ConceptContainerExportMixin, CollectionVersion def handle_export_version(self): version = self.get_object() try: - export_collection.delay(version.id) + include_retired = parse_boolean_query_param(self.request, INCLUDE_RETIRED_PARAM, "True") + export_collection.delay(version.id, include_retired) return status.HTTP_202_ACCEPTED except AlreadyQueued: return status.HTTP_409_CONFLICT diff --git a/core/common/mixins.py b/core/common/mixins.py index 4048a5fd6..a7e65d52f 100644 --- a/core/common/mixins.py +++ b/core/common/mixins.py @@ -15,12 +15,12 @@ from rest_framework.mixins import ListModelMixin, CreateModelMixin from rest_framework.response import Response -from core.common.constants import HEAD, ACCESS_TYPE_EDIT, ACCESS_TYPE_VIEW, ACCESS_TYPE_NONE, INCLUDE_FACETS, \ +from core.common.constants import HEAD, ACCESS_TYPE_EDIT, ACCESS_TYPE_VIEW, ACCESS_TYPE_NONE, INCLUDE_FACETS, INCLUDE_RETIRED_PARAM, \ LIST_DEFAULT_LIMIT, HTTP_COMPRESS_HEADER, CSV_DEFAULT_LIMIT, FACETS_ONLY, NOT_FOUND, \ MUST_SPECIFY_EXTRA_PARAM_IN_BODY from core.common.permissions import HasPrivateAccess, HasOwnership, CanViewConceptDictionary from core.common.services import S3 -from .utils import write_csv_to_s3, get_csv_from_s3, get_query_params_from_url_string, compact_dict_by_values +from .utils import write_csv_to_s3, get_csv_from_s3, get_query_params_from_url_string, compact_dict_by_values, parse_boolean_query_param logger = logging.getLogger('oclapi') @@ -542,16 +542,8 @@ def get_object(self): return instance def get(self, request, *args, **kwargs): # pylint: disable=unused-argument - version = self.get_object() - logger.debug( - 'Export requested for %s version %s - Requesting AWS-S3 key', self.entity.lower(), version.version - ) - if version.is_head and not request.user.is_staff: - return Response(status=status.HTTP_405_METHOD_NOT_ALLOWED) - - if version.has_export(): - export_url = version.get_export_url() + def export_response(export_url): no_redirect = request.query_params.get('noRedirect', False) in ['true', 'True', True] if no_redirect: return Response(dict(url=export_url), status=status.HTTP_200_OK) @@ -567,6 +559,24 @@ def get(self, request, *args, **kwargs): # pylint: disable=unused-argument response['Last-Updated-Timezone'] = settings.TIME_ZONE_PLACE return response + version = self.get_object() + include_retired = parse_boolean_query_param(request, INCLUDE_RETIRED_PARAM, "True") + logger.debug( + 'Export requested for %s version %s - Requesting AWS-S3 key', self.entity.lower(), version.version + ) + + if version.is_head and not request.user.is_staff: + return Response(status=status.HTTP_405_METHOD_NOT_ALLOWED) + + if include_retired: + if version.has_export(): + export_url = version.get_export_url() + return export_response(export_url) + else: + if version.has_unretired_export(): + export_url = version.get_unretired_export_url() + return export_response(export_url) + if version.is_exporting: return Response(status=status.HTTP_208_ALREADY_REPORTED) diff --git a/core/common/models.py b/core/common/models.py index de062dbe2..b237d9114 100644 --- a/core/common/models.py +++ b/core/common/models.py @@ -722,6 +722,11 @@ def is_exporting(self): def export_path(self): last_update = self.last_child_update.strftime('%Y%m%d%H%M%S') return self.generic_export_path(suffix=f"{last_update}.zip") + + @cached_property + def exclude_retired_export_path(self): + last_update = self.last_child_update.strftime('%Y%m%d%H%M%S') + return self.generic_export_path(suffix=f"{last_update}_unretired.zip") def generic_export_path(self, suffix='*'): path = f"{self.parent_resource}/{self.mnemonic}_{self.version}." @@ -733,8 +738,14 @@ def generic_export_path(self, suffix='*'): def get_export_url(self): return S3.url_for(self.export_path) + def get_unretired_export_url(self): + return S3.url_for(self.exclude_retired_export_path) + def has_export(self): return S3.exists(self.export_path) + + def has_unretired_export(self): + return S3.exists(self.exclude_retired_export_path) def can_view_all_content(self, user): if get(user, 'is_anonymous'): diff --git a/core/common/tasks.py b/core/common/tasks.py index c9d8cba66..2d22fcfc4 100644 --- a/core/common/tasks.py +++ b/core/common/tasks.py @@ -60,7 +60,7 @@ def delete_source(source_id): @app.task(base=QueueOnce, bind=True) -def export_source(self, version_id): +def export_source(self, version_id, include_retired=True): from core.sources.models import Source logger.info('Finding source version...') @@ -75,14 +75,14 @@ def export_source(self, version_id): version.add_processing(self.request.id) try: logger.info('Found source version %s. Beginning export...', version.version) - write_export_file(version, 'source', 'core.sources.serializers.SourceVersionExportSerializer', logger) + write_export_file(version, include_retired, 'source', 'core.sources.serializers.SourceVersionExportSerializer', logger) logger.info('Export complete!') finally: version.remove_processing(self.request.id) @app.task(base=QueueOnce, bind=True) -def export_collection(self, version_id): +def export_collection(self, version_id, include_retired=True): from core.collections.models import Collection logger.info('Finding collection version...') @@ -98,7 +98,7 @@ def export_collection(self, version_id): try: logger.info('Found collection version %s. Beginning export...', version.version) write_export_file( - version, 'collection', 'core.collections.serializers.CollectionVersionExportSerializer', logger + version, include_retired, 'collection', 'core.collections.serializers.CollectionVersionExportSerializer', logger ) logger.info('Export complete!') finally: diff --git a/core/common/utils.py b/core/common/utils.py index c5f945a52..aff225646 100644 --- a/core/common/utils.py +++ b/core/common/utils.py @@ -192,7 +192,7 @@ def get_class(kls): def write_export_file( - version, resource_type, resource_serializer_type, logger + version, include_retired, resource_type, resource_serializer_type, logger ): # pylint: disable=too-many-statements,too-many-locals,too-many-branches from core.concepts.models import Concept from core.mappings.models import Mapping @@ -208,6 +208,13 @@ def write_export_file( logger.info('Done serializing attributes.') batch_size = 100 + concepts_qs = version.concepts + mappings_qs = version.mappings + + if not include_retired: + concepts_qs = concepts_qs.filter(retired=False) + mappings_qs = mappings_qs.filter(retired=False) + is_collection = resource_type == 'collection' if is_collection: @@ -329,7 +336,7 @@ def write_export_file( logger.info(file_path) logger.info('Done compressing. Uploading...') - s3_key = version.export_path + s3_key = version.export_path if include_retired else version.exclude_retired_export_path S3.upload_file( key=s3_key, file_path=file_path, binary=True, metadata=dict(ContentType='application/zip'), headers={'content-type': 'application/zip'} diff --git a/core/integration_tests/tests_collections.py b/core/integration_tests/tests_collections.py index f551c28fb..abb0e98dd 100644 --- a/core/integration_tests/tests_collections.py +++ b/core/integration_tests/tests_collections.py @@ -857,14 +857,14 @@ def test_post_303(self, s3_exists_mock): def test_post_202(self, s3_exists_mock, export_collection_mock): s3_exists_mock.return_value = False response = self.client.post( - self.collection_v1.uri + 'export/', + self.collection_v1.uri + 'export/?includeRetired=False', HTTP_AUTHORIZATION='Token ' + self.token, format='json' ) self.assertEqual(response.status_code, 202) s3_exists_mock.assert_called_once_with(f"username/coll_v1.{self.v1_updated_at}.zip") - export_collection_mock.delay.assert_called_once_with(self.collection_v1.id) + export_collection_mock.delay.assert_called_once_with(self.collection_v1.id, False) @patch('core.collections.views.export_collection') @patch('core.common.services.S3.exists') @@ -872,14 +872,14 @@ def test_post_409(self, s3_exists_mock, export_collection_mock): s3_exists_mock.return_value = False export_collection_mock.delay.side_effect = AlreadyQueued('already-queued') response = self.client.post( - self.collection_v1.uri + 'export/', + self.collection_v1.uri + 'export/?includeRetired=False', HTTP_AUTHORIZATION='Token ' + self.token, format='json' ) self.assertEqual(response.status_code, 409) s3_exists_mock.assert_called_once_with(f"username/coll_v1.{self.v1_updated_at}.zip") - export_collection_mock.delay.assert_called_once_with(self.collection_v1.id) + export_collection_mock.delay.assert_called_once_with(self.collection_v1.id, False) class CollectionVersionListViewTest(OCLAPITestCase): @@ -999,6 +999,62 @@ def test_export_collection(self, s3_mock): # pylint: disable=too-many-locals import shutil shutil.rmtree(latest_temp_dir) + @patch('core.common.utils.S3') + def test_unretired_export_collection(self, s3_mock): # pylint: disable=too-many-locals + s3_mock.url_for = Mock(return_value='https://s3-url') + s3_mock.upload_file = Mock() + source = OrganizationSourceFactory() + concept1 = ConceptFactory(parent=source) + concept2 = ConceptFactory(parent=source) + mapping = MappingFactory(from_concept=concept2, to_concept=concept1, parent=source) + collection = OrganizationCollectionFactory() + collection.add_references([concept1.uri, concept2.uri, mapping.uri]) + collection.refresh_from_db() + + export_collection(collection.id, include_retired=False) # pylint: disable=no-value-for-parameter + + latest_temp_dir = get_latest_dir_in_path('/tmp/') + zipped_file = zipfile.ZipFile(latest_temp_dir + '/export.zip') + exported_data = json.loads(zipped_file.read('export.json').decode('utf-8')) + + self.assertEqual( + exported_data, + {**CollectionVersionExportSerializer(collection).data, 'concepts': ANY, 'mappings': ANY, 'references': ANY} + ) + + exported_concepts = exported_data['concepts'] + expected_concepts = ConceptVersionExportSerializer( + [concept2.get_latest_version(), concept1.get_latest_version()], many=True + ).data + + self.assertEqual(len(exported_concepts), 2) + self.assertIn(expected_concepts[0], exported_concepts) + self.assertIn(expected_concepts[1], exported_concepts) + + exported_mappings = exported_data['mappings'] + expected_mappings = MappingDetailSerializer([mapping.get_latest_version()], many=True).data + + self.assertEqual(len(exported_mappings), 1) + self.assertEqual(expected_mappings, exported_mappings) + + exported_references = exported_data['references'] + expected_references = CollectionReferenceSerializer(collection.references.all(), many=True).data + + self.assertEqual(len(exported_references), 3) + self.assertIn(exported_references[0], expected_references) + self.assertIn(exported_references[1], expected_references) + self.assertIn(exported_references[2], expected_references) + + s3_upload_key = collection.exclude_retired_export_path + s3_mock.upload_file.assert_called_once_with( + key=s3_upload_key, file_path=latest_temp_dir + '/export.zip', binary=True, + metadata={'ContentType': 'application/zip'}, headers={'content-type': 'application/zip'} + ) + s3_mock.url_for.assert_called_once_with(s3_upload_key) + + import shutil + shutil.rmtree(latest_temp_dir) + class CollectionConceptsViewTest(OCLAPITestCase): def setUp(self): diff --git a/core/integration_tests/tests_sources.py b/core/integration_tests/tests_sources.py index 34fd450f2..5b0c42959 100644 --- a/core/integration_tests/tests_sources.py +++ b/core/integration_tests/tests_sources.py @@ -704,14 +704,14 @@ def test_post_303(self, s3_exists_mock): def test_post_202(self, s3_exists_mock, export_source_mock): s3_exists_mock.return_value = False response = self.client.post( - self.source_v1.uri + 'export/', + self.source_v1.uri + 'export/?includeRetired=False', HTTP_AUTHORIZATION='Token ' + self.token, format='json' ) self.assertEqual(response.status_code, 202) s3_exists_mock.assert_called_once_with(f"username/source1_v1.{self.v1_updated_at}.zip") - export_source_mock.delay.assert_called_once_with(self.source_v1.id) + export_source_mock.delay.assert_called_once_with(self.source_v1.id, False) @patch('core.sources.views.export_source') @patch('core.common.services.S3.exists') @@ -719,14 +719,14 @@ def test_post_409(self, s3_exists_mock, export_source_mock): s3_exists_mock.return_value = False export_source_mock.delay.side_effect = AlreadyQueued('already-queued') response = self.client.post( - self.source_v1.uri + 'export/', + self.source_v1.uri + 'export/?includeRetired=False', HTTP_AUTHORIZATION='Token ' + self.token, format='json' ) self.assertEqual(response.status_code, 409) s3_exists_mock.assert_called_once_with(f"username/source1_v1.{self.v1_updated_at}.zip") - export_source_mock.delay.assert_called_once_with(self.source_v1.id) + export_source_mock.delay.assert_called_once_with(self.source_v1.id, False) class ExportSourceTaskTest(OCLAPITestCase): @@ -777,6 +777,53 @@ def test_export_source(self, s3_mock): # pylint: disable=too-many-locals import shutil shutil.rmtree(latest_temp_dir) + @patch('core.common.utils.S3') + def test_unretired_export_source(self, s3_mock): # pylint: disable=too-many-locals + s3_mock.url_for = Mock(return_value='https://s3-url') + s3_mock.upload_file = Mock() + source = OrganizationSourceFactory() + concept1 = ConceptFactory(parent=source) + concept2 = ConceptFactory(parent=source) + mapping = MappingFactory(from_concept=concept2, to_concept=concept1, parent=source) + + source_v1 = OrganizationSourceFactory(mnemonic=source.mnemonic, organization=source.organization, version='v1') + concept1.sources.add(source_v1) + concept2.sources.add(source_v1) + mapping.sources.add(source_v1) + + export_source(source_v1.id, include_retired=False) # pylint: disable=no-value-for-parameter + + latest_temp_dir = get_latest_dir_in_path('/tmp/') + zipped_file = zipfile.ZipFile(latest_temp_dir + '/export.zip') + exported_data = json.loads(zipped_file.read('export.json').decode('utf-8')) + + self.assertEqual( + exported_data, {**SourceVersionExportSerializer(source_v1).data, 'concepts': ANY, 'mappings': ANY} + ) + + exported_concepts = exported_data['concepts'] + expected_concepts = ConceptVersionExportSerializer([concept2, concept1], many=True).data + + self.assertEqual(len(exported_concepts), 2) + self.assertIn(expected_concepts[0], exported_concepts) + self.assertIn(expected_concepts[1], exported_concepts) + + exported_mappings = exported_data['mappings'] + expected_mappings = MappingDetailSerializer([mapping], many=True).data + + self.assertEqual(len(exported_mappings), 1) + self.assertEqual(expected_mappings, exported_mappings) + + s3_upload_key = source_v1.exclude_retired_export_path + s3_mock.upload_file.assert_called_once_with( + key=s3_upload_key, file_path=latest_temp_dir + '/export.zip', binary=True, + metadata={'ContentType': 'application/zip'}, headers={'content-type': 'application/zip'} + ) + s3_mock.url_for.assert_called_once_with(s3_upload_key) + + import shutil + shutil.rmtree(latest_temp_dir) + class SourceLogoViewTest(OCLAPITestCase): def setUp(self): diff --git a/core/sources/views.py b/core/sources/views.py index 7db3cc7c8..f53db8639 100644 --- a/core/sources/views.py +++ b/core/sources/views.py @@ -14,7 +14,7 @@ from rest_framework.response import Response from core.client_configs.views import ResourceClientConfigsView -from core.common.constants import HEAD, RELEASED_PARAM, PROCESSING_PARAM, ACCESS_TYPE_NONE +from core.common.constants import HEAD, RELEASED_PARAM, PROCESSING_PARAM, ACCESS_TYPE_NONE, INCLUDE_RETIRED_PARAM from core.common.mixins import ListWithHeadersMixin, ConceptDictionaryCreateMixin, ConceptDictionaryUpdateMixin, \ ConceptContainerExportMixin, ConceptContainerProcessingMixin, ConceptContainerExtraRetrieveUpdateDestroyView from core.common.permissions import CanViewConceptDictionary, CanEditConceptDictionary, HasAccessToVersionedObject, \ @@ -392,7 +392,8 @@ class SourceVersionExportView(ConceptContainerExportMixin, SourceVersionBaseView def handle_export_version(self): version = self.get_object() try: - export_source.delay(version.id) + include_retired = parse_boolean_query_param(self.request, INCLUDE_RETIRED_PARAM, "True") + export_source.delay(version.id, include_retired) return status.HTTP_202_ACCEPTED except AlreadyQueued: return status.HTTP_409_CONFLICT