OpenConceptLab/ocl_issues#22 | exclude retired concepts and/or mappings from exports #14

Open — wants to merge 1 commit into master
5 changes: 3 additions & 2 deletions core/collections/views.py
@@ -33,7 +33,7 @@
 from core.collections.utils import is_version_specified
 from core.common.constants import (
     HEAD, RELEASED_PARAM, PROCESSING_PARAM, OK_MESSAGE,
-    ACCESS_TYPE_NONE)
+    ACCESS_TYPE_NONE, INCLUDE_RETIRED_PARAM)
 from core.common.mixins import (
     ConceptDictionaryCreateMixin, ListWithHeadersMixin, ConceptDictionaryUpdateMixin,
     ConceptContainerExportMixin,
@@ -584,7 +584,8 @@ class CollectionVersionExportView(ConceptContainerExportMixin, CollectionVersion
     def handle_export_version(self):
         version = self.get_object()
         try:
-            export_collection.delay(version.id)
+            include_retired = parse_boolean_query_param(self.request, INCLUDE_RETIRED_PARAM, "True")
+            export_collection.delay(version.id, include_retired)
             return status.HTTP_202_ACCEPTED
         except AlreadyQueued:
             return status.HTTP_409_CONFLICT
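Note: `parse_boolean_query_param` is pulled in from `core.common.utils` (see the mixins.py hunk below), but its body is not part of this diff. Going by the call sites — a string default of `"True"` here, and the tests below expecting a plain `False` to reach `export_collection.delay(...)` — a minimal sketch of the behaviour being relied on could look like the following. This is an assumption only; the actual helper in `core/common/utils.py` may differ.

```python
# Sketch only, not taken from this PR: case-insensitive string-to-bool parsing
# of a query parameter, with a string default.
def parse_boolean_query_param(request, param_name, default=None):
    value = request.query_params.get(param_name, default)
    if value is None:
        return None
    for boolean in (True, False):
        if str(value).lower() == str(boolean).lower():
            return boolean
    return None
```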
32 changes: 21 additions & 11 deletions core/common/mixins.py
@@ -15,12 +15,12 @@
 from rest_framework.mixins import ListModelMixin, CreateModelMixin
 from rest_framework.response import Response

-from core.common.constants import HEAD, ACCESS_TYPE_EDIT, ACCESS_TYPE_VIEW, ACCESS_TYPE_NONE, INCLUDE_FACETS, \
+from core.common.constants import HEAD, ACCESS_TYPE_EDIT, ACCESS_TYPE_VIEW, ACCESS_TYPE_NONE, INCLUDE_FACETS, INCLUDE_RETIRED_PARAM, \
     LIST_DEFAULT_LIMIT, HTTP_COMPRESS_HEADER, CSV_DEFAULT_LIMIT, FACETS_ONLY, NOT_FOUND, \
     MUST_SPECIFY_EXTRA_PARAM_IN_BODY
 from core.common.permissions import HasPrivateAccess, HasOwnership, CanViewConceptDictionary
 from core.common.services import S3
-from .utils import write_csv_to_s3, get_csv_from_s3, get_query_params_from_url_string, compact_dict_by_values
+from .utils import write_csv_to_s3, get_csv_from_s3, get_query_params_from_url_string, compact_dict_by_values, parse_boolean_query_param

 logger = logging.getLogger('oclapi')

@@ -542,16 +542,8 @@ def get_object(self):
         return instance

     def get(self, request, *args, **kwargs):  # pylint: disable=unused-argument
-        version = self.get_object()
-        logger.debug(
-            'Export requested for %s version %s - Requesting AWS-S3 key', self.entity.lower(), version.version
-        )
-        if version.is_head and not request.user.is_staff:
-            return Response(status=status.HTTP_405_METHOD_NOT_ALLOWED)
-
-        if version.has_export():
-            export_url = version.get_export_url()
-
+        def export_response(export_url):
             no_redirect = request.query_params.get('noRedirect', False) in ['true', 'True', True]
             if no_redirect:
                 return Response(dict(url=export_url), status=status.HTTP_200_OK)
@@ -567,6 +559,24 @@ def get(self, request, *args, **kwargs):  # pylint: disable=unused-argument
             response['Last-Updated-Timezone'] = settings.TIME_ZONE_PLACE
             return response

+        version = self.get_object()
+        include_retired = parse_boolean_query_param(request, INCLUDE_RETIRED_PARAM, "True")
+        logger.debug(
+            'Export requested for %s version %s - Requesting AWS-S3 key', self.entity.lower(), version.version
+        )
+
+        if version.is_head and not request.user.is_staff:
+            return Response(status=status.HTTP_405_METHOD_NOT_ALLOWED)
+
+        if include_retired:
+            if version.has_export():
+                export_url = version.get_export_url()
+                return export_response(export_url)
+        else:
+            if version.has_unretired_export():
+                export_url = version.get_unretired_export_url()
+                return export_response(export_url)
+
         if version.is_exporting:
             return Response(status=status.HTTP_208_ALREADY_REPORTED)

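With the branching above, a client chooses the retired-free archive purely through the query string. A usage sketch follows (host, owner, collection and token are illustrative, not taken from this PR); the 200-with-URL and 208 responses are the ones shown in the hunk above.

```python
# Illustrative client call; the host, org/collection names and token are hypothetical.
import requests

API_BASE = "https://api.openconceptlab.org"   # hypothetical host
API_TOKEN = "my-token"                        # hypothetical token

response = requests.get(
    f"{API_BASE}/orgs/MyOrg/collections/MyColl/v1/export/",
    params={"includeRetired": "False", "noRedirect": "true"},
    headers={"Authorization": "Token " + API_TOKEN},
)

if response.status_code == 200:
    # with noRedirect, the pre-signed URL of the *_unretired.zip export is returned as JSON
    print(response.json()["url"])
elif response.status_code == 208:
    print("export is still being generated")
```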
11 changes: 11 additions & 0 deletions core/common/models.py
@@ -722,6 +722,11 @@ def is_exporting(self):
     def export_path(self):
         last_update = self.last_child_update.strftime('%Y%m%d%H%M%S')
         return self.generic_export_path(suffix=f"{last_update}.zip")

+    @cached_property
+    def exclude_retired_export_path(self):
+        last_update = self.last_child_update.strftime('%Y%m%d%H%M%S')
+        return self.generic_export_path(suffix=f"{last_update}_unretired.zip")
+
     def generic_export_path(self, suffix='*'):
         path = f"{self.parent_resource}/{self.mnemonic}_{self.version}."
@@ -733,8 +738,14 @@ def generic_export_path(self, suffix='*'):
     def get_export_url(self):
         return S3.url_for(self.export_path)

+    def get_unretired_export_url(self):
+        return S3.url_for(self.exclude_retired_export_path)
+
     def has_export(self):
         return S3.exists(self.export_path)

+    def has_unretired_export(self):
+        return S3.exists(self.exclude_retired_export_path)
+
     def can_view_all_content(self, user):
         if get(user, 'is_anonymous'):
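The new `exclude_retired_export_path` mirrors `export_path` but tags the archive with an `_unretired` suffix, so both flavours of a version's export can sit side by side under the same S3 prefix and are checked for independently by `has_export()` / `has_unretired_export()`. For illustration (owner, mnemonic, version and timestamp are made up; the pattern follows `generic_export_path` above):

```python
# Hypothetical values, pattern only.
version.export_path                  # "MyOrg/MyColl_v1.20210115103000.zip"
version.exclude_retired_export_path  # "MyOrg/MyColl_v1.20210115103000_unretired.zip"
```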
8 changes: 4 additions & 4 deletions core/common/tasks.py
@@ -60,7 +60,7 @@ def delete_source(source_id):


 @app.task(base=QueueOnce, bind=True)
-def export_source(self, version_id):
+def export_source(self, version_id, include_retired=True):
     from core.sources.models import Source
     logger.info('Finding source version...')

@@ -75,14 +75,14 @@ def export_source(self, version_id):
     version.add_processing(self.request.id)
     try:
         logger.info('Found source version %s. Beginning export...', version.version)
-        write_export_file(version, 'source', 'core.sources.serializers.SourceVersionExportSerializer', logger)
+        write_export_file(version, include_retired, 'source', 'core.sources.serializers.SourceVersionExportSerializer', logger)
         logger.info('Export complete!')
     finally:
         version.remove_processing(self.request.id)


 @app.task(base=QueueOnce, bind=True)
-def export_collection(self, version_id):
+def export_collection(self, version_id, include_retired=True):
     from core.collections.models import Collection
     logger.info('Finding collection version...')

@@ -98,7 +98,7 @@ def export_collection(self, version_id):
     try:
         logger.info('Found collection version %s. Beginning export...', version.version)
         write_export_file(
-            version, 'collection', 'core.collections.serializers.CollectionVersionExportSerializer', logger
+            version, include_retired, 'collection', 'core.collections.serializers.CollectionVersionExportSerializer', logger
         )
         logger.info('Export complete!')
     finally:
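Both Celery tasks default `include_retired=True`, so existing callers that queue an export without the new argument keep producing the full archive:

```python
# Backwards-compatible call sites (sketch):
export_source.delay(version.id)         # unchanged behaviour: full export, retired included
export_source.delay(version.id, False)  # new: export without retired concepts and mappings
```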
11 changes: 9 additions & 2 deletions core/common/utils.py
@@ -192,7 +192,7 @@ def get_class(kls):


 def write_export_file(
-        version, resource_type, resource_serializer_type, logger
+        version, include_retired, resource_type, resource_serializer_type, logger
 ):  # pylint: disable=too-many-statements,too-many-locals,too-many-branches
     from core.concepts.models import Concept
     from core.mappings.models import Mapping
@@ -208,6 +208,13 @@
     logger.info('Done serializing attributes.')

     batch_size = 100
+    concepts_qs = version.concepts
+    mappings_qs = version.mappings
+
+    if not include_retired:
+        concepts_qs = concepts_qs.filter(retired=False)
+        mappings_qs = mappings_qs.filter(retired=False)
+
     is_collection = resource_type == 'collection'

     if is_collection:
@@ -329,7 +336,7 @@ def write_export_file(
     logger.info(file_path)
     logger.info('Done compressing. Uploading...')

-    s3_key = version.export_path
+    s3_key = version.export_path if include_retired else version.exclude_retired_export_path
     S3.upload_file(
         key=s3_key, file_path=file_path, binary=True, metadata=dict(ContentType='application/zip'),
         headers={'content-type': 'application/zip'}
64 changes: 60 additions & 4 deletions core/integration_tests/tests_collections.py
@@ -857,29 +857,29 @@ def test_post_303(self, s3_exists_mock):
     def test_post_202(self, s3_exists_mock, export_collection_mock):
         s3_exists_mock.return_value = False
         response = self.client.post(
-            self.collection_v1.uri + 'export/',
+            self.collection_v1.uri + 'export/?includeRetired=False',
             HTTP_AUTHORIZATION='Token ' + self.token,
             format='json'
         )

         self.assertEqual(response.status_code, 202)
         s3_exists_mock.assert_called_once_with(f"username/coll_v1.{self.v1_updated_at}.zip")
-        export_collection_mock.delay.assert_called_once_with(self.collection_v1.id)
+        export_collection_mock.delay.assert_called_once_with(self.collection_v1.id, False)

     @patch('core.collections.views.export_collection')
     @patch('core.common.services.S3.exists')
     def test_post_409(self, s3_exists_mock, export_collection_mock):
         s3_exists_mock.return_value = False
         export_collection_mock.delay.side_effect = AlreadyQueued('already-queued')
         response = self.client.post(
-            self.collection_v1.uri + 'export/',
+            self.collection_v1.uri + 'export/?includeRetired=False',
             HTTP_AUTHORIZATION='Token ' + self.token,
             format='json'
         )

         self.assertEqual(response.status_code, 409)
         s3_exists_mock.assert_called_once_with(f"username/coll_v1.{self.v1_updated_at}.zip")
-        export_collection_mock.delay.assert_called_once_with(self.collection_v1.id)
+        export_collection_mock.delay.assert_called_once_with(self.collection_v1.id, False)


 class CollectionVersionListViewTest(OCLAPITestCase):
@@ -999,6 +999,62 @@ def test_export_collection(self, s3_mock):  # pylint: disable=too-many-locals
         import shutil
         shutil.rmtree(latest_temp_dir)

+    @patch('core.common.utils.S3')
+    def test_unretired_export_collection(self, s3_mock):  # pylint: disable=too-many-locals
+        s3_mock.url_for = Mock(return_value='https://s3-url')
+        s3_mock.upload_file = Mock()
+        source = OrganizationSourceFactory()
+        concept1 = ConceptFactory(parent=source)
+        concept2 = ConceptFactory(parent=source)
+        mapping = MappingFactory(from_concept=concept2, to_concept=concept1, parent=source)
+        collection = OrganizationCollectionFactory()
+        collection.add_references([concept1.uri, concept2.uri, mapping.uri])
+        collection.refresh_from_db()
+
+        export_collection(collection.id, include_retired=False)  # pylint: disable=no-value-for-parameter
+
+        latest_temp_dir = get_latest_dir_in_path('/tmp/')
+        zipped_file = zipfile.ZipFile(latest_temp_dir + '/export.zip')
+        exported_data = json.loads(zipped_file.read('export.json').decode('utf-8'))
+
+        self.assertEqual(
+            exported_data,
+            {**CollectionVersionExportSerializer(collection).data, 'concepts': ANY, 'mappings': ANY, 'references': ANY}
+        )
+
+        exported_concepts = exported_data['concepts']
+        expected_concepts = ConceptVersionExportSerializer(
+            [concept2.get_latest_version(), concept1.get_latest_version()], many=True
+        ).data
+
+        self.assertEqual(len(exported_concepts), 2)
+        self.assertIn(expected_concepts[0], exported_concepts)
+        self.assertIn(expected_concepts[1], exported_concepts)
+
+        exported_mappings = exported_data['mappings']
+        expected_mappings = MappingDetailSerializer([mapping.get_latest_version()], many=True).data
+
+        self.assertEqual(len(exported_mappings), 1)
+        self.assertEqual(expected_mappings, exported_mappings)
+
+        exported_references = exported_data['references']
+        expected_references = CollectionReferenceSerializer(collection.references.all(), many=True).data
+
+        self.assertEqual(len(exported_references), 3)
+        self.assertIn(exported_references[0], expected_references)
+        self.assertIn(exported_references[1], expected_references)
+        self.assertIn(exported_references[2], expected_references)
+
+        s3_upload_key = collection.exclude_retired_export_path
+        s3_mock.upload_file.assert_called_once_with(
+            key=s3_upload_key, file_path=latest_temp_dir + '/export.zip', binary=True,
+            metadata={'ContentType': 'application/zip'}, headers={'content-type': 'application/zip'}
+        )
+        s3_mock.url_for.assert_called_once_with(s3_upload_key)
+
+        import shutil
+        shutil.rmtree(latest_temp_dir)
+

 class CollectionConceptsViewTest(OCLAPITestCase):
     def setUp(self):
55 changes: 51 additions & 4 deletions core/integration_tests/tests_sources.py
@@ -704,29 +704,29 @@ def test_post_303(self, s3_exists_mock):
     def test_post_202(self, s3_exists_mock, export_source_mock):
         s3_exists_mock.return_value = False
         response = self.client.post(
-            self.source_v1.uri + 'export/',
+            self.source_v1.uri + 'export/?includeRetired=False',
             HTTP_AUTHORIZATION='Token ' + self.token,
             format='json'
         )

         self.assertEqual(response.status_code, 202)
         s3_exists_mock.assert_called_once_with(f"username/source1_v1.{self.v1_updated_at}.zip")
-        export_source_mock.delay.assert_called_once_with(self.source_v1.id)
+        export_source_mock.delay.assert_called_once_with(self.source_v1.id, False)

     @patch('core.sources.views.export_source')
     @patch('core.common.services.S3.exists')
     def test_post_409(self, s3_exists_mock, export_source_mock):
         s3_exists_mock.return_value = False
         export_source_mock.delay.side_effect = AlreadyQueued('already-queued')
         response = self.client.post(
-            self.source_v1.uri + 'export/',
+            self.source_v1.uri + 'export/?includeRetired=False',
             HTTP_AUTHORIZATION='Token ' + self.token,
             format='json'
         )

         self.assertEqual(response.status_code, 409)
         s3_exists_mock.assert_called_once_with(f"username/source1_v1.{self.v1_updated_at}.zip")
-        export_source_mock.delay.assert_called_once_with(self.source_v1.id)
+        export_source_mock.delay.assert_called_once_with(self.source_v1.id, False)


 class ExportSourceTaskTest(OCLAPITestCase):
@@ -777,6 +777,53 @@ def test_export_source(self, s3_mock):  # pylint: disable=too-many-locals
         import shutil
         shutil.rmtree(latest_temp_dir)

+    @patch('core.common.utils.S3')
+    def test_unretired_export_source(self, s3_mock):  # pylint: disable=too-many-locals
+        s3_mock.url_for = Mock(return_value='https://s3-url')
+        s3_mock.upload_file = Mock()
+        source = OrganizationSourceFactory()
+        concept1 = ConceptFactory(parent=source)
+        concept2 = ConceptFactory(parent=source)
+        mapping = MappingFactory(from_concept=concept2, to_concept=concept1, parent=source)
+
+        source_v1 = OrganizationSourceFactory(mnemonic=source.mnemonic, organization=source.organization, version='v1')
+        concept1.sources.add(source_v1)
+        concept2.sources.add(source_v1)
+        mapping.sources.add(source_v1)
+
+        export_source(source_v1.id, include_retired=False)  # pylint: disable=no-value-for-parameter
+
+        latest_temp_dir = get_latest_dir_in_path('/tmp/')
+        zipped_file = zipfile.ZipFile(latest_temp_dir + '/export.zip')
+        exported_data = json.loads(zipped_file.read('export.json').decode('utf-8'))
+
+        self.assertEqual(
+            exported_data, {**SourceVersionExportSerializer(source_v1).data, 'concepts': ANY, 'mappings': ANY}
+        )
+
+        exported_concepts = exported_data['concepts']
+        expected_concepts = ConceptVersionExportSerializer([concept2, concept1], many=True).data
+
+        self.assertEqual(len(exported_concepts), 2)
+        self.assertIn(expected_concepts[0], exported_concepts)
+        self.assertIn(expected_concepts[1], exported_concepts)
+
+        exported_mappings = exported_data['mappings']
+        expected_mappings = MappingDetailSerializer([mapping], many=True).data
+
+        self.assertEqual(len(exported_mappings), 1)
+        self.assertEqual(expected_mappings, exported_mappings)
+
+        s3_upload_key = source_v1.exclude_retired_export_path
+        s3_mock.upload_file.assert_called_once_with(
+            key=s3_upload_key, file_path=latest_temp_dir + '/export.zip', binary=True,
+            metadata={'ContentType': 'application/zip'}, headers={'content-type': 'application/zip'}
+        )
+        s3_mock.url_for.assert_called_once_with(s3_upload_key)
+
+        import shutil
+        shutil.rmtree(latest_temp_dir)
+

 class SourceLogoViewTest(OCLAPITestCase):
     def setUp(self):
5 changes: 3 additions & 2 deletions core/sources/views.py
@@ -14,7 +14,7 @@
 from rest_framework.response import Response

 from core.client_configs.views import ResourceClientConfigsView
-from core.common.constants import HEAD, RELEASED_PARAM, PROCESSING_PARAM, ACCESS_TYPE_NONE
+from core.common.constants import HEAD, RELEASED_PARAM, PROCESSING_PARAM, ACCESS_TYPE_NONE, INCLUDE_RETIRED_PARAM
 from core.common.mixins import ListWithHeadersMixin, ConceptDictionaryCreateMixin, ConceptDictionaryUpdateMixin, \
     ConceptContainerExportMixin, ConceptContainerProcessingMixin, ConceptContainerExtraRetrieveUpdateDestroyView
 from core.common.permissions import CanViewConceptDictionary, CanEditConceptDictionary, HasAccessToVersionedObject, \
@@ -392,7 +392,8 @@ class SourceVersionExportView(ConceptContainerExportMixin, SourceVersionBaseView
     def handle_export_version(self):
         version = self.get_object()
         try:
-            export_source.delay(version.id)
+            include_retired = parse_boolean_query_param(self.request, INCLUDE_RETIRED_PARAM, "True")
+            export_source.delay(version.id, include_retired)
             return status.HTTP_202_ACCEPTED
         except AlreadyQueued:
             return status.HTTP_409_CONFLICT