From 3490313e8a0b763eeb5017813573577d556ce700 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Thu, 11 Apr 2024 11:49:07 -0400 Subject: [PATCH 01/15] AlreadyExistsError; use for DataFrame in tiledbsoma.io --- apis/python/src/tiledbsoma/__init__.py | 3 +- apis/python/src/tiledbsoma/_collection.py | 26 ++++++++---- .../python/src/tiledbsoma/_common_nd_array.py | 16 +++++--- apis/python/src/tiledbsoma/_dataframe.py | 16 +++++--- apis/python/src/tiledbsoma/_exception.py | 40 +++++++++++++++++++ apis/python/src/tiledbsoma/io/ingest.py | 17 +++++--- 6 files changed, 93 insertions(+), 25 deletions(-) diff --git a/apis/python/src/tiledbsoma/__init__.py b/apis/python/src/tiledbsoma/__init__.py index 337aebce38..03a67e72a7 100644 --- a/apis/python/src/tiledbsoma/__init__.py +++ b/apis/python/src/tiledbsoma/__init__.py @@ -147,7 +147,7 @@ from ._constants import SOMA_JOINID from ._dataframe import DataFrame from ._dense_nd_array import DenseNDArray -from ._exception import DoesNotExistError, SOMAError +from ._exception import AlreadyExistsError, DoesNotExistError, SOMAError from ._experiment import Experiment from ._factory import open from ._general_utilities import ( @@ -171,6 +171,7 @@ __version__ = get_implementation_version() __all__ = [ + "AlreadyExistsError", "AxisColumnNames", "AxisQuery", "Collection", diff --git a/apis/python/src/tiledbsoma/_collection.py b/apis/python/src/tiledbsoma/_collection.py index d74bcb11e8..fe9f05a4f2 100644 --- a/apis/python/src/tiledbsoma/_collection.py +++ b/apis/python/src/tiledbsoma/_collection.py @@ -37,7 +37,12 @@ from ._common_nd_array import NDArray from ._dataframe import DataFrame from ._dense_nd_array import DenseNDArray -from ._exception import SOMAError, is_does_not_exist_error +from ._exception import ( + AlreadyExistsError, + SOMAError, + is_already_exists_error, + is_does_not_exist_error, +) from ._funcs import typeguard_ignore from ._sparse_nd_array import SparseNDArray from ._tiledb_object import AnyTileDBObject, TileDBObject @@ -119,13 +124,18 @@ def create( Experimental. """ context = _validate_soma_tiledb_context(context) - tiledb.group_create(uri=uri, ctx=context.tiledb_ctx) - handle = cls._wrapper_type.open(uri, "w", context, tiledb_timestamp) - cls._set_create_metadata(handle) - return cls( - handle, - _dont_call_this_use_create_or_open_instead="tiledbsoma-internal-code", - ) + try: + tiledb.group_create(uri=uri, ctx=context.tiledb_ctx) + handle = cls._wrapper_type.open(uri, "w", context, tiledb_timestamp) + cls._set_create_metadata(handle) + return cls( + handle, + _dont_call_this_use_create_or_open_instead="tiledbsoma-internal-code", + ) + except tiledb.TileDBError as tdbe: + if is_already_exists_error(tdbe): + raise AlreadyExistsError(f"{uri!r} already exists") + raise @classmethod def open( diff --git a/apis/python/src/tiledbsoma/_common_nd_array.py b/apis/python/src/tiledbsoma/_common_nd_array.py index 3f476de60f..c9c1f77eaf 100644 --- a/apis/python/src/tiledbsoma/_common_nd_array.py +++ b/apis/python/src/tiledbsoma/_common_nd_array.py @@ -16,6 +16,7 @@ import tiledb from . import _arrow_types, _util +from ._exception import AlreadyExistsError, is_already_exists_error from ._tiledb_array import TileDBArray from ._types import OpenTimestamp from .options._soma_tiledb_context import ( @@ -91,11 +92,16 @@ def create( context, is_sparse=cls.is_sparse, ) - handle = cls._create_internal(uri, schema, context, tiledb_timestamp) - return cls( - handle, - _dont_call_this_use_create_or_open_instead="tiledbsoma-internal-code", - ) + try: + handle = cls._create_internal(uri, schema, context, tiledb_timestamp) + return cls( + handle, + _dont_call_this_use_create_or_open_instead="tiledbsoma-internal-code", + ) + except tiledb.TileDBError as tdbe: + if is_already_exists_error(tdbe): + raise AlreadyExistsError(f"{uri!r} already exists") + raise @property def shape(self) -> Tuple[int, ...]: diff --git a/apis/python/src/tiledbsoma/_dataframe.py b/apis/python/src/tiledbsoma/_dataframe.py index 4daae22ec8..cac3fc49d1 100644 --- a/apis/python/src/tiledbsoma/_dataframe.py +++ b/apis/python/src/tiledbsoma/_dataframe.py @@ -20,6 +20,7 @@ from . import _arrow_types, _util from . import pytiledbsoma as clib from ._constants import SOMA_JOINID +from ._exception import AlreadyExistsError, is_already_exists_error from ._query_condition import QueryCondition from ._read_iters import TableReadIter from ._tdb_handles import DataFrameWrapper @@ -217,11 +218,16 @@ def create( TileDBCreateOptions.from_platform_config(platform_config), context, ) - handle = cls._create_internal(uri, tdb_schema, context, tiledb_timestamp) - return cls( - handle, - _dont_call_this_use_create_or_open_instead="tiledbsoma-internal-code", - ) + try: + handle = cls._create_internal(uri, tdb_schema, context, tiledb_timestamp) + return cls( + handle, + _dont_call_this_use_create_or_open_instead="tiledbsoma-internal-code", + ) + except tiledb.TileDBError as tdbe: + if is_already_exists_error(tdbe): + raise AlreadyExistsError(f"{uri!r} already exists") + raise def keys(self) -> Tuple[str, ...]: """Returns the names of the columns when read back as a dataframe. diff --git a/apis/python/src/tiledbsoma/_exception.py b/apis/python/src/tiledbsoma/_exception.py index 109a42e72c..853df1cc2e 100644 --- a/apis/python/src/tiledbsoma/_exception.py +++ b/apis/python/src/tiledbsoma/_exception.py @@ -55,6 +55,46 @@ def is_does_not_exist_error(e: tiledb.TileDBError) -> bool: return False +class AlreadyExistsError(SOMAError): + """Raised when attempting to create an already existing SOMA object. + + Lifecycle: experimental + """ + + pass + + +def is_already_exists_error(e: tiledb.TileDBError) -> bool: + """Given a TileDBError, return true if it indicates the object already exists + + Lifecycle: experimental + + Example: + XXX EDIT ME + try: + with tiledb.open(uri): + ... + except tiledb.TileDBError as e: + if is_does_not_exist_error(e): + ... + raise e + """ + stre = str(e) + # Local-disk/S3 does-not-exist exceptions say 'Group does not exist'; TileDB Cloud + # does-not-exist exceptions are worded less clearly. + if ( + "lready exists" + in stre + # XXX + # or "Unrecognized array" in stre + # or "HTTP code 401" in stre + # or "HTTP code 404" in stre + ): + return True + + return False + + def is_duplicate_group_key_error(e: tiledb.TileDBError) -> bool: """Given a TileDBError, return try if it indicates a duplicate member add request in a tiledb.Group. diff --git a/apis/python/src/tiledbsoma/io/ingest.py b/apis/python/src/tiledbsoma/io/ingest.py index 80f74c396c..eecbedaaca 100644 --- a/apis/python/src/tiledbsoma/io/ingest.py +++ b/apis/python/src/tiledbsoma/io/ingest.py @@ -58,7 +58,11 @@ from .._collection import AnyTileDBCollection, CollectionBase from .._common_nd_array import NDArray from .._constants import SOMA_JOINID -from .._exception import DoesNotExistError, SOMAError +from .._exception import ( + AlreadyExistsError, + DoesNotExistError, + SOMAError, +) from .._tdb_handles import RawHandle from .._tiledb_array import TileDBArray from .._tiledb_object import AnyTileDBObject, TileDBObject @@ -1194,15 +1198,18 @@ def _write_dataframe_impl( ) try: - soma_df = _factory.open(df_uri, "w", soma_type=DataFrame, context=context) - except DoesNotExistError: soma_df = DataFrame.create( df_uri, schema=arrow_table.schema, platform_config=platform_config, context=context, ) - else: + except AlreadyExistsError: + if ingestion_params.error_if_already_exists: + raise SOMAError(f"{soma_df.uri} already exists") + + soma_df = _factory.open(df_uri, "w", soma_type=DataFrame, context=context) + if ingestion_params.skip_existing_nonempty_domain: storage_ned = _read_nonempty_domain(soma_df) dim_range = ((int(df.index.min()), int(df.index.max())),) @@ -1212,8 +1219,6 @@ def _write_dataframe_impl( _util.format_elapsed(s, f"SKIPPED {soma_df.uri}"), ) return soma_df - elif ingestion_params.error_if_already_exists: - raise SOMAError(f"{soma_df.uri} already exists") if ingestion_params.write_schema_no_data: logging.log_io( From dd6f2eb94d0e75e31034e011f08d90ed51f57331 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Thu, 11 Apr 2024 11:52:00 -0400 Subject: [PATCH 02/15] apply in `_create_or_open_collection` --- apis/python/src/tiledbsoma/io/ingest.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/apis/python/src/tiledbsoma/io/ingest.py b/apis/python/src/tiledbsoma/io/ingest.py index eecbedaaca..52c6fc4f08 100644 --- a/apis/python/src/tiledbsoma/io/ingest.py +++ b/apis/python/src/tiledbsoma/io/ingest.py @@ -987,18 +987,15 @@ def _create_or_open_collection( context: Optional[SOMATileDBContext], additional_metadata: AdditionalMetadata = None, ) -> CollectionBase[_TDBO]: + try: - thing = cls.open(uri, "w", context=context) - except DoesNotExistError: - pass # This is always OK; make a new one. - else: + coll = cls.create(uri, context=context) + except AlreadyExistsError: # It already exists. Are we resuming? if ingestion_params.error_if_already_exists: raise SOMAError(f"{uri} already exists") - add_metadata(thing, additional_metadata) - return thing + coll = cls.open(uri, "w", context=context) - coll = cls.create(uri, context=context) add_metadata(coll, additional_metadata) return coll From 70e2dd928854f1a995bf77376574502a9c8172bf Mon Sep 17 00:00:00 2001 From: John Kerl Date: Thu, 11 Apr 2024 11:53:00 -0400 Subject: [PATCH 03/15] apply in _create_from_matrix --- apis/python/src/tiledbsoma/io/ingest.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/apis/python/src/tiledbsoma/io/ingest.py b/apis/python/src/tiledbsoma/io/ingest.py index 52c6fc4f08..a008960e44 100644 --- a/apis/python/src/tiledbsoma/io/ingest.py +++ b/apis/python/src/tiledbsoma/io/ingest.py @@ -1293,10 +1293,6 @@ def _create_from_matrix( logging.log_io(None, f"START WRITING {uri}") try: - soma_ndarray = cls.open( - uri, "w", platform_config=platform_config, context=context - ) - except DoesNotExistError: # A SparseNDArray must be appendable in soma.io. shape = [None for _ in matrix.shape] if cls.is_sparse else matrix.shape soma_ndarray = cls.create( @@ -1306,9 +1302,12 @@ def _create_from_matrix( platform_config=platform_config, context=context, ) - else: + except AlreadyExistsError: if ingestion_params.error_if_already_exists: raise SOMAError(f"{soma_ndarray.uri} already exists") + soma_ndarray = cls.open( + uri, "w", platform_config=platform_config, context=context + ) if ingestion_params.write_schema_no_data: logging.log_io( From add8422837e2598a7073bdeb1dbd1e47bf3d1d47 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Thu, 11 Apr 2024 11:53:36 -0400 Subject: [PATCH 04/15] apply in _ingest_uns_ndarray --- apis/python/src/tiledbsoma/io/ingest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apis/python/src/tiledbsoma/io/ingest.py b/apis/python/src/tiledbsoma/io/ingest.py index a008960e44..aef011f998 100644 --- a/apis/python/src/tiledbsoma/io/ingest.py +++ b/apis/python/src/tiledbsoma/io/ingest.py @@ -2750,8 +2750,6 @@ def _ingest_uns_ndarray( logging.log_io(msg, msg) return try: - soma_arr = _factory.open(arr_uri, "w", soma_type=DenseNDArray, context=context) - except DoesNotExistError: soma_arr = DenseNDArray.create( arr_uri, type=pa_dtype, @@ -2759,6 +2757,8 @@ def _ingest_uns_ndarray( platform_config=platform_config, context=context, ) + except AlreadyExistsError: + soma_arr = _factory.open(arr_uri, "w", soma_type=DenseNDArray, context=context) # If resume mode: don't re-write existing data. This is the user's explicit request # that we not re-write things that have already been written. From e6c1829a493912b0d0f3883d2c14f97c0cac3fdd Mon Sep 17 00:00:00 2001 From: John Kerl Date: Thu, 11 Apr 2024 17:35:07 +0000 Subject: [PATCH 05/15] lint --- apis/python/src/tiledbsoma/io/ingest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/apis/python/src/tiledbsoma/io/ingest.py b/apis/python/src/tiledbsoma/io/ingest.py index aef011f998..7863da60f4 100644 --- a/apis/python/src/tiledbsoma/io/ingest.py +++ b/apis/python/src/tiledbsoma/io/ingest.py @@ -987,7 +987,6 @@ def _create_or_open_collection( context: Optional[SOMATileDBContext], additional_metadata: AdditionalMetadata = None, ) -> CollectionBase[_TDBO]: - try: coll = cls.create(uri, context=context) except AlreadyExistsError: From 7c76c4a1b6159f4f09b70384a560727bf83556a0 Mon Sep 17 00:00:00 2001 From: John Blischak Date: Thu, 11 Apr 2024 16:42:00 -0400 Subject: [PATCH 06/15] Run SO copying workflow on macos-13 to avoid SIP (#2435) --- .github/workflows/python-so-copying.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python-so-copying.yml b/.github/workflows/python-so-copying.yml index 2d0d49bedf..2370e6fd0a 100644 --- a/.github/workflows/python-so-copying.yml +++ b/.github/workflows/python-so-copying.yml @@ -139,7 +139,7 @@ jobs: ./venv-soma/bin/python -c "import tiledbsoma; print(tiledbsoma.pytiledbsoma.version())" macos: - runs-on: macos-12 + runs-on: macos-13 name: "macos TILEDB_EXISTS: ${{ matrix.TILEDB_EXISTS }} TILEDBSOMA_EXISTS: ${{ matrix.TILEDBSOMA_EXISTS }}" strategy: fail-fast: false @@ -153,6 +153,8 @@ jobs: - uses: actions/checkout@v4 with: fetch-depth: 0 # for setuptools-scm + - name: Check if System Integrity Protection (SIP) is enabled + run: csrutil status - name: Install pre-built libtiledb if: ${{ matrix.TILEDB_EXISTS == 'yes' }} run: | From 0244f80a34be0308d697b303194aea4348bbdf8b Mon Sep 17 00:00:00 2001 From: John Kerl Date: Thu, 11 Apr 2024 11:49:07 -0400 Subject: [PATCH 07/15] AlreadyExistsError; use for DataFrame in tiledbsoma.io --- apis/python/src/tiledbsoma/__init__.py | 3 +- apis/python/src/tiledbsoma/_collection.py | 26 ++++++++---- .../python/src/tiledbsoma/_common_nd_array.py | 16 +++++--- apis/python/src/tiledbsoma/_dataframe.py | 16 +++++--- apis/python/src/tiledbsoma/_exception.py | 40 +++++++++++++++++++ apis/python/src/tiledbsoma/io/ingest.py | 17 +++++--- 6 files changed, 93 insertions(+), 25 deletions(-) diff --git a/apis/python/src/tiledbsoma/__init__.py b/apis/python/src/tiledbsoma/__init__.py index 337aebce38..03a67e72a7 100644 --- a/apis/python/src/tiledbsoma/__init__.py +++ b/apis/python/src/tiledbsoma/__init__.py @@ -147,7 +147,7 @@ from ._constants import SOMA_JOINID from ._dataframe import DataFrame from ._dense_nd_array import DenseNDArray -from ._exception import DoesNotExistError, SOMAError +from ._exception import AlreadyExistsError, DoesNotExistError, SOMAError from ._experiment import Experiment from ._factory import open from ._general_utilities import ( @@ -171,6 +171,7 @@ __version__ = get_implementation_version() __all__ = [ + "AlreadyExistsError", "AxisColumnNames", "AxisQuery", "Collection", diff --git a/apis/python/src/tiledbsoma/_collection.py b/apis/python/src/tiledbsoma/_collection.py index d74bcb11e8..fe9f05a4f2 100644 --- a/apis/python/src/tiledbsoma/_collection.py +++ b/apis/python/src/tiledbsoma/_collection.py @@ -37,7 +37,12 @@ from ._common_nd_array import NDArray from ._dataframe import DataFrame from ._dense_nd_array import DenseNDArray -from ._exception import SOMAError, is_does_not_exist_error +from ._exception import ( + AlreadyExistsError, + SOMAError, + is_already_exists_error, + is_does_not_exist_error, +) from ._funcs import typeguard_ignore from ._sparse_nd_array import SparseNDArray from ._tiledb_object import AnyTileDBObject, TileDBObject @@ -119,13 +124,18 @@ def create( Experimental. """ context = _validate_soma_tiledb_context(context) - tiledb.group_create(uri=uri, ctx=context.tiledb_ctx) - handle = cls._wrapper_type.open(uri, "w", context, tiledb_timestamp) - cls._set_create_metadata(handle) - return cls( - handle, - _dont_call_this_use_create_or_open_instead="tiledbsoma-internal-code", - ) + try: + tiledb.group_create(uri=uri, ctx=context.tiledb_ctx) + handle = cls._wrapper_type.open(uri, "w", context, tiledb_timestamp) + cls._set_create_metadata(handle) + return cls( + handle, + _dont_call_this_use_create_or_open_instead="tiledbsoma-internal-code", + ) + except tiledb.TileDBError as tdbe: + if is_already_exists_error(tdbe): + raise AlreadyExistsError(f"{uri!r} already exists") + raise @classmethod def open( diff --git a/apis/python/src/tiledbsoma/_common_nd_array.py b/apis/python/src/tiledbsoma/_common_nd_array.py index 3f476de60f..c9c1f77eaf 100644 --- a/apis/python/src/tiledbsoma/_common_nd_array.py +++ b/apis/python/src/tiledbsoma/_common_nd_array.py @@ -16,6 +16,7 @@ import tiledb from . import _arrow_types, _util +from ._exception import AlreadyExistsError, is_already_exists_error from ._tiledb_array import TileDBArray from ._types import OpenTimestamp from .options._soma_tiledb_context import ( @@ -91,11 +92,16 @@ def create( context, is_sparse=cls.is_sparse, ) - handle = cls._create_internal(uri, schema, context, tiledb_timestamp) - return cls( - handle, - _dont_call_this_use_create_or_open_instead="tiledbsoma-internal-code", - ) + try: + handle = cls._create_internal(uri, schema, context, tiledb_timestamp) + return cls( + handle, + _dont_call_this_use_create_or_open_instead="tiledbsoma-internal-code", + ) + except tiledb.TileDBError as tdbe: + if is_already_exists_error(tdbe): + raise AlreadyExistsError(f"{uri!r} already exists") + raise @property def shape(self) -> Tuple[int, ...]: diff --git a/apis/python/src/tiledbsoma/_dataframe.py b/apis/python/src/tiledbsoma/_dataframe.py index 4daae22ec8..cac3fc49d1 100644 --- a/apis/python/src/tiledbsoma/_dataframe.py +++ b/apis/python/src/tiledbsoma/_dataframe.py @@ -20,6 +20,7 @@ from . import _arrow_types, _util from . import pytiledbsoma as clib from ._constants import SOMA_JOINID +from ._exception import AlreadyExistsError, is_already_exists_error from ._query_condition import QueryCondition from ._read_iters import TableReadIter from ._tdb_handles import DataFrameWrapper @@ -217,11 +218,16 @@ def create( TileDBCreateOptions.from_platform_config(platform_config), context, ) - handle = cls._create_internal(uri, tdb_schema, context, tiledb_timestamp) - return cls( - handle, - _dont_call_this_use_create_or_open_instead="tiledbsoma-internal-code", - ) + try: + handle = cls._create_internal(uri, tdb_schema, context, tiledb_timestamp) + return cls( + handle, + _dont_call_this_use_create_or_open_instead="tiledbsoma-internal-code", + ) + except tiledb.TileDBError as tdbe: + if is_already_exists_error(tdbe): + raise AlreadyExistsError(f"{uri!r} already exists") + raise def keys(self) -> Tuple[str, ...]: """Returns the names of the columns when read back as a dataframe. diff --git a/apis/python/src/tiledbsoma/_exception.py b/apis/python/src/tiledbsoma/_exception.py index 109a42e72c..853df1cc2e 100644 --- a/apis/python/src/tiledbsoma/_exception.py +++ b/apis/python/src/tiledbsoma/_exception.py @@ -55,6 +55,46 @@ def is_does_not_exist_error(e: tiledb.TileDBError) -> bool: return False +class AlreadyExistsError(SOMAError): + """Raised when attempting to create an already existing SOMA object. + + Lifecycle: experimental + """ + + pass + + +def is_already_exists_error(e: tiledb.TileDBError) -> bool: + """Given a TileDBError, return true if it indicates the object already exists + + Lifecycle: experimental + + Example: + XXX EDIT ME + try: + with tiledb.open(uri): + ... + except tiledb.TileDBError as e: + if is_does_not_exist_error(e): + ... + raise e + """ + stre = str(e) + # Local-disk/S3 does-not-exist exceptions say 'Group does not exist'; TileDB Cloud + # does-not-exist exceptions are worded less clearly. + if ( + "lready exists" + in stre + # XXX + # or "Unrecognized array" in stre + # or "HTTP code 401" in stre + # or "HTTP code 404" in stre + ): + return True + + return False + + def is_duplicate_group_key_error(e: tiledb.TileDBError) -> bool: """Given a TileDBError, return try if it indicates a duplicate member add request in a tiledb.Group. diff --git a/apis/python/src/tiledbsoma/io/ingest.py b/apis/python/src/tiledbsoma/io/ingest.py index 80f74c396c..eecbedaaca 100644 --- a/apis/python/src/tiledbsoma/io/ingest.py +++ b/apis/python/src/tiledbsoma/io/ingest.py @@ -58,7 +58,11 @@ from .._collection import AnyTileDBCollection, CollectionBase from .._common_nd_array import NDArray from .._constants import SOMA_JOINID -from .._exception import DoesNotExistError, SOMAError +from .._exception import ( + AlreadyExistsError, + DoesNotExistError, + SOMAError, +) from .._tdb_handles import RawHandle from .._tiledb_array import TileDBArray from .._tiledb_object import AnyTileDBObject, TileDBObject @@ -1194,15 +1198,18 @@ def _write_dataframe_impl( ) try: - soma_df = _factory.open(df_uri, "w", soma_type=DataFrame, context=context) - except DoesNotExistError: soma_df = DataFrame.create( df_uri, schema=arrow_table.schema, platform_config=platform_config, context=context, ) - else: + except AlreadyExistsError: + if ingestion_params.error_if_already_exists: + raise SOMAError(f"{soma_df.uri} already exists") + + soma_df = _factory.open(df_uri, "w", soma_type=DataFrame, context=context) + if ingestion_params.skip_existing_nonempty_domain: storage_ned = _read_nonempty_domain(soma_df) dim_range = ((int(df.index.min()), int(df.index.max())),) @@ -1212,8 +1219,6 @@ def _write_dataframe_impl( _util.format_elapsed(s, f"SKIPPED {soma_df.uri}"), ) return soma_df - elif ingestion_params.error_if_already_exists: - raise SOMAError(f"{soma_df.uri} already exists") if ingestion_params.write_schema_no_data: logging.log_io( From d0566757333ed4cc4c04f76a8e3ba933e95d522a Mon Sep 17 00:00:00 2001 From: John Kerl Date: Thu, 11 Apr 2024 11:52:00 -0400 Subject: [PATCH 08/15] apply in `_create_or_open_collection` --- apis/python/src/tiledbsoma/io/ingest.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/apis/python/src/tiledbsoma/io/ingest.py b/apis/python/src/tiledbsoma/io/ingest.py index eecbedaaca..52c6fc4f08 100644 --- a/apis/python/src/tiledbsoma/io/ingest.py +++ b/apis/python/src/tiledbsoma/io/ingest.py @@ -987,18 +987,15 @@ def _create_or_open_collection( context: Optional[SOMATileDBContext], additional_metadata: AdditionalMetadata = None, ) -> CollectionBase[_TDBO]: + try: - thing = cls.open(uri, "w", context=context) - except DoesNotExistError: - pass # This is always OK; make a new one. - else: + coll = cls.create(uri, context=context) + except AlreadyExistsError: # It already exists. Are we resuming? if ingestion_params.error_if_already_exists: raise SOMAError(f"{uri} already exists") - add_metadata(thing, additional_metadata) - return thing + coll = cls.open(uri, "w", context=context) - coll = cls.create(uri, context=context) add_metadata(coll, additional_metadata) return coll From ff1b6eec4253ef7dde1474acf75e6d254a1ac6ee Mon Sep 17 00:00:00 2001 From: John Kerl Date: Thu, 11 Apr 2024 11:53:00 -0400 Subject: [PATCH 09/15] apply in _create_from_matrix --- apis/python/src/tiledbsoma/io/ingest.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/apis/python/src/tiledbsoma/io/ingest.py b/apis/python/src/tiledbsoma/io/ingest.py index 52c6fc4f08..a008960e44 100644 --- a/apis/python/src/tiledbsoma/io/ingest.py +++ b/apis/python/src/tiledbsoma/io/ingest.py @@ -1293,10 +1293,6 @@ def _create_from_matrix( logging.log_io(None, f"START WRITING {uri}") try: - soma_ndarray = cls.open( - uri, "w", platform_config=platform_config, context=context - ) - except DoesNotExistError: # A SparseNDArray must be appendable in soma.io. shape = [None for _ in matrix.shape] if cls.is_sparse else matrix.shape soma_ndarray = cls.create( @@ -1306,9 +1302,12 @@ def _create_from_matrix( platform_config=platform_config, context=context, ) - else: + except AlreadyExistsError: if ingestion_params.error_if_already_exists: raise SOMAError(f"{soma_ndarray.uri} already exists") + soma_ndarray = cls.open( + uri, "w", platform_config=platform_config, context=context + ) if ingestion_params.write_schema_no_data: logging.log_io( From 8befd731fbf14d7c2a4ecb6ead1d75c3240b0f50 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Thu, 11 Apr 2024 11:53:36 -0400 Subject: [PATCH 10/15] apply in _ingest_uns_ndarray --- apis/python/src/tiledbsoma/io/ingest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apis/python/src/tiledbsoma/io/ingest.py b/apis/python/src/tiledbsoma/io/ingest.py index a008960e44..aef011f998 100644 --- a/apis/python/src/tiledbsoma/io/ingest.py +++ b/apis/python/src/tiledbsoma/io/ingest.py @@ -2750,8 +2750,6 @@ def _ingest_uns_ndarray( logging.log_io(msg, msg) return try: - soma_arr = _factory.open(arr_uri, "w", soma_type=DenseNDArray, context=context) - except DoesNotExistError: soma_arr = DenseNDArray.create( arr_uri, type=pa_dtype, @@ -2759,6 +2757,8 @@ def _ingest_uns_ndarray( platform_config=platform_config, context=context, ) + except AlreadyExistsError: + soma_arr = _factory.open(arr_uri, "w", soma_type=DenseNDArray, context=context) # If resume mode: don't re-write existing data. This is the user's explicit request # that we not re-write things that have already been written. From a854b701ede1c2299ef1cb33da384e4931ae7173 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Thu, 11 Apr 2024 17:35:07 +0000 Subject: [PATCH 11/15] lint --- apis/python/src/tiledbsoma/io/ingest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/apis/python/src/tiledbsoma/io/ingest.py b/apis/python/src/tiledbsoma/io/ingest.py index aef011f998..7863da60f4 100644 --- a/apis/python/src/tiledbsoma/io/ingest.py +++ b/apis/python/src/tiledbsoma/io/ingest.py @@ -987,7 +987,6 @@ def _create_or_open_collection( context: Optional[SOMATileDBContext], additional_metadata: AdditionalMetadata = None, ) -> CollectionBase[_TDBO]: - try: coll = cls.create(uri, context=context) except AlreadyExistsError: From ad10444fd393f3985337049de22dfbe823aa1d31 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 12 Apr 2024 10:06:17 -0400 Subject: [PATCH 12/15] neaten --- apis/python/src/tiledbsoma/_exception.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/apis/python/src/tiledbsoma/_exception.py b/apis/python/src/tiledbsoma/_exception.py index 853df1cc2e..f66a556252 100644 --- a/apis/python/src/tiledbsoma/_exception.py +++ b/apis/python/src/tiledbsoma/_exception.py @@ -70,12 +70,11 @@ def is_already_exists_error(e: tiledb.TileDBError) -> bool: Lifecycle: experimental Example: - XXX EDIT ME try: - with tiledb.open(uri): + tiledb.Array.create(uri, schema, ctx=ctx) ... except tiledb.TileDBError as e: - if is_does_not_exist_error(e): + if is_already_exists_error(e): ... raise e """ From ec008d2e621c127c94e2fbea502de4d346a5ba81 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 12 Apr 2024 10:15:37 -0400 Subject: [PATCH 13/15] neaten --- apis/python/src/tiledbsoma/_exception.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/apis/python/src/tiledbsoma/_exception.py b/apis/python/src/tiledbsoma/_exception.py index f66a556252..3439515d50 100644 --- a/apis/python/src/tiledbsoma/_exception.py +++ b/apis/python/src/tiledbsoma/_exception.py @@ -79,16 +79,10 @@ def is_already_exists_error(e: tiledb.TileDBError) -> bool: raise e """ stre = str(e) - # Local-disk/S3 does-not-exist exceptions say 'Group does not exist'; TileDB Cloud - # does-not-exist exceptions are worded less clearly. - if ( - "lready exists" - in stre - # XXX - # or "Unrecognized array" in stre - # or "HTTP code 401" in stre - # or "HTTP code 404" in stre - ): + # Local-disk, S3, and TileDB Cloud exceptions all have the substring + # "already exists". Here we lower-case the exception message just + # in case someone ever uppercases it on the other end. + if "already exists" in stre.lower(): return True return False From 028f271576a4090857f1986d9ad108324a46b426 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 12 Apr 2024 10:19:54 -0400 Subject: [PATCH 14/15] Update raises-notes --- apis/python/src/tiledbsoma/_collection.py | 2 ++ apis/python/src/tiledbsoma/_common_nd_array.py | 2 ++ apis/python/src/tiledbsoma/_dataframe.py | 2 ++ 3 files changed, 6 insertions(+) diff --git a/apis/python/src/tiledbsoma/_collection.py b/apis/python/src/tiledbsoma/_collection.py index fe9f05a4f2..922e2f9817 100644 --- a/apis/python/src/tiledbsoma/_collection.py +++ b/apis/python/src/tiledbsoma/_collection.py @@ -117,6 +117,8 @@ def create( the context. Raises: + tiledbsoma.AlreadyExistsError: + If the underlying object already exists at the given URI. TileDBError: If unable to create the underlying object. diff --git a/apis/python/src/tiledbsoma/_common_nd_array.py b/apis/python/src/tiledbsoma/_common_nd_array.py index c9c1f77eaf..adc6e8c94a 100644 --- a/apis/python/src/tiledbsoma/_common_nd_array.py +++ b/apis/python/src/tiledbsoma/_common_nd_array.py @@ -78,6 +78,8 @@ def create( If the ``type`` is unsupported. ValueError: If the ``shape`` is unsupported. + tiledbsoma.AlreadyExistsError: + If the underlying object already exists at the given URI. TileDBError: If unable to create the underlying object. diff --git a/apis/python/src/tiledbsoma/_dataframe.py b/apis/python/src/tiledbsoma/_dataframe.py index cac3fc49d1..76c76cb691 100644 --- a/apis/python/src/tiledbsoma/_dataframe.py +++ b/apis/python/src/tiledbsoma/_dataframe.py @@ -188,6 +188,8 @@ def create( an undefined column name. ValueError: If the ``schema`` specifies illegal column names. + tiledbsoma.AlreadyExistsError: + If the underlying object already exists at the given URI. TileDBError: If unable to create the underlying object. From 746a2cd6e16ea6b2387e15fa2a9acf793d97a02c Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 12 Apr 2024 11:52:52 -0400 Subject: [PATCH 15/15] code-review feedback Co-authored-by: nguyenv --- apis/python/src/tiledbsoma/_exception.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/apis/python/src/tiledbsoma/_exception.py b/apis/python/src/tiledbsoma/_exception.py index 3439515d50..10a024855d 100644 --- a/apis/python/src/tiledbsoma/_exception.py +++ b/apis/python/src/tiledbsoma/_exception.py @@ -82,10 +82,7 @@ def is_already_exists_error(e: tiledb.TileDBError) -> bool: # Local-disk, S3, and TileDB Cloud exceptions all have the substring # "already exists". Here we lower-case the exception message just # in case someone ever uppercases it on the other end. - if "already exists" in stre.lower(): - return True - - return False + return "already exists" in stre.lower() def is_duplicate_group_key_error(e: tiledb.TileDBError) -> bool: