Skip to content

Commit

Permalink
Proposed delete_rows function for the table
Browse files Browse the repository at this point in the history
  • Loading branch information
BryanFauble committed Jan 9, 2025
1 parent df0dc3c commit 5dfa4c5
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 36 deletions.
2 changes: 0 additions & 2 deletions synapseclient/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
Column,
ColumnType,
FacetType,
Row,
RowsetResultFormat,
Table,
)
Expand All @@ -32,7 +31,6 @@
"FacetType",
"CsvResultFormat",
"RowsetResultFormat",
"Row",
"Team",
"TeamMember",
"UserProfile",
Expand Down
34 changes: 26 additions & 8 deletions synapseclient/models/protocols/table_protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
if TYPE_CHECKING:
from synapseclient.models.table import (
ColumnExpansionStrategy,
Row,
SchemaStorageStrategy,
Table,
)
Expand Down Expand Up @@ -111,23 +110,42 @@ def store_rows(
"""
return None

@staticmethod
def delete_rows(
self, rows: List["Row"], *, synapse_client: Optional[Synapse] = None
) -> None:
"""Delete rows from a table.
query: str, table_id: str, *, synapse_client: Optional[Synapse] = None
) -> pd.DataFrame:
"""
Delete rows from a table given a query to select rows. The query at a
minimum must select the `ROW_ID` and `ROW_VERSION` columns. If you want to
inspect the data that will be deleted ahead of time you may use the
`.query` method to get the data.
Arguments:
rows: The rows to delete.
query: The query to select the rows to delete. The query at a minimum
must select the `ROW_ID` and `ROW_VERSION` columns. See this document
that describes the expected syntax of the query:
<https://rest-docs.synapse.org/rest/org/sagebionetworks/repo/web/controller/TableExamples.html>
table_id: The ID of the table to delete rows from.
synapse_client: If not passed in and caching was not disabled by
`Synapse.allow_client_caching(False)` this will use the last created
instance from the Synapse class constructor.
Returns:
None
The results of your query for the rows that were deleted from the table.
TODO: Add example of how to delete rows
Example: Selecting a row to delete
This example shows how you may select a row to delete from a table.
from synapseclient import Synapse
from synapseclient.models import Table
syn = Synapse()
syn.login()
Table.delete_rows(query="SELECT ROW_ID, ROW_VERSION FROM syn1234 WHERE foo = 'asdf'")
"""
return None
return pd.DataFrame()

def get(
self,
Expand Down
64 changes: 38 additions & 26 deletions synapseclient/models/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,20 +196,6 @@ class JsonSubColumn:
treated as a facet"""


@dataclass()
class Row:
# TODO: We will want to restrict the typing here
values: Optional[List[Any]] = None
"""The values for each column of this row."""

row_id: Optional[int] = None
"""The immutable ID issued to a new row."""

version_number: Optional[int] = None
"""The version number of this row. Each row version is immutable, so when a
row is updated a new version is created."""


@dataclass()
@async_to_sync
class Column(ColumnSynchronousProtocol):
Expand Down Expand Up @@ -952,38 +938,64 @@ async def store_rows_async(

raise NotImplementedError("This method is not yet implemented")

# TODO: Refactor this function as it's really rough to work with in it's current implementation
@otel_trace_method(
method_to_trace_name=lambda self, **kwargs: f"Delete_rows: {self.name}"
)
@staticmethod
async def delete_rows_async(
self, rows: List[Row], *, synapse_client: Optional[Synapse] = None
) -> None:
"""Delete rows from a table.
query: str, table_id: str, *, synapse_client: Optional[Synapse] = None
) -> pd.DataFrame:
"""
Delete rows from a table given a query to select rows. The query at a
minimum must select the `ROW_ID` and `ROW_VERSION` columns. If you want to
inspect the data that will be deleted ahead of time you may use the
`.query` method to get the data.
Arguments:
rows: The rows to delete.
query: The query to select the rows to delete. The query at a minimum
must select the `ROW_ID` and `ROW_VERSION` columns. See this document
that describes the expected syntax of the query:
<https://rest-docs.synapse.org/rest/org/sagebionetworks/repo/web/controller/TableExamples.html>
table_id: The ID of the table to delete rows from.
synapse_client: If not passed in and caching was not disabled by
`Synapse.allow_client_caching(False)` this will use the last created
instance from the Synapse class constructor.
Returns:
None
The results of your query for the rows that were deleted from the table.
Example: Selecting a row to delete
This example shows how you may select a row to delete from a table.
TODO: Add example of how to delete rows
import asyncio
from synapseclient import Synapse
from synapseclient.models import Table
syn = Synapse()
syn.login()
async def main():
await Table.delete_rows_async(query="SELECT ROW_ID, ROW_VERSION FROM syn1234 WHERE foo = 'asdf'")
asyncio.run(main())
"""
client = Synapse.get_client(synapse_client=synapse_client)
results_from_query = await Table.query_async(query=query, synapse_client=client)
client.logger.info(
f"Found {len(results_from_query)} rows to delete for given query: {query}"
)

rows_to_delete = []
for row in rows:
for row in results_from_query:
rows_to_delete.append([row.row_id, row.version_number])
loop = asyncio.get_event_loop()
await loop.run_in_executor(
None,
lambda: delete_rows(
syn=Synapse.get_client(synapse_client=synapse_client),
table_id=self.id,
table_id=table_id,
row_id_vers_list=rows_to_delete,
),
)
return results_from_query

@otel_trace_method(
method_to_trace_name=lambda self, **kwargs: f"Table_Store: {self.name}"
Expand Down

0 comments on commit 5dfa4c5

Please sign in to comment.