Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYNPY-1551] Tables refactor #1149

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions docs/reference/oop/table_refactor.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
Contained within this file are proposed changes for interacting with Tables via this
client.



::: synapseclient.models.Table
options:
inherited_members: true
members:
- get
- store
- delete
- query
- store_rows
- delete_rows
- delete_column
- add_column
- reorder_column
- set_columns
- get_permissions
- get_acl
- set_permissions

::: synapseclient.models.FacetType
::: synapseclient.models.ColumnType
::: synapseclient.models.table.JsonSubColumn

::: synapseclient.models.Column
options:
members:
7 changes: 5 additions & 2 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,9 @@ nav:
- Core: reference/core.md
- REST Apis: reference/rest_apis.md
- Experimental:
- Object-Orientated Models: reference/oop/models.md
- Async Object-Orientated Models: reference/oop/models_async.md
- Object-Orientated Models: reference/oop/models.md
- Async Object-Orientated Models: reference/oop/models_async.md
- Table refactor: reference/oop/table_refactor.md
- Further Reading:
- Home: explanations/home.md
- Domain Models of Synapse: explanations/domain_models_of_synapse.md
Expand Down Expand Up @@ -120,6 +121,8 @@ theme:
- toc.follow
- navigation.tabs
- navigation.tabs.sticky
- navigation.instant
- navigation.instant.progress

extra_css:
- css/custom.css
Expand Down
3 changes: 3 additions & 0 deletions synapseclient/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
put_file_multipart_add,
put_file_multipart_complete,
)
from .table_services import get_columns

__all__ = [
# annotations
Expand Down Expand Up @@ -78,4 +79,6 @@
"get_transfer_config",
# entity_factory
"get_from_entity_factory",
# columns
"get_columns",
]
93 changes: 93 additions & 0 deletions synapseclient/api/table_services.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
"""
The purpose of this module is to provide any functions that are needed to interact with
columns in the Synapse REST API.
"""

from typing import TYPE_CHECKING, List, Optional

if TYPE_CHECKING:
from synapseclient import Synapse
from synapseclient.models import Column


async def get_columns(
    table_id: str,
    *,
    synapse_client: Optional["Synapse"] = None,
) -> List["Column"]:
    """Get the columns defined for the given table from Synapse.

    Arguments:
        table_id: The ID of the Table to get the columns for.
        synapse_client: If not passed in and caching was not disabled by
            `Synapse.allow_client_caching(False)` this will use the last created
            instance from the Synapse class constructor.

    Returns:
        A list of `Column` objects, one for each entry under the `results` key of
        the REST response. The list is empty when the response has no `results`.
    """
    # These names are only imported under TYPE_CHECKING at module level, so the
    # runtime import happens here.
    from synapseclient import Synapse
    from synapseclient.models import Column

    client = Synapse.get_client(synapse_client=synapse_client)
    result = await client.rest_get_async(f"/entity/{table_id}/column")

    # Convert each raw column dict from the response into a Column model.
    return [
        Column().fill_from_dict(synapse_column=column)
        for column in result.get("results", [])
    ]


# TODO: Finish this function, this was copied out of the Synapse class and will be used to implement this API: https://rest-docs.synapse.org/rest/org/sagebionetworks/repo/model/table/TableSchemaChangeRequest.html
# async def table_updates(
# self,
# table_id: str,
# changes: List[dict] = [],
# create_snapshot: bool = False,
# comment: str = None,
# label: str = None,
# activity: str = None,
# wait: bool = True,
# ) -> dict:
# """
# Creates view updates and snapshots

# Arguments:
# table: The schema of the EntityView or its ID.
# changes: Array of Table changes
# create_snapshot: Create snapshot
# comment: Optional snapshot comment.
# label: Optional snapshot label.
# activity: Optional activity ID applied to snapshot version.
# wait: True to wait for async table update to complete

# Returns:
# A Snapshot Response
# """
# snapshot_options = {
# "snapshotComment": comment,
# "snapshotLabel": label,
# "snapshotActivityId": activity,
# }
# new_snapshot = {
# key: value for key, value in snapshot_options.items() if value is not None
# }
# table_update_body = {
# "changes": changes,
# "createSnapshot": create_snapshot,
# "snapshotOptions": new_snapshot,
# }

# uri = "/entity/{}/table/transaction/async".format(id_of(table))

# if wait:
# result = self._waitForAsync(uri, table_update_body)

# else:
# result = self.restPOST(
# "{}/start".format(uri), body=json.dumps(table_update_body)
# )

# return result
8 changes: 4 additions & 4 deletions synapseclient/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from synapseclient.core.logging_setup import DEFAULT_LOGGER_NAME

if TYPE_CHECKING:
from synapseclient.models import File, Folder, Project
from synapseclient.models import File, Folder, Project, Table

R = TypeVar("R")

Expand Down Expand Up @@ -1376,10 +1376,10 @@ def delete_none_keys(incoming_object: typing.Dict) -> None:


def merge_dataclass_entities(
source: typing.Union["Project", "Folder", "File"],
destination: typing.Union["Project", "Folder", "File"],
source: typing.Union["Project", "Folder", "File", "Table"],
destination: typing.Union["Project", "Folder", "File", "Table"],
fields_to_ignore: typing.List[str] = None,
) -> typing.Union["Project", "Folder", "File"]:
) -> typing.Union["Project", "Folder", "File", "Table"]:
"""
Utility function to merge two dataclass entities together. This is used when we are
upserting an entity from the Synapse service with the requested changes.
Expand Down
84 changes: 52 additions & 32 deletions synapseclient/models/protocols/table_protocol.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,15 @@
"""Protocol for the specific methods of this class that have synchronous counterparts
generated at runtime."""

from typing import TYPE_CHECKING, List, Optional, Protocol, Union
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Protocol, Union

import pandas as pd
from typing_extensions import Self

from synapseclient import Synapse
from synapseclient.table import CsvFileTable as Synapse_CsvFileTable
from synapseclient.table import TableQueryResult as Synaspe_TableQueryResult

if TYPE_CHECKING:
from synapseclient.models.table import (
CsvResultFormat,
Row,
RowsetResultFormat,
Table,
)
from synapseclient.models.table import Row, Table


class ColumnSynchronousProtocol(Protocol):
Expand All @@ -40,29 +34,40 @@ class TableSynchronousProtocol(Protocol):
have a synchronous counterpart that may also be called.
"""

def store_rows_from_csv(
self, csv_path: str, *, synapse_client: Optional[Synapse] = None
) -> str:
"""Takes in a path to a CSV and stores the rows to Synapse.
def store(
self, dry_run: bool = False, *, synapse_client: Optional[Synapse] = None
) -> Self:
"""Store non-row information about a table including the columns and annotations.

Arguments:
csv_path: The path to the CSV to store.
dry_run: If True, will not actually store the table but will log to
the console what would have been stored.
synapse_client: If not passed in and caching was not disabled by
`Synapse.allow_client_caching(False)` this will use the last created
instance from the Synapse class constructor.

Returns:
The path to the CSV that was stored.
The Table instance stored in synapse.
"""
return ""
return self

def delete_rows(
self, rows: List["Row"], *, synapse_client: Optional[Synapse] = None
def store_rows(
self,
values: Union[str, List[Dict[str, Any]], Dict[str, Any], pd.DataFrame],
*,
synapse_client: Optional[Synapse] = None,
) -> None:
"""Delete rows from a table.
"""
Takes in values from the sources defined below and stores the rows to Synapse.

Arguments:
rows: The rows to delete.
values: Supports storing data from the following sources:

- A string holding the path to a CSV file
- A list of lists (or tuples) where each element is a row
- A dictionary where the key is the column name and the value is one or more values. The values will be wrapped into a [Pandas DataFrame](http://pandas.pydata.org/pandas-docs/stable/api.html#dataframe).
- A [Pandas DataFrame](http://pandas.pydata.org/pandas-docs/stable/api.html#dataframe)

synapse_client: If not passed in and caching was not disabled by
`Synapse.allow_client_caching(False)` this will use the last created
instance from the Synapse class constructor.
Expand All @@ -72,23 +77,39 @@ def delete_rows(
"""
return None

def store_schema(self, *, synapse_client: Optional[Synapse] = None) -> "Table":
"""Store non-row information about a table including the columns and annotations.
def delete_rows(
self, rows: List["Row"], *, synapse_client: Optional[Synapse] = None
) -> None:
"""Delete rows from a table.

Arguments:
rows: The rows to delete.
synapse_client: If not passed in and caching was not disabled by
`Synapse.allow_client_caching(False)` this will use the last created
instance from the Synapse class constructor.

Returns:
The Table instance stored in synapse.
None

# TODO: Add example of how to delete rows
"""
return self
return None

def get(self, *, synapse_client: Optional[Synapse] = None) -> "Table":
def get(
self,
include_columns: bool = False,
include_activity: bool = False,
*,
synapse_client: Optional[Synapse] = None,
) -> "Table":
"""Get the metadata about the table from synapse.

Arguments:
include_columns: If True, will include fully filled column objects in the
`.columns` attribute. When False, the columns will not be filled in.
include_activity: If True the activity will be included in the table
if it exists.

synapse_client: If not passed in and caching was not disabled by
`Synapse.allow_client_caching(False)` this will use the last created
instance from the Synapse class constructor.
Expand All @@ -111,15 +132,14 @@ def delete(self, *, synapse_client: Optional[Synapse] = None) -> None:
"""
return None

@classmethod
@staticmethod
def query(
cls,
query: str,
result_format: Union["CsvResultFormat", "RowsetResultFormat"] = None,
*,
synapse_client: Optional[Synapse] = None,
) -> Union[Synapse_CsvFileTable, Synaspe_TableQueryResult, None]:
"""Query for data on a table stored in Synapse.
) -> pd.DataFrame:
"""Query for data on a table stored in Synapse. The results will always be
returned as a Pandas DataFrame.

Arguments:
query: The query to run.
Expand All @@ -129,6 +149,6 @@ def query(
instance from the Synapse class constructor.

Returns:
The results of the query.
The results of the query as a Pandas DataFrame.
"""
return None
return pd.DataFrame()
Loading
Loading