Skip to content

Commit

Permalink
Give dataframe tests descriptive names and columns. (#925)
Browse files Browse the repository at this point in the history
Adds a "name" field to test_read_indexing parameters and renames columns
to describe the data that gets put in them.
  • Loading branch information
thetorpedodog authored Feb 13, 2023
1 parent c9802fb commit e0e2842
Showing 1 changed file with 93 additions and 88 deletions.
181 changes: 93 additions & 88 deletions apis/python/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,10 +374,10 @@ def make_multiply_indexed_dataframe(tmp_path, index_column_names: List[str]):
[
# TODO: Support other index types once libtiledbsoma's SOMAReader supports more than
# int and string/bytes index types.
("index1", pa.int64()),
("index2", pa.string()),
("index3", pa.int64()),
("index4", pa.int64()),
("0_thru_5", pa.int64()),
("strings_aaa", pa.string()),
("zero_one", pa.int64()),
("thousands", pa.int64()),
("soma_joinid", pa.int64()),
("A", pa.int64()),
]
Expand All @@ -388,15 +388,15 @@ def make_multiply_indexed_dataframe(tmp_path, index_column_names: List[str]):
)

data: Dict[str, list] = {
"index1": [0, 1, 2, 3, 4, 5],
"index2": ["aaa", "aaa", "bbb", "bbb", "ccc", "ccc"],
"index3": [0, 1, 0, 1, 0, 1],
"index4": [1000, 2000, 1000, 1000, 1000, 1000],
"0_thru_5": [0, 1, 2, 3, 4, 5],
"strings_aaa": ["aaa", "aaa", "bbb", "bbb", "ccc", "ccc"],
"zero_one": [0, 1, 0, 1, 0, 1],
"thousands": [1000, 2000, 1000, 1000, 1000, 1000],
"soma_joinid": [10, 11, 12, 13, 14, 15],
"A": [10, 11, 12, 13, 14, 15],
}

n_data = len(data["index1"])
n_data = len(data["0_thru_5"])
sdf.write(pa.Table.from_pandas(pd.DataFrame(data=data)))

return (schema, sdf, n_data)
Expand All @@ -405,270 +405,275 @@ def make_multiply_indexed_dataframe(tmp_path, index_column_names: List[str]):
@pytest.mark.parametrize(
"io",
[
# 1D: indexing slot is None
{
"index_column_names": ["index1"],
"name": "1D indexing slot is None",
"index_column_names": ["0_thru_5"],
"coords": [None],
"A": [10, 11, 12, 13, 14, 15],
"throws": None,
},
# 1D: indexing slot is int
{
"index_column_names": ["index1"],
"name": "1D indexing slot is int",
"index_column_names": ["0_thru_5"],
"coords": [0],
"A": [10],
"throws": None,
},
{
"index_column_names": ["index1"],
"name": "1D no results for 100",
"index_column_names": ["0_thru_5"],
"coords": [100],
"A": [],
"throws": None,
},
{
"index_column_names": ["index1"],
"name": "1D no results for -100",
"index_column_names": ["0_thru_5"],
"coords": [-100],
"A": [],
"throws": None,
},
# 1D: indexing slot is list
{
"index_column_names": ["index1"],
"name": "1D indexing slot is list",
"index_column_names": ["0_thru_5"],
"coords": [[1, 3]],
"A": [11, 13],
"throws": None,
},
{
"index_column_names": ["index1"],
"name": "1D no results for -100, 100",
"index_column_names": ["0_thru_5"],
"coords": [[-100, 100]],
"A": [],
"throws": None,
},
# Indexing by empty list must return empty results
{
"index_column_names": ["index1"],
"name": "1D empty list returns empty results",
"index_column_names": ["0_thru_5"],
"coords": [[]],
"A": [],
"throws": None,
},
# 1D: indexing slot is tuple
{
"index_column_names": ["index1"],
"name": "1D indexing slot is tuple",
"index_column_names": ["0_thru_5"],
"coords": [(1, 3)],
"A": [11, 13],
"throws": None,
},
# 1D: indexing slot is range
{
"index_column_names": ["index1"],
"name": "1D indexing slot is range",
"index_column_names": ["0_thru_5"],
"coords": [range(1, 3)],
"A": [11, 12],
"throws": None,
},
# 1D: indexing slot is pa.ChunkedArray
{
"index_column_names": ["index1"],
"name": "1D indexing slot is pa.ChunkedArray",
"index_column_names": ["0_thru_5"],
"coords": [pa.chunked_array(pa.array([1, 3]))],
"A": [11, 13],
"throws": None,
},
# 1D: indexing slot is pa.Array
{
"index_column_names": ["index1"],
"coords": [pa.array([1, 3])],
"A": [11, 13],
"throws": None,
},
# 1D: indexing slot is pa.Array
{
"index_column_names": ["index1"],
"name": "1D indexing slot is pa.Array",
"index_column_names": ["0_thru_5"],
"coords": [pa.array([1, 3])],
"A": [11, 13],
"throws": None,
},
# 1D: indexing slot is np.ndarray
{
"index_column_names": ["index1"],
"name": "1D indexing slot is np.ndarray",
"index_column_names": ["0_thru_5"],
"coords": [np.asarray([1, 3])],
"A": [11, 13],
"throws": None,
},
{
"index_column_names": ["index1"],
"name": "1D indexing by 2D np.ndarray",
"index_column_names": ["0_thru_5"],
"coords": [
np.asarray([[1, 3], [2, 4]])
], # Error since 2D array in the slot
"A": [11, 13],
"throws": ValueError,
},
# 1D: indexing slot is slice
{
"index_column_names": ["index1"],
"name": "1D indexing by slice(None)",
"index_column_names": ["0_thru_5"],
"coords": [
slice(None)
], # Indexing slot is none-slice i.e. `[:]` which is like None
"A": [10, 11, 12, 13, 14, 15],
"throws": None,
},
{
"index_column_names": ["index1"],
"coords": [], # len(ids) != len(index_column_names)
"name": "1D indexing by empty coords",
"index_column_names": ["0_thru_5"],
"coords": [],
"A": [10, 11, 12, 13, 14, 15],
"throws": None,
},
{
"index_column_names": ["index1"],
"name": "1D indexing by 1:3",
"index_column_names": ["0_thru_5"],
"coords": [slice(1, 3)], # Indexing slot is double-ended slice
"A": [11, 12, 13],
"throws": None,
},
{
"index_column_names": ["index1"],
"coords": [slice(None, None)], # Indexing slot is slice-all
"A": [10, 11, 12, 13, 14, 15],
"throws": None,
},
{
"index_column_names": ["index1"],
"name": "1D indexing by [:3]",
"index_column_names": ["0_thru_5"],
"coords": [slice(None, 3)], # Half-slice
"A": [10, 11, 12, 13],
"throws": None,
},
{
"index_column_names": ["index1"],
"name": "1D indexing by [2:]",
"index_column_names": ["0_thru_5"],
"coords": [slice(2, None)], # Half-slice
"A": [12, 13, 14, 15],
"throws": None,
},
{
"index_column_names": ["index1"],
"name": "1D indexing with one partition",
"index_column_names": ["0_thru_5"],
"coords": [slice(2, None)],
"partitions": somacore.IOfN(0, 1), # One partition is allowed.
"partitions": somacore.IOfN(0, 1),
"A": [12, 13, 14, 15],
"throws": None,
},
{
"index_column_names": ["index1"],
"name": "partitioned reads unimplemented",
"index_column_names": ["0_thru_5"],
"coords": [],
"partitions": somacore.IOfN(1, 2), # Partitioned reads forbidden.
"partitions": somacore.IOfN(1, 2),
"A": None,
"throws": ValueError,
},
{
"index_column_names": ["index1"],
"coords": [slice(1, 5, 2)], # Slice step must be 1 or None
"name": "steps forbidden",
"index_column_names": ["0_thru_5"],
"coords": [slice(1, 5, 2)],
"A": None,
"throws": ValueError,
},
{
"index_column_names": ["index1"],
"coords": [slice(-2, -1)], # Negative slices are not supported
"name": "slice must overlap domain (negative)",
"index_column_names": ["0_thru_5"],
"coords": [slice(-2, -1)],
"A": None,
"throws": ValueError,
},
{
"index_column_names": ["index1"],
"coords": [slice(1, 0)], # hi < lo
"name": "backwards slice",
"index_column_names": ["0_thru_5"],
"coords": [slice(1, 0)],
"A": None,
"throws": ValueError,
},
{
"index_column_names": ["index1"],
"coords": [(1,), (2,)], # len(ids) > len(index_column_names)
"name": "too many columns",
"index_column_names": ["0_thru_5"],
"coords": [(1,), (2,)],
"A": None,
"throws": ValueError,
},
{
"index_column_names": ["index1"],
"coords": "bogus", # ids not list/tuple
"name": "wrong coords type",
"index_column_names": ["0_thru_5"],
"coords": "bogus",
"A": None,
"throws": TypeError,
},
{
"index_column_names": ["index1"],
"coords": [{"bogus": True}], # bad index type
"name": "bad index type dict",
"index_column_names": ["0_thru_5"],
"coords": [{"bogus": True}],
"A": None,
"throws": TypeError,
},
# 2D: indexing slot is of invalid type
{
"index_column_names": ["index2", "index3"],
"name": "bad index type bool",
"index_column_names": ["strings_aaa", "zero_one"],
"coords": [[True], slice(None)],
"A": None,
"throws": (RuntimeError, tiledb.cc.TileDBError),
},
# 2D: indexing list is empty
{
"index_column_names": ["index2", "index3"],
"name": "2D index empty",
"index_column_names": ["strings_aaa", "zero_one"],
"coords": (),
"A": [10, 11, 12, 13, 14, 15],
"throws": None,
},
# 2D: indexing slot is None
{
"index_column_names": ["index2", "index3"],
"name": "2D index None",
"index_column_names": ["strings_aaa", "zero_one"],
"coords": [None, None],
"A": [10, 11, 12, 13, 14, 15],
"throws": None,
},
# 2D: indexing slot is int
{
"index_column_names": ["index1", "index3"],
"name": "2D index 0, 0",
"index_column_names": ["0_thru_5", "zero_one"],
"coords": [0, 0],
"A": [10],
"throws": None,
},
# 2D: indexing slots are string and int
{
"index_column_names": ["index2", "index3"],
"name": "2D index str, int",
"index_column_names": ["strings_aaa", "zero_one"],
"coords": [["aaa"], 0],
"A": [10],
"throws": None,
},
# 2D: indexing slot is a bare string, not a list/tuple of strings
{
"index_column_names": ["index2", "index3"],
"name": "2D index str, not sequence[str]",
"index_column_names": ["strings_aaa", "zero_one"],
"coords": ["aaa", 0],
"A": [10],
"throws": None,
},
# 2D: indexing slot is list
# TODO: at present SOMAReader only accepts int and string/byte dims.
{
"index_column_names": ["index2", "index3"],
"name": "2D index List[str]",
"index_column_names": ["strings_aaa", "zero_one"],
"coords": [["aaa", "ccc"], None],
"A": [10, 11, 14, 15],
"throws": None,
},
# 3D: indexing slot is list
{
"index_column_names": ["index2", "index3", "index4"],
"name": "3D index List[str]",
"index_column_names": ["strings_aaa", "zero_one", "thousands"],
"coords": [["aaa", "ccc"], None, None],
"A": [10, 11, 14, 15],
"throws": None,
},
# 3D: indexing slot is mixed
{
"index_column_names": ["index2", "index3", "index4"],
"name": "3D index mixed",
"index_column_names": ["strings_aaa", "zero_one", "thousands"],
"coords": [("aaa", "ccc"), None, np.asarray([2000, 9999])],
"A": [11],
"throws": None,
},
# value_filter
{
"index_column_names": ["index1", "index2"],
"name": "value filter good",
"index_column_names": ["0_thru_5", "strings_aaa"],
"coords": [None, ("ccc", "zzz")],
"value_filter": "soma_joinid > 13",
"A": [14, 15],
},
{
"index_column_names": ["index1", "index2"],
"name": "value filter bad",
"index_column_names": ["0_thru_5", "strings_aaa"],
"coords": [None, ("bbb", "zzz")],
"value_filter": "quick brown fox",
"A": None,
"throws": soma.SOMAError,
},
],
ids=lambda d: d.get("name"),
)
def test_read_indexing(tmp_path, io):
"""Test various ways of indexing on read"""
Expand Down

0 comments on commit e0e2842

Please sign in to comment.