From c40e44c522a79314f85a9d140ee834fdab34be30 Mon Sep 17 00:00:00 2001 From: Andrew Crozier Date: Fri, 10 May 2019 15:55:38 +0100 Subject: [PATCH 01/20] WIP: Resource-based interface --- faculty/experiments.py | 71 ++++++++++++++++++++++++++++++++++++++++++ setup.py | 1 + 2 files changed, 72 insertions(+) create mode 100644 faculty/experiments.py diff --git a/faculty/experiments.py b/faculty/experiments.py new file mode 100644 index 00000000..6bd8fd6a --- /dev/null +++ b/faculty/experiments.py @@ -0,0 +1,71 @@ +from attr import attrs, attrib +import pandas + +import faculty # TODO: Avoid possible circular imports + + +class QueryResult(object): + def __init__(self, iterable): + self.iterable = iterable + + def __iter__(self): + return iter(self.iterable) + + +class ExperimentRunQueryResult(QueryResult): + def as_dataframe(self): + records = [] + for run in self: + row = { + "Experiment ID": run.experiment_id, + "Run ID": run.id, + "Status": run.status.value, + "Started At": run.started_at, + } + for metric in run.metrics: + row[metric.key] = row[metric.value] + records.append(row) + return pandas.DataFrame(records) + + +@attrs +class ExperimentRun(object): + id = attrib() + run_number = attrib() + experiment_id = attrib() + name = attrib() + parent_run_id = attrib() + artifact_location = attrib() + status = attrib() + started_at = attrib() + ended_at = attrib() + deleted_at = attrib() + tags = attrib() + params = attrib() + metrics = attrib() + + @classmethod + def _from_client_model(cls, client_object): + return cls(**client_object._asdict()) + + @classmethod + def query(cls, project_id, experiment_ids=None): + def get_runs(): + client = faculty.client("experiment") + + response = client.list_runs(project_id, experiment_ids) + yield from map(cls._from_client_model, response.runs) + + while response.pagination.next is not None: + response = client.list_runs( + project_id, + experiment_ids, + start=response.pagination.next.start, + 
limit=response.pagination.next.limit, + ) + yield from map(cls._from_client_model, response.runs) + + # Open question: + # Should we evalutate the entire set of runs before returning the + # result, or is it ok to have them lazily evaluated + return ExperimentRunQueryResult(get_runs()) diff --git a/setup.py b/setup.py index e7f25fa8..c49176d4 100644 --- a/setup.py +++ b/setup.py @@ -34,6 +34,7 @@ # Install marshmallow with 'reco' (recommended) extras to ensure a # compatible version of python-dateutil is available "attrs", + "pandas", "marshmallow[reco]==3.0.0rc3", "marshmallow_enum", "marshmallow-oneofschema==2.0.0b2", From 17179da493fb1412c177c55455f43f984b479d1f Mon Sep 17 00:00:00 2001 From: Hailey Fong Date: Tue, 14 May 2019 17:49:43 +0100 Subject: [PATCH 02/20] Add test for experiments query function --- faculty/experiments.py | 10 ++- tests/test_experiments.py | 160 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 166 insertions(+), 4 deletions(-) create mode 100644 tests/test_experiments.py diff --git a/faculty/experiments.py b/faculty/experiments.py index 6bd8fd6a..d5c2a89b 100644 --- a/faculty/experiments.py +++ b/faculty/experiments.py @@ -49,17 +49,19 @@ def _from_client_model(cls, client_object): return cls(**client_object._asdict()) @classmethod - def query(cls, project_id, experiment_ids=None): + def query(cls, project_id, filter=None, sort=None): def get_runs(): client = faculty.client("experiment") - response = client.list_runs(project_id, experiment_ids) + response = client.query_runs(project_id, filter, sort) + print(response) yield from map(cls._from_client_model, response.runs) while response.pagination.next is not None: - response = client.list_runs( + response = client.query_runs( project_id, - experiment_ids, + filter, + sort, start=response.pagination.next.start, limit=response.pagination.next.limit, ) diff --git a/tests/test_experiments.py b/tests/test_experiments.py new file mode 100644 index 00000000..d2074c94 --- /dev/null +++ 
b/tests/test_experiments.py @@ -0,0 +1,160 @@ +from datetime import datetime +from uuid import uuid4 + +import pytest +from pytz import UTC + +from faculty.clients.experiment import ( + Experiment, + ExperimentClient, + ExperimentRun, + ExperimentRunStatus, + ListExperimentRunsResponse, + Metric, + Pagination, + Param, + SingleFilter, + SingleFilterBy, + SingleFilterOperator, + Sort, + SortBy, + SortOrder, + Tag +) + +from faculty.experiments import ( + ExperimentRun as FacultyExperimentRun, + ExperimentRunQueryResult +) + + + +PROJECT_ID = uuid4() +EXPERIMENT_ID = 661 +EXPERIMENT_RUN_ID = uuid4() +EXPERIMENT_RUN_NUMBER = 3 +EXPERIMENT_RUN_NAME = "run name" +PARENT_RUN_ID = uuid4() +RUN_STARTED_AT = datetime(2018, 3, 10, 11, 39, 12, 110000, tzinfo=UTC) +RUN_ENDED_AT = datetime(2018, 3, 10, 11, 39, 15, 110000, tzinfo=UTC) +CREATED_AT = datetime(2018, 3, 10, 11, 32, 6, 247000, tzinfo=UTC) +LAST_UPDATED_AT = datetime(2018, 3, 10, 11, 32, 30, 172000, tzinfo=UTC) +DELETED_AT = datetime(2018, 3, 10, 11, 37, 42, 482000, tzinfo=UTC) +TAG = Tag(key="tag-key", value="tag-value") +PARAM = Param(key="param-key", value="param-value") +METRIC_KEY = "metric-key" +METRIC_TIMESTAMP = datetime(2018, 3, 12, 16, 20, 22, 122000, tzinfo=UTC) +METRIC = Metric(key=METRIC_KEY, value=123, timestamp=METRIC_TIMESTAMP) + +EXPERIMENT = Experiment( + id=EXPERIMENT_ID, + name="experiment name", + description="experiment description", + artifact_location="https://example.com", + created_at=CREATED_AT, + last_updated_at=LAST_UPDATED_AT, + deleted_at=DELETED_AT, +) + +EXPERIMENT_RUN = ExperimentRun( + id=EXPERIMENT_RUN_ID, + run_number=EXPERIMENT_RUN_NUMBER, + name=EXPERIMENT_RUN_NAME, + parent_run_id=PARENT_RUN_ID, + experiment_id=EXPERIMENT.id, + artifact_location="faculty:", + status=ExperimentRunStatus.RUNNING, + started_at=RUN_STARTED_AT, + ended_at=RUN_ENDED_AT, + deleted_at=DELETED_AT, + tags=[TAG], + params=[PARAM], + metrics=[METRIC], +) + +PAGINATION = Pagination( + start=20, + size=10, + 
previous=None, + next=None, +) + +LIST_EXPERIMENT_RUNS_RESPONSE = ListExperimentRunsResponse( + runs=[EXPERIMENT_RUN], pagination=PAGINATION +) + +FILTER = SingleFilter( + SingleFilterBy.EXPERIMENT_ID, + None, + SingleFilterOperator.EQUAL_TO, + "2" + ) + +SORT = [Sort(SortBy.METRIC, "metric_key", SortOrder.ASC)] + +def test_experiment_run_query(mocker): + + experiment_client_mock = mocker.MagicMock() + experiment_client_mock.query_runs = LIST_EXPERIMENT_RUNS_RESPONSE + mocker.patch( + "faculty.client", new=experiment_client_mock + ) + + expected_response = FacultyExperimentRun( + id=EXPERIMENT_RUN_ID, + run_number=EXPERIMENT_RUN_NUMBER, + name=EXPERIMENT_RUN_NAME, + parent_run_id=PARENT_RUN_ID, + experiment_id=EXPERIMENT.id, + artifact_location="faculty:", + status=ExperimentRunStatus.RUNNING, + started_at=RUN_STARTED_AT, + ended_at=RUN_ENDED_AT, + deleted_at=DELETED_AT, + tags=[TAG], + params=[PARAM], + metrics=[METRIC] + ) + + print("hello") + + response = FacultyExperimentRun.query(PROJECT_ID, FILTER, SORT) + + print(response) + assert isinstance(response, ExperimentRunQueryResult) + # l = list(response) + # l = l[0] + # assert all(i==j for i,j in list(zip([getattr(l, attr) for attr in dir(l)], + # [getattr(expected_response, attr) for attr in dir(expected_response)]))) + + + # response_schema_mock = mocker.patch( + # "faculty.clients.experiment.ListExperimentRunsResponseSchema" + # ) + # request_schema_mock = mocker.patch( + # "faculty.clients.experiment.QueryRunsSchema" + # ) + # dump_mock = request_schema_mock.return_value.dump + + # test_filter = SingleFilter( + # SingleFilterBy.EXPERIMENT_ID, None, SingleFilterOperator.EQUAL_TO, "2" + # ) + # test_sort = [Sort(SortBy.METRIC, "metric_key", SortOrder.ASC)] + + # client = ExperimentClient(mocker.Mock()) + # list_result = client.query_runs( + # PROJECT_ID, filter=test_filter, sort=test_sort, start=20, limit=10 + # ) + + # assert list_result == LIST_EXPERIMENT_RUNS_RESPONSE + + # 
request_schema_mock.assert_called_once_with() + # dump_mock.assert_called_once_with( + # QueryRuns(test_filter, test_sort, Page(20, 10)) + # ) + # response_schema_mock.assert_called_once_with() + # ExperimentClient._post.assert_called_once_with( + # "/project/{}/run/query".format(PROJECT_ID), + # response_schema_mock.return_value, + # json=dump_mock.return_value, + # ) \ No newline at end of file From da52f4fd63ed83ba95dc48f4403f58487de91e04 Mon Sep 17 00:00:00 2001 From: Elias Benussi Date: Wed, 15 May 2019 10:25:18 +0100 Subject: [PATCH 03/20] Fix mocking of faculty.client --- faculty/experiments.py | 22 +++--- tests/test_experiments.py | 139 ++++++++++++++++++-------------------- 2 files changed, 78 insertions(+), 83 deletions(-) diff --git a/faculty/experiments.py b/faculty/experiments.py index d5c2a89b..b0540084 100644 --- a/faculty/experiments.py +++ b/faculty/experiments.py @@ -54,18 +54,18 @@ def get_runs(): client = faculty.client("experiment") response = client.query_runs(project_id, filter, sort) - print(response) - yield from map(cls._from_client_model, response.runs) + return map(cls._from_client_model, response.runs) + # yield from map(cls._from_client_model, response.runs) - while response.pagination.next is not None: - response = client.query_runs( - project_id, - filter, - sort, - start=response.pagination.next.start, - limit=response.pagination.next.limit, - ) - yield from map(cls._from_client_model, response.runs) + # while response.pagination.next is not None: + # response = client.query_runs( + # project_id, + # filter, + # sort, + # start=response.pagination.next.start, + # limit=response.pagination.next.limit, + # ) + # yield from map(cls._from_client_model, response.runs) # Open question: # Should we evalutate the entire set of runs before returning the diff --git a/tests/test_experiments.py b/tests/test_experiments.py index d2074c94..c2f79cef 100644 --- a/tests/test_experiments.py +++ b/tests/test_experiments.py @@ -5,30 +5,28 @@ from pytz 
import UTC from faculty.clients.experiment import ( - Experiment, - ExperimentClient, - ExperimentRun, - ExperimentRunStatus, - ListExperimentRunsResponse, - Metric, - Pagination, - Param, - SingleFilter, - SingleFilterBy, - SingleFilterOperator, - Sort, - SortBy, - SortOrder, - Tag + Experiment, + ExperimentRun, + ExperimentRunStatus, + ListExperimentRunsResponse, + Metric, + Pagination, + Param, + SingleFilter, + SingleFilterBy, + SingleFilterOperator, + Sort, + SortBy, + SortOrder, + Tag ) from faculty.experiments import ( - ExperimentRun as FacultyExperimentRun, - ExperimentRunQueryResult + ExperimentRun as FacultyExperimentRun, + ExperimentRunQueryResult ) - PROJECT_ID = uuid4() EXPERIMENT_ID = 661 EXPERIMENT_RUN_ID = uuid4() @@ -84,77 +82,74 @@ ) FILTER = SingleFilter( - SingleFilterBy.EXPERIMENT_ID, - None, - SingleFilterOperator.EQUAL_TO, - "2" - ) + SingleFilterBy.EXPERIMENT_ID, + None, + SingleFilterOperator.EQUAL_TO, + "2" +) SORT = [Sort(SortBy.METRIC, "metric_key", SortOrder.ASC)] + def test_experiment_run_query(mocker): experiment_client_mock = mocker.MagicMock() - experiment_client_mock.query_runs = LIST_EXPERIMENT_RUNS_RESPONSE + experiment_client_mock.query_runs.return_value = LIST_EXPERIMENT_RUNS_RESPONSE mocker.patch( - "faculty.client", new=experiment_client_mock + "faculty.client", return_value=experiment_client_mock ) - expected_response = FacultyExperimentRun( - id=EXPERIMENT_RUN_ID, - run_number=EXPERIMENT_RUN_NUMBER, - name=EXPERIMENT_RUN_NAME, - parent_run_id=PARENT_RUN_ID, - experiment_id=EXPERIMENT.id, - artifact_location="faculty:", - status=ExperimentRunStatus.RUNNING, - started_at=RUN_STARTED_AT, - ended_at=RUN_ENDED_AT, - deleted_at=DELETED_AT, - tags=[TAG], - params=[PARAM], - metrics=[METRIC] + expected_response = ExperimentRun( + id=EXPERIMENT_RUN_ID, + run_number=EXPERIMENT_RUN_NUMBER, + name=EXPERIMENT_RUN_NAME, + parent_run_id=PARENT_RUN_ID, + experiment_id=EXPERIMENT.id, + artifact_location="faculty:", + 
status=ExperimentRunStatus.RUNNING, + started_at=RUN_STARTED_AT, + ended_at=RUN_ENDED_AT, + deleted_at=DELETED_AT, + tags=[TAG], + params=[PARAM], + metrics=[METRIC] ) - print("hello") - response = FacultyExperimentRun.query(PROJECT_ID, FILTER, SORT) - print(response) assert isinstance(response, ExperimentRunQueryResult) - # l = list(response) - # l = l[0] - # assert all(i==j for i,j in list(zip([getattr(l, attr) for attr in dir(l)], - # [getattr(expected_response, attr) for attr in dir(expected_response)]))) + returned_run = list(response)[0] + + # assert all(i == j for i, j in list(zip([getattr(l, attr) for attr in dir(l)], [getattr(expected_response, attr) for attr in dir(expected_response)]))) - # response_schema_mock = mocker.patch( - # "faculty.clients.experiment.ListExperimentRunsResponseSchema" - # ) - # request_schema_mock = mocker.patch( - # "faculty.clients.experiment.QueryRunsSchema" - # ) - # dump_mock = request_schema_mock.return_value.dump + # response_schema_mock = mocker.patch( + # "faculty.clients.experiment.ListExperimentRunsResponseSchema" + # ) + # request_schema_mock = mocker.patch( + # "faculty.clients.experiment.QueryRunsSchema" + # ) + # dump_mock = request_schema_mock.return_value.dump # test_filter = SingleFilter( # SingleFilterBy.EXPERIMENT_ID, None, SingleFilterOperator.EQUAL_TO, "2" # ) - # test_sort = [Sort(SortBy.METRIC, "metric_key", SortOrder.ASC)] - - # client = ExperimentClient(mocker.Mock()) - # list_result = client.query_runs( - # PROJECT_ID, filter=test_filter, sort=test_sort, start=20, limit=10 - # ) - - # assert list_result == LIST_EXPERIMENT_RUNS_RESPONSE - - # request_schema_mock.assert_called_once_with() - # dump_mock.assert_called_once_with( - # QueryRuns(test_filter, test_sort, Page(20, 10)) - # ) - # response_schema_mock.assert_called_once_with() - # ExperimentClient._post.assert_called_once_with( - # "/project/{}/run/query".format(PROJECT_ID), - # response_schema_mock.return_value, - # json=dump_mock.return_value, - # 
) \ No newline at end of file + # test_sort = [Sort(SortBy.METRIC, "metric_key", SortOrder.ASC)] + + # client = ExperimentClient(mocker.Mock()) + # list_result = client.query_runs( + # PROJECT_ID, filter=test_filter, sort=test_sort, start=20, limit=10 + # ) + + # assert list_result == LIST_EXPERIMENT_RUNS_RESPONSE + + # request_schema_mock.assert_called_once_with() + # dump_mock.assert_called_once_with( + # QueryRuns(test_filter, test_sort, Page(20, 10)) + # ) + # response_schema_mock.assert_called_once_with() + # ExperimentClient._post.assert_called_once_with( + # "/project/{}/run/query".format(PROJECT_ID), + # response_schema_mock.return_value, + # json=dump_mock.return_value, + # ) From f77feb0df89d7c682abe35aa4ab82c8bc0292103 Mon Sep 17 00:00:00 2001 From: Hailey Fong Date: Wed, 15 May 2019 12:14:54 +0100 Subject: [PATCH 04/20] Add test for query in experiments.py --- faculty/experiments.py | 22 +++++------ tests/test_experiments.py | 81 +++++++++++++++------------------------ 2 files changed, 41 insertions(+), 62 deletions(-) diff --git a/faculty/experiments.py b/faculty/experiments.py index b0540084..72790286 100644 --- a/faculty/experiments.py +++ b/faculty/experiments.py @@ -54,18 +54,18 @@ def get_runs(): client = faculty.client("experiment") response = client.query_runs(project_id, filter, sort) - return map(cls._from_client_model, response.runs) - # yield from map(cls._from_client_model, response.runs) + # return map(cls._from_client_model, response.runs) + yield from map(cls._from_client_model, response.runs) - # while response.pagination.next is not None: - # response = client.query_runs( - # project_id, - # filter, - # sort, - # start=response.pagination.next.start, - # limit=response.pagination.next.limit, - # ) - # yield from map(cls._from_client_model, response.runs) + while response.pagination.next is not None: + response = client.query_runs( + project_id, + filter, + sort, + start=response.pagination.next.start, + 
limit=response.pagination.next.limit, + ) + yield from map(cls._from_client_model, response.runs) # Open question: # Should we evalutate the entire set of runs before returning the diff --git a/tests/test_experiments.py b/tests/test_experiments.py index c2f79cef..217bdd35 100644 --- a/tests/test_experiments.py +++ b/tests/test_experiments.py @@ -3,6 +3,7 @@ import pytest from pytz import UTC +import inspect from faculty.clients.experiment import ( Experiment, @@ -18,12 +19,12 @@ Sort, SortBy, SortOrder, - Tag + Tag, ) from faculty.experiments import ( ExperimentRun as FacultyExperimentRun, - ExperimentRunQueryResult + ExperimentRunQueryResult, ) @@ -70,22 +71,14 @@ metrics=[METRIC], ) -PAGINATION = Pagination( - start=20, - size=10, - previous=None, - next=None, -) +PAGINATION = Pagination(start=20, size=10, previous=None, next=None) LIST_EXPERIMENT_RUNS_RESPONSE = ListExperimentRunsResponse( runs=[EXPERIMENT_RUN], pagination=PAGINATION ) FILTER = SingleFilter( - SingleFilterBy.EXPERIMENT_ID, - None, - SingleFilterOperator.EQUAL_TO, - "2" + SingleFilterBy.EXPERIMENT_ID, None, SingleFilterOperator.EQUAL_TO, "2" ) SORT = [Sort(SortBy.METRIC, "metric_key", SortOrder.ASC)] @@ -94,12 +87,12 @@ def test_experiment_run_query(mocker): experiment_client_mock = mocker.MagicMock() - experiment_client_mock.query_runs.return_value = LIST_EXPERIMENT_RUNS_RESPONSE - mocker.patch( - "faculty.client", return_value=experiment_client_mock + experiment_client_mock.query_runs.return_value = ( + LIST_EXPERIMENT_RUNS_RESPONSE ) + mocker.patch("faculty.client", return_value=experiment_client_mock) - expected_response = ExperimentRun( + expected_response = FacultyExperimentRun( id=EXPERIMENT_RUN_ID, run_number=EXPERIMENT_RUN_NUMBER, name=EXPERIMENT_RUN_NAME, @@ -112,44 +105,30 @@ def test_experiment_run_query(mocker): deleted_at=DELETED_AT, tags=[TAG], params=[PARAM], - metrics=[METRIC] + metrics=[METRIC], ) response = FacultyExperimentRun.query(PROJECT_ID, FILTER, SORT) assert 
isinstance(response, ExperimentRunQueryResult) returned_run = list(response)[0] - - - # assert all(i == j for i, j in list(zip([getattr(l, attr) for attr in dir(l)], [getattr(expected_response, attr) for attr in dir(expected_response)]))) - - # response_schema_mock = mocker.patch( - # "faculty.clients.experiment.ListExperimentRunsResponseSchema" - # ) - # request_schema_mock = mocker.patch( - # "faculty.clients.experiment.QueryRunsSchema" - # ) - # dump_mock = request_schema_mock.return_value.dump - - # test_filter = SingleFilter( - # SingleFilterBy.EXPERIMENT_ID, None, SingleFilterOperator.EQUAL_TO, "2" - # ) - # test_sort = [Sort(SortBy.METRIC, "metric_key", SortOrder.ASC)] - - # client = ExperimentClient(mocker.Mock()) - # list_result = client.query_runs( - # PROJECT_ID, filter=test_filter, sort=test_sort, start=20, limit=10 - # ) - - # assert list_result == LIST_EXPERIMENT_RUNS_RESPONSE - - # request_schema_mock.assert_called_once_with() - # dump_mock.assert_called_once_with( - # QueryRuns(test_filter, test_sort, Page(20, 10)) - # ) - # response_schema_mock.assert_called_once_with() - # ExperimentClient._post.assert_called_once_with( - # "/project/{}/run/query".format(PROJECT_ID), - # response_schema_mock.return_value, - # json=dump_mock.return_value, - # ) + assert isinstance(returned_run, FacultyExperimentRun) + assert all( + list( + i == j + for i, j in ( + list( + zip( + [ + getattr(returned_run, attr) + for attr in returned_run.__dict__.keys() + ], + [ + getattr(expected_response, attr) + for attr in expected_response.__dict__.keys() + ], + ) + ) + ) + ) + ) From a9041fdb48e3165b591bb3c2fc63505ff7bff754 Mon Sep 17 00:00:00 2001 From: Elias Benussi Date: Wed, 15 May 2019 14:13:08 +0100 Subject: [PATCH 05/20] Parametrise testing of query --- tests/test_experiments.py | 128 +++++++++++++++++++++++++++++--------- 1 file changed, 98 insertions(+), 30 deletions(-) diff --git a/tests/test_experiments.py b/tests/test_experiments.py index 217bdd35..38e0962d 100644 
--- a/tests/test_experiments.py +++ b/tests/test_experiments.py @@ -3,7 +3,6 @@ import pytest from pytz import UTC -import inspect from faculty.clients.experiment import ( Experiment, @@ -11,6 +10,7 @@ ExperimentRunStatus, ListExperimentRunsResponse, Metric, + Page, Pagination, Param, SingleFilter, @@ -55,6 +55,12 @@ deleted_at=DELETED_AT, ) +FILTER = SingleFilter( + SingleFilterBy.EXPERIMENT_ID, None, SingleFilterOperator.EQUAL_TO, "2" +) + +SORT = [Sort(SortBy.METRIC, "metric_key", SortOrder.ASC)] + EXPERIMENT_RUN = ExperimentRun( id=EXPERIMENT_RUN_ID, run_number=EXPERIMENT_RUN_NUMBER, @@ -70,29 +76,53 @@ params=[PARAM], metrics=[METRIC], ) - -PAGINATION = Pagination(start=20, size=10, previous=None, next=None) - +PAGINATION = Pagination(0, 1, None, None) LIST_EXPERIMENT_RUNS_RESPONSE = ListExperimentRunsResponse( runs=[EXPERIMENT_RUN], pagination=PAGINATION ) - -FILTER = SingleFilter( - SingleFilterBy.EXPERIMENT_ID, None, SingleFilterOperator.EQUAL_TO, "2" -) - -SORT = [Sort(SortBy.METRIC, "metric_key", SortOrder.ASC)] - - -def test_experiment_run_query(mocker): - - experiment_client_mock = mocker.MagicMock() - experiment_client_mock.query_runs.return_value = ( - LIST_EXPERIMENT_RUNS_RESPONSE +EXPECTED_RUNS = [ + FacultyExperimentRun( + id=EXPERIMENT_RUN_ID, + run_number=EXPERIMENT_RUN_NUMBER, + name=EXPERIMENT_RUN_NAME, + parent_run_id=PARENT_RUN_ID, + experiment_id=EXPERIMENT.id, + artifact_location="faculty:", + status=ExperimentRunStatus.RUNNING, + started_at=RUN_STARTED_AT, + ended_at=RUN_ENDED_AT, + deleted_at=DELETED_AT, + tags=[TAG], + params=[PARAM], + metrics=[METRIC], ) - mocker.patch("faculty.client", return_value=experiment_client_mock) +] - expected_response = FacultyExperimentRun( +PAGINATION_MULTIPLE_1 = Pagination(0, 1, None, Page(1, 1)) +LIST_EXPERIMENT_RUNS_RESPONSE_MULTIPLE_1 = ListExperimentRunsResponse( + runs=[EXPERIMENT_RUN], pagination=PAGINATION_MULTIPLE_1 +) +EXPERIMENT_RUN_MULTIPLE_2 = ExperimentRun( + id=7, + 
run_number=EXPERIMENT_RUN_NUMBER, + name=EXPERIMENT_RUN_NAME, + parent_run_id=PARENT_RUN_ID, + experiment_id=EXPERIMENT.id, + artifact_location="faculty:", + status=ExperimentRunStatus.RUNNING, + started_at=RUN_STARTED_AT, + ended_at=RUN_ENDED_AT, + deleted_at=DELETED_AT, + tags=[TAG], + params=[PARAM], + metrics=[METRIC], +) +PAGINATION_MULTIPLE_2 = Pagination(1, 1, Page(0, 1), None) +LIST_EXPERIMENT_RUNS_RESPONSE_MULTIPLE_2 = ListExperimentRunsResponse( + runs=[EXPERIMENT_RUN_MULTIPLE_2], pagination=PAGINATION_MULTIPLE_2 +) +EXPECTED_RUNS_2 = [ + FacultyExperimentRun( id=EXPERIMENT_RUN_ID, run_number=EXPERIMENT_RUN_NUMBER, name=EXPERIMENT_RUN_NAME, @@ -106,27 +136,65 @@ def test_experiment_run_query(mocker): tags=[TAG], params=[PARAM], metrics=[METRIC], + ), + FacultyExperimentRun( + id=7, + run_number=EXPERIMENT_RUN_NUMBER, + name=EXPERIMENT_RUN_NAME, + parent_run_id=PARENT_RUN_ID, + experiment_id=EXPERIMENT.id, + artifact_location="faculty:", + status=ExperimentRunStatus.RUNNING, + started_at=RUN_STARTED_AT, + ended_at=RUN_ENDED_AT, + deleted_at=DELETED_AT, + tags=[TAG], + params=[PARAM], + metrics=[METRIC], + ), +] + + +@pytest.mark.parametrize( + "query_runs_side_effects,expected_runs", + [ + [[LIST_EXPERIMENT_RUNS_RESPONSE], EXPECTED_RUNS], + [ + [ + LIST_EXPERIMENT_RUNS_RESPONSE_MULTIPLE_1, + LIST_EXPERIMENT_RUNS_RESPONSE_MULTIPLE_2, + ], + EXPECTED_RUNS_2, + ], + ], +) +def test_experiment_run_query_single_call( + mocker, query_runs_side_effects, expected_runs +): + experiment_client_mock = mocker.MagicMock() + experiment_client_mock.query_runs = mocker.MagicMock( + side_effect=query_runs_side_effects ) + mocker.patch("faculty.client", return_value=experiment_client_mock) response = FacultyExperimentRun.query(PROJECT_ID, FILTER, SORT) assert isinstance(response, ExperimentRunQueryResult) - returned_run = list(response)[0] - assert isinstance(returned_run, FacultyExperimentRun) - assert all( + returned_runs = list(response) + for expected_run, returned_run 
in zip(expected_runs, returned_runs): + assert isinstance(returned_run, FacultyExperimentRun) + assert _are_runs_equal(expected_run, returned_run) + + +def _are_runs_equal(this, that): + return all( list( i == j for i, j in ( list( zip( - [ - getattr(returned_run, attr) - for attr in returned_run.__dict__.keys() - ], - [ - getattr(expected_response, attr) - for attr in expected_response.__dict__.keys() - ], + [getattr(this, attr) for attr in this.__dict__.keys()], + [getattr(that, attr) for attr in that.__dict__.keys()], ) ) ) From a3fb1ac6f37547ae222cefe7458c603e7e09eb7e Mon Sep 17 00:00:00 2001 From: Elias Benussi Date: Thu, 30 May 2019 17:57:43 +0100 Subject: [PATCH 06/20] Use FACULTY_PROJECT_ID by default --- faculty/experiments.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/faculty/experiments.py b/faculty/experiments.py index 72790286..3b117d65 100644 --- a/faculty/experiments.py +++ b/faculty/experiments.py @@ -1,3 +1,4 @@ +import os from attr import attrs, attrib import pandas @@ -49,7 +50,10 @@ def _from_client_model(cls, client_object): return cls(**client_object._asdict()) @classmethod - def query(cls, project_id, filter=None, sort=None): + def query(cls, project_id=None, filter=None, sort=None): + if project_id is None: + project_id = os.environ["FACULTY_PROJECT_ID"] + def get_runs(): client = faculty.client("experiment") From dd29445d5bf7f9cd46b9c7599061fc0180849854 Mon Sep 17 00:00:00 2001 From: Andrew Crozier Date: Thu, 13 Jun 2019 17:29:28 +0100 Subject: [PATCH 07/20] Prototype helper for resolving project ID --- faculty/experiments.py | 7 ++--- faculty/resolvers.py | 62 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 4 deletions(-) create mode 100644 faculty/resolvers.py diff --git a/faculty/experiments.py b/faculty/experiments.py index 3b117d65..1fc3f8b9 100644 --- a/faculty/experiments.py +++ b/faculty/experiments.py @@ -1,8 +1,8 @@ -import os from attr import attrs, attrib import pandas 
import faculty # TODO: Avoid possible circular imports +from faculty import resolvers class QueryResult(object): @@ -50,9 +50,8 @@ def _from_client_model(cls, client_object): return cls(**client_object._asdict()) @classmethod - def query(cls, project_id=None, filter=None, sort=None): - if project_id is None: - project_id = os.environ["FACULTY_PROJECT_ID"] + def query(cls, project=None, filter=None, sort=None): + project_id = resolvers.resolve_project_id(project) def get_runs(): client = faculty.client("experiment") diff --git a/faculty/resolvers.py b/faculty/resolvers.py new file mode 100644 index 00000000..c9e97259 --- /dev/null +++ b/faculty/resolvers.py @@ -0,0 +1,62 @@ +# Copyright 2018-2019 Faculty Science Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from uuid import UUID + +from faculty.context import get_context +from faculty.session import get_session +from faculty.clients import AccountClient, ProjectClient + + +def _make_uuid(value): + """Make a UUID from the passed value. + + Pass through UUID objects as the UUID constructor fails when passed UUID + objects. 
+ """ + if isinstance(value, UUID): + return value + else: + return UUID(value) + + +def _project_from_name(name): + session = get_session() + user_id = AccountClient(session).authenticated_user_id() + projects = ProjectClient(session).list_accessible_by_user(user_id) + matches = [project for project in projects if project.name == name] + if len(matches) == 1: + return matches[0] + elif len(matches) == 0: + raise ValueError("No projects of name {} found".format(name)) + else: + raise ValueError("Multiple projects of name {} found".format(name)) + + +def resolve_project_id(project=None): + if project is None: + context = get_context() + if context.project_id is None: + raise ValueError( + "Must pass a project when none can be determined from the " + "runtime context" + ) + else: + return context.project_id + else: + try: + return _make_uuid(project) + except ValueError: + return _project_from_name(project).id From 0065a49e547166d99fbdb7957e6649a88e93f189 Mon Sep 17 00:00:00 2001 From: Andrew Crozier Date: Thu, 13 Jun 2019 17:31:39 +0100 Subject: [PATCH 08/20] Rename module --- faculty/{experiments.py => experiment.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename faculty/{experiments.py => experiment.py} (100%) diff --git a/faculty/experiments.py b/faculty/experiment.py similarity index 100% rename from faculty/experiments.py rename to faculty/experiment.py From 14765ca86be788a548c4093703b8c2785aeb07b8 Mon Sep 17 00:00:00 2001 From: Andrew Crozier Date: Thu, 13 Jun 2019 17:48:35 +0100 Subject: [PATCH 09/20] Fix experiment tests --- faculty/clients/experiment/__init__.py | 1 + faculty/experiment.py | 15 +++++++++++ ...test_experiments.py => test_experiment.py} | 26 +++++++++---------- 3 files changed, 28 insertions(+), 14 deletions(-) rename tests/{test_experiments.py => test_experiment.py} (94%) diff --git a/faculty/clients/experiment/__init__.py b/faculty/clients/experiment/__init__.py index 7f6a623f..5022eb03 100644 --- 
a/faculty/clients/experiment/__init__.py +++ b/faculty/clients/experiment/__init__.py @@ -38,6 +38,7 @@ RestoreExperimentRunsResponse, RunIdFilter, RunNumberSort, + SortOrder, StartedAtSort, Tag, TagFilter, diff --git a/faculty/experiment.py b/faculty/experiment.py index 1fc3f8b9..1ff761a0 100644 --- a/faculty/experiment.py +++ b/faculty/experiment.py @@ -1,3 +1,18 @@ +# Copyright 2018-2019 Faculty Science Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + from attr import attrs, attrib import pandas diff --git a/tests/test_experiments.py b/tests/test_experiment.py similarity index 94% rename from tests/test_experiments.py rename to tests/test_experiment.py index 38e0962d..897955a1 100644 --- a/tests/test_experiments.py +++ b/tests/test_experiment.py @@ -5,24 +5,24 @@ from pytz import UTC from faculty.clients.experiment import ( + ComparisonOperator, + ExperimentIdFilter, Experiment, ExperimentRun, ExperimentRunStatus, - ListExperimentRunsResponse, Metric, - Page, - Pagination, + MetricSort, Param, - SingleFilter, - SingleFilterBy, - SingleFilterOperator, - Sort, - SortBy, SortOrder, Tag, ) +from faculty.clients.experiment._models import ( + ListExperimentRunsResponse, + Page, + Pagination, +) -from faculty.experiments import ( +from faculty.experiment import ( ExperimentRun as FacultyExperimentRun, ExperimentRunQueryResult, ) @@ -43,7 +43,7 @@ PARAM = Param(key="param-key", value="param-value") METRIC_KEY = "metric-key" METRIC_TIMESTAMP = datetime(2018, 3, 
12, 16, 20, 22, 122000, tzinfo=UTC) -METRIC = Metric(key=METRIC_KEY, value=123, timestamp=METRIC_TIMESTAMP) +METRIC = Metric(key=METRIC_KEY, step=1, value=123, timestamp=METRIC_TIMESTAMP) EXPERIMENT = Experiment( id=EXPERIMENT_ID, @@ -55,11 +55,9 @@ deleted_at=DELETED_AT, ) -FILTER = SingleFilter( - SingleFilterBy.EXPERIMENT_ID, None, SingleFilterOperator.EQUAL_TO, "2" -) +FILTER = ExperimentIdFilter(ComparisonOperator.EQUAL_TO, 2) -SORT = [Sort(SortBy.METRIC, "metric_key", SortOrder.ASC)] +SORT = [MetricSort("metric-key", SortOrder.ASC)] EXPERIMENT_RUN = ExperimentRun( id=EXPERIMENT_RUN_ID, From c28ddd13403d7d322585a78ccc2a731967ac4a51 Mon Sep 17 00:00:00 2001 From: Andrew Crozier Date: Mon, 17 Jun 2019 11:29:18 +0100 Subject: [PATCH 10/20] Allow customising session in resource interface --- faculty/experiment.py | 20 ++++++++++---------- faculty/resolvers.py | 8 ++++---- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/faculty/experiment.py b/faculty/experiment.py index 1ff761a0..78dff155 100644 --- a/faculty/experiment.py +++ b/faculty/experiment.py @@ -16,8 +16,9 @@ from attr import attrs, attrib import pandas -import faculty # TODO: Avoid possible circular imports -from faculty import resolvers +from faculty.session import get_session +from faculty.resolvers import resolve_project_id +from faculty.clients.experiment import ExperimentClient class QueryResult(object): @@ -65,11 +66,13 @@ def _from_client_model(cls, client_object): return cls(**client_object._asdict()) @classmethod - def query(cls, project=None, filter=None, sort=None): - project_id = resolvers.resolve_project_id(project) + def query(cls, project=None, filter=None, sort=None, **session_config): - def get_runs(): - client = faculty.client("experiment") + session = get_session(**session_config) + project_id = resolve_project_id(session, project) + + def _get_runs(): + client = ExperimentClient(session) response = client.query_runs(project_id, filter, sort) # return 
map(cls._from_client_model, response.runs) @@ -85,7 +88,4 @@ def get_runs(): ) yield from map(cls._from_client_model, response.runs) - # Open question: - # Should we evalutate the entire set of runs before returning the - # result, or is it ok to have them lazily evaluated - return ExperimentRunQueryResult(get_runs()) + return ExperimentRunQueryResult(list(_get_runs())) diff --git a/faculty/resolvers.py b/faculty/resolvers.py index c9e97259..eca5b107 100644 --- a/faculty/resolvers.py +++ b/faculty/resolvers.py @@ -16,7 +16,6 @@ from uuid import UUID from faculty.context import get_context -from faculty.session import get_session from faculty.clients import AccountClient, ProjectClient @@ -32,10 +31,11 @@ def _make_uuid(value): return UUID(value) -def _project_from_name(name): - session = get_session() +def _project_from_name(session, name): + user_id = AccountClient(session).authenticated_user_id() projects = ProjectClient(session).list_accessible_by_user(user_id) + matches = [project for project in projects if project.name == name] if len(matches) == 1: return matches[0] @@ -45,7 +45,7 @@ def _project_from_name(name): raise ValueError("Multiple projects of name {} found".format(name)) -def resolve_project_id(project=None): +def resolve_project_id(session, project=None): if project is None: context = get_context() if context.project_id is None: From b94186c79040d467405b8ad50ff32b944a0bbcc1 Mon Sep 17 00:00:00 2001 From: Andrew Crozier Date: Mon, 17 Jun 2019 12:00:20 +0100 Subject: [PATCH 11/20] Cache session in resolvers and move to new util private package --- faculty/_util/__init__.py | 13 +++++++++++++ faculty/{ => _util}/resolvers.py | 9 ++++++--- faculty/experiment.py | 2 +- setup.py | 1 + 4 files changed, 21 insertions(+), 4 deletions(-) create mode 100644 faculty/_util/__init__.py rename faculty/{ => _util}/resolvers.py (88%) diff --git a/faculty/_util/__init__.py b/faculty/_util/__init__.py new file mode 100644 index 00000000..f5221fcb --- /dev/null +++ 
b/faculty/_util/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2018-2019 Faculty Science Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/faculty/resolvers.py b/faculty/_util/resolvers.py similarity index 88% rename from faculty/resolvers.py rename to faculty/_util/resolvers.py index eca5b107..32609b71 100644 --- a/faculty/resolvers.py +++ b/faculty/_util/resolvers.py @@ -15,6 +15,8 @@ from uuid import UUID +from cachetools.func import lru_cache + from faculty.context import get_context from faculty.clients import AccountClient, ProjectClient @@ -45,13 +47,14 @@ def _project_from_name(session, name): raise ValueError("Multiple projects of name {} found".format(name)) +@lru_cache() def resolve_project_id(session, project=None): if project is None: context = get_context() if context.project_id is None: raise ValueError( - "Must pass a project when none can be determined from the " - "runtime context" + "Must pass a project name or ID when none can be determined " + "from the runtime context" ) else: return context.project_id @@ -59,4 +62,4 @@ def resolve_project_id(session, project=None): try: return _make_uuid(project) except ValueError: - return _project_from_name(project).id + return _project_from_name(session, project).id diff --git a/faculty/experiment.py b/faculty/experiment.py index 78dff155..60e40ef8 100644 --- a/faculty/experiment.py +++ b/faculty/experiment.py @@ -17,7 +17,7 @@ import pandas from faculty.session import get_session -from 
faculty.resolvers import resolve_project_id +from faculty._util.resolvers import resolve_project_id from faculty.clients.experiment import ExperimentClient diff --git a/setup.py b/setup.py index c49176d4..a80b2418 100644 --- a/setup.py +++ b/setup.py @@ -31,6 +31,7 @@ "pytz", "six", "enum34; python_version<'3.4'", + "cachetools", # Install marshmallow with 'reco' (recommended) extras to ensure a # compatible version of python-dateutil is available "attrs", From b6e3c58ca096323a7662bc65db68b25619808516 Mon Sep 17 00:00:00 2001 From: Andrew Crozier Date: Mon, 17 Jun 2019 12:35:28 +0100 Subject: [PATCH 12/20] Add tests for resolvers --- tests/_util/__init__.py | 13 ++++ tests/_util/test_resolvers.py | 109 ++++++++++++++++++++++++++++++++++ 2 files changed, 122 insertions(+) create mode 100644 tests/_util/__init__.py create mode 100644 tests/_util/test_resolvers.py diff --git a/tests/_util/__init__.py b/tests/_util/__init__.py new file mode 100644 index 00000000..f5221fcb --- /dev/null +++ b/tests/_util/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2018-2019 Faculty Science Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/tests/_util/test_resolvers.py b/tests/_util/test_resolvers.py new file mode 100644 index 00000000..da82d948 --- /dev/null +++ b/tests/_util/test_resolvers.py @@ -0,0 +1,109 @@ +# Copyright 2018-2019 Faculty Science Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from uuid import uuid4 + +import pytest + +from faculty._util.resolvers import resolve_project_id + + +PROJECT_ID = uuid4() + + +@pytest.fixture(autouse=True) +def clear_cache(): + resolve_project_id.cache_clear() + + +@pytest.fixture +def mock_session(mocker): + return mocker.Mock() + + +@pytest.fixture +def mock_account_client(mocker, mock_session): + class_mock = mocker.patch("faculty._util.resolvers.AccountClient") + yield class_mock.return_value + class_mock.assert_called_once_with(mock_session) + + +@pytest.fixture +def mock_project_client(mocker, mock_session): + class_mock = mocker.patch("faculty._util.resolvers.ProjectClient") + yield class_mock.return_value + class_mock.assert_called_once_with(mock_session) + + +def test_resolve_project_id( + mocker, mock_session, mock_account_client, mock_project_client +): + project = mocker.Mock() + project.name = "project name" + mock_project_client.list_accessible_by_user.return_value = [ + mocker.Mock(), + project, + mocker.Mock(), + ] + + assert resolve_project_id(mock_session, "project name") == project.id + + mock_account_client.authenticated_user_id.assert_called_once_with() + 
mock_project_client.list_accessible_by_user.assert_called_once_with( + mock_account_client.authenticated_user_id.return_value + ) + + +def test_resolve_project_id_no_matches( + mocker, mock_session, mock_account_client, mock_project_client +): + mock_project_client.list_accessible_by_user.return_value = [ + mocker.Mock(), + mocker.Mock(), + ] + with pytest.raises(ValueError, match="No projects .* found"): + resolve_project_id(mock_session, "project name") + + +def test_resolve_project_id_multiple_matches( + mocker, mock_session, mock_account_client, mock_project_client +): + project = mocker.Mock() + project.name = "project name" + mock_project_client.list_accessible_by_user.return_value = [ + project, + project, + ] + with pytest.raises(ValueError, match="Multiple projects .* found"): + resolve_project_id(mock_session, "project name") + + +@pytest.mark.parametrize("argument", [PROJECT_ID, str(PROJECT_ID)]) +def test_resolve_project_id_is_uuid(mock_session, argument): + assert resolve_project_id(mock_session, argument) == PROJECT_ID + + +def test_resolve_project_id_from_context(mocker, mock_session): + context = mocker.Mock() + mocker.patch("faculty._util.resolvers.get_context", return_value=context) + assert resolve_project_id(mock_session) == context.project_id + + +def test_resolve_project_id_from_context_missing(mocker, mock_session): + context = mocker.Mock() + context.project_id = None + mocker.patch("faculty._util.resolvers.get_context", return_value=context) + with pytest.raises(ValueError): + resolve_project_id(mock_session) From 4aa8ce4be2185e0a7b3266443deafe5944cf844f Mon Sep 17 00:00:00 2001 From: Andrew Crozier Date: Tue, 18 Jun 2019 10:08:51 +0100 Subject: [PATCH 13/20] Add docstrings --- faculty/_util/resolvers.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/faculty/_util/resolvers.py b/faculty/_util/resolvers.py index 32609b71..92bf06bf 100644 --- a/faculty/_util/resolvers.py +++ b/faculty/_util/resolvers.py 
@@ -34,6 +34,12 @@ def _make_uuid(value): def _project_from_name(session, name): + """Provided a project name, find a matching project ID. + + This method searches all the projects accessible to the active user for a + matching project. If not exactly one project matches, a ValueError is + raised. + """ user_id = AccountClient(session).authenticated_user_id() projects = ProjectClient(session).list_accessible_by_user(user_id) @@ -49,6 +55,34 @@ def _project_from_name(session, name): @lru_cache() def resolve_project_id(session, project=None): + """Resolve the ID of a project based on ID, name or the current context. + + This helper encapsulates logic for determining a project in three + situations: + + * If ``None`` is passed as the project, or if no project is passed, the + project will be inferred from the runtime context (i.e. environment + variables), and so will correspond to the 'current project' when run + inside Faculty platform. + * If a ``uuid.UUID`` or a string containing a valid UUID is passed, this + will be assumed to be the ID of the project and will be returned. + * If any other string is passed, the Faculty platform will be queried for + projects matching that name. If exactly one of that name is accessible to + the user, its ID will be returned, otherwise a ``ValueError`` will be + raised. + + Parameters + ---------- + session : faculty.session.Session + project : str, uuid.UUID or None + Information to use to determine the active project. 
+ + Returns + ------- + uuid.UUID + The ID of the project + """ + if project is None: context = get_context() if context.project_id is None: From 7b1467db32628d701a1443bcad3a62b1ca611712 Mon Sep 17 00:00:00 2001 From: Andrew Crozier Date: Tue, 18 Jun 2019 16:53:49 +0100 Subject: [PATCH 14/20] Make filters combinable with & and | operators --- faculty/clients/experiment/_models.py | 81 ++++++++++++++++-- tests/clients/experiment/test_models.py | 109 ++++++++++++++++++++++++ 2 files changed, 182 insertions(+), 8 deletions(-) create mode 100644 tests/clients/experiment/test_models.py diff --git a/faculty/clients/experiment/_models.py b/faculty/clients/experiment/_models.py index 00b6fecc..cf05a620 100644 --- a/faculty/clients/experiment/_models.py +++ b/faculty/clients/experiment/_models.py @@ -16,6 +16,8 @@ from collections import namedtuple from enum import Enum +from attr import attrs, attrib + class LifecycleStage(Enum): ACTIVE = "active" @@ -81,13 +83,73 @@ class ComparisonOperator(Enum): GREATER_THAN_OR_EQUAL_TO = "ge" -ProjectIdFilter = namedtuple("ProjectIdFilter", ["operator", "value"]) -ExperimentIdFilter = namedtuple("ExperimentIdFilter", ["operator", "value"]) -RunIdFilter = namedtuple("RunIdFilter", ["operator", "value"]) -DeletedAtFilter = namedtuple("DeletedAtFilter", ["operator", "value"]) -TagFilter = namedtuple("TagFilter", ["key", "operator", "value"]) -ParamFilter = namedtuple("ParamFilter", ["key", "operator", "value"]) -MetricFilter = namedtuple("MetricFilter", ["key", "operator", "value"]) +def _matching_compound(filter, operator): + return isinstance(filter, CompoundFilter) and filter.operator == operator + + +def _combine_filters(first, second, op): + if _matching_compound(first, op) and _matching_compound(second, op): + conditions = first.conditions + second.conditions + elif _matching_compound(first, op): + conditions = first.conditions + [second] + elif _matching_compound(second, op): + conditions = [first] + second.conditions + else: + 
conditions = [first, second] + return CompoundFilter(op, conditions) + + +class BaseFilter(object): + def __and__(self, other): + return _combine_filters(self, other, LogicalOperator.AND) + + def __or__(self, other): + return _combine_filters(self, other, LogicalOperator.OR) + + +@attrs +class ProjectIdFilter(BaseFilter): + operator = attrib() + value = attrib() + + +@attrs +class ExperimentIdFilter(BaseFilter): + operator = attrib() + value = attrib() + + +@attrs +class RunIdFilter(BaseFilter): + operator = attrib() + value = attrib() + + +@attrs +class DeletedAtFilter(BaseFilter): + operator = attrib() + value = attrib() + + +@attrs +class TagFilter(BaseFilter): + key = attrib() + operator = attrib() + value = attrib() + + +@attrs +class ParamFilter(BaseFilter): + key = attrib() + operator = attrib() + value = attrib() + + +@attrs +class MetricFilter(BaseFilter): + key = attrib() + operator = attrib() + value = attrib() class LogicalOperator(Enum): @@ -95,7 +157,10 @@ class LogicalOperator(Enum): OR = "or" -CompoundFilter = namedtuple("CompoundFilter", ["operator", "conditions"]) +@attrs +class CompoundFilter(BaseFilter): + operator = attrib() + conditions = attrib() class SortOrder(Enum): diff --git a/tests/clients/experiment/test_models.py b/tests/clients/experiment/test_models.py new file mode 100644 index 00000000..f40dd74b --- /dev/null +++ b/tests/clients/experiment/test_models.py @@ -0,0 +1,109 @@ +# Copyright 2018-2019 Faculty Science Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + + +import uuid + +import pytest + +from faculty.clients.experiment._models import ( + ComparisonOperator, + CompoundFilter, + DeletedAtFilter, + ExperimentIdFilter, + LogicalOperator, + MetricFilter, + ParamFilter, + ProjectIdFilter, + RunIdFilter, + TagFilter, +) + + +SINGLE_FILTERS = [ + ProjectIdFilter(ComparisonOperator.EQUAL_TO, uuid.uuid4()), + ExperimentIdFilter(ComparisonOperator.NOT_EQUAL_TO, 4), + RunIdFilter(ComparisonOperator.EQUAL_TO, uuid.uuid4()), + DeletedAtFilter(ComparisonOperator.DEFINED, False), + TagFilter("key", ComparisonOperator.EQUAL_TO, "value"), + ParamFilter("key", ComparisonOperator.NOT_EQUAL_TO, "value"), + ParamFilter("key", ComparisonOperator.GREATER_THAN, 0.3), + MetricFilter("key", ComparisonOperator.LESS_THAN_OR_EQUAL_TO, 0.6), +] +AND_FILTER = CompoundFilter( + LogicalOperator.AND, + [ + ExperimentIdFilter(ComparisonOperator.EQUAL_TO, 4), + ParamFilter("key", ComparisonOperator.GREATER_THAN_OR_EQUAL_TO, 0.4), + ], +) +OR_FILTER = CompoundFilter( + LogicalOperator.OR, + [ + ExperimentIdFilter(ComparisonOperator.EQUAL_TO, 4), + ExperimentIdFilter(ComparisonOperator.EQUAL_TO, 5), + ], +) + + +@pytest.mark.parametrize("left", SINGLE_FILTERS + [OR_FILTER]) +@pytest.mark.parametrize("right", SINGLE_FILTERS + [OR_FILTER]) +def test_non_mergable_and(left, right): + assert (left & right) == CompoundFilter(LogicalOperator.AND, [left, right]) + + +@pytest.mark.parametrize("left", SINGLE_FILTERS + [AND_FILTER]) +@pytest.mark.parametrize("right", SINGLE_FILTERS + [AND_FILTER]) +def test_non_mergable_or(left, right): + assert (left | right) == CompoundFilter(LogicalOperator.OR, [left, right]) + + +@pytest.mark.parametrize("right", SINGLE_FILTERS) +def test_left_mergeable_and(right): + assert (AND_FILTER & right) == CompoundFilter( + LogicalOperator.AND, AND_FILTER.conditions + [right] + ) + + +@pytest.mark.parametrize("right", 
SINGLE_FILTERS) +def test_left_mergeable_or(right): + assert (OR_FILTER | right) == CompoundFilter( + LogicalOperator.OR, OR_FILTER.conditions + [right] + ) + + +@pytest.mark.parametrize("left", SINGLE_FILTERS) +def test_right_mergeable_and(left): + assert (left & AND_FILTER) == CompoundFilter( + LogicalOperator.AND, [left] + AND_FILTER.conditions + ) + + +@pytest.mark.parametrize("left", SINGLE_FILTERS) +def test_right_mergeable_or(left): + assert (left | OR_FILTER) == CompoundFilter( + LogicalOperator.OR, [left] + OR_FILTER.conditions + ) + + +def test_fully_mergable_and(): + assert (AND_FILTER & AND_FILTER) == CompoundFilter( + LogicalOperator.AND, AND_FILTER.conditions + AND_FILTER.conditions + ) + + +def test_fully_mergable_or(): + assert (OR_FILTER | OR_FILTER) == CompoundFilter( + LogicalOperator.OR, OR_FILTER.conditions + OR_FILTER.conditions + ) From c36e1cf418be3fe71b8f04a181bf1de02c098ea9 Mon Sep 17 00:00:00 2001 From: Andrew Crozier Date: Wed, 19 Jun 2019 11:21:41 +0100 Subject: [PATCH 15/20] Refactoring, add filter helper and add tests --- faculty/experiment.py | 272 +++++++++++++++++++++--- tests/test_experiment.py | 446 ++++++++++++++++++++++++--------------- tox.ini | 1 + 3 files changed, 520 insertions(+), 199 deletions(-) diff --git a/faculty/experiment.py b/faculty/experiment.py index 60e40ef8..6f300635 100644 --- a/faculty/experiment.py +++ b/faculty/experiment.py @@ -20,33 +20,21 @@ from faculty._util.resolvers import resolve_project_id from faculty.clients.experiment import ExperimentClient - -class QueryResult(object): - def __init__(self, iterable): - self.iterable = iterable - - def __iter__(self): - return iter(self.iterable) - - -class ExperimentRunQueryResult(QueryResult): - def as_dataframe(self): - records = [] - for run in self: - row = { - "Experiment ID": run.experiment_id, - "Run ID": run.id, - "Status": run.status.value, - "Started At": run.started_at, - } - for metric in run.metrics: - row[metric.key] = row[metric.value] - 
records.append(row) - return pandas.DataFrame(records) +from faculty.clients.experiment import ( + ComparisonOperator, + DeletedAtFilter, + ExperimentIdFilter, + MetricFilter, + ParamFilter, + RunIdFilter, + TagFilter, +) @attrs class ExperimentRun(object): + """A single run of an experiment.""" + id = attrib() run_number = attrib() experiment_id = attrib() @@ -67,6 +55,52 @@ def _from_client_model(cls, client_object): @classmethod def query(cls, project=None, filter=None, sort=None, **session_config): + """Query the platform for experiment runs. + + Parameters + ---------- + project : str, UUID, or None + The name or ID of a project. If ``None`` is passed (the default), + the project will be inferred from the runtime context. + filter : a filter object from ``faculty.clients.experiment`` + Condition(s) to filter experiment runs by. ``FilterBy`` provides a + convenience interface for constructing filter objects. + sort : a sequence of sort objects from ``faculty.clients.experiment`` + Condition(s) to sort experiment runs by. + **session_config + Configuration options to build the session with. + + Returns + ------- + ExperimentRunList + + Examples + -------- + Get all experiment runs in the current project: + + >>> ExperimentRun.query() + ExperimentRunList([ExperimentRun(...)]) + + Get all experiment runs in a named project: + + >>> ExperimentRun.query("my project") + ExperimentRunList([ExperimentRun(...)]) + + Filter experiment runs by experiment ID: + + >>> ExperimentRun.query(filter=FilterBy.experiment_id() == 2) + ExperimentRunList([ExperimentRun(...)]) + + Filter experiment runs by a more complex condition: + + >>> filter = ( + ... FilterBy.experiment_id().one_of([2, 3, 4]) & + ... (FilterBy.metric("accuracy") > 0.9) & + ... (FilterBy.param("alpha") < 0.3) + ... 
) + >>> ExperimentRun.query("my project", filter) + ExperimentRunList([ExperimentRun(...)]) + """ session = get_session(**session_config) project_id = resolve_project_id(session, project) @@ -75,7 +109,6 @@ def _get_runs(): client = ExperimentClient(session) response = client.query_runs(project_id, filter, sort) - # return map(cls._from_client_model, response.runs) yield from map(cls._from_client_model, response.runs) while response.pagination.next is not None: @@ -88,4 +121,193 @@ def _get_runs(): ) yield from map(cls._from_client_model, response.runs) - return ExperimentRunQueryResult(list(_get_runs())) + return ExperimentRunList(_get_runs()) + + +class ExperimentRunList(list): + """A list of experiment runs. + + This collection is a subclass of ``list``, and so supports all its + functionality, but adds the ``as_dataframe`` method which returns a + representation of the contained ExperimentRuns as a ``pandas.DataFrame``. + """ + + def __repr__(self): + return "{}({})".format( + self.__class__.__name__, super(ExperimentRunList, self).__repr__() + ) + + def as_dataframe(self): + """Get the experiment runs as a pandas DataFrame. 
+ + Returns + ------- + pandas.DataFrame + """ + + records = [] + for run in self: + row = { + ("experiment_id", ""): run.experiment_id, + ("run_id", ""): run.id, + ("run_number", ""): run.run_number, + ("status", ""): run.status.value, + ("started_at", ""): run.started_at, + ("ended_at", ""): run.ended_at, + } + for param in run.params: + row[("params", param.key)] = param.value + for metric in run.metrics: + row[("metrics", metric.key)] = metric.value + records.append(row) + + df = pandas.DataFrame(records) + df.columns = pandas.MultiIndex.from_tuples(df.columns) + + # Reorder columns and return + return df[ + [ + "experiment_id", + "run_id", + "run_number", + "status", + "started_at", + "ended_at", + "params", + "metrics", + ] + ] + + +class _FilterBuilder(object): + def __init__(self, constructor, *constructor_args): + self.constructor = constructor + self.constructor_args = constructor_args + + def _build(self, *args): + return self.constructor(*(self.constructor_args + args)) + + def defined(self, value=True): + return self._build(ComparisonOperator.DEFINED, value) + + def __eq__(self, value): + return self._build(ComparisonOperator.EQUAL_TO, value) + + def __ne__(self, value): + return self._build(ComparisonOperator.NOT_EQUAL_TO, value) + + def __gt__(self, value): + return self._build(ComparisonOperator.GREATER_THAN, value) + + def __ge__(self, value): + return self._build(ComparisonOperator.GREATER_THAN_OR_EQUAL_TO, value) + + def __lt__(self, value): + return self._build(ComparisonOperator.LESS_THAN, value) + + def __le__(self, value): + return self._build(ComparisonOperator.LESS_THAN_OR_EQUAL_TO, value) + + def one_of(self, values): + try: + first, remaining = values[0], values[1:] + except IndexError: + raise ValueError("Must provide at least one value") + filter = self == first + for val in remaining: + filter |= self == val + return filter + + +class FilterBy(object): + @staticmethod + def experiment_id(): + """Filter by experiment ID. 
+ + Examples + -------- + Get runs for experiment 4: + + >>> FilterBy.experiment_id() == 4 + """ + return _FilterBuilder(ExperimentIdFilter) + + @staticmethod + def run_id(): + """Filter by run ID. + + Examples + -------- + Get the run with a specified ID: + + >>> FilterBy.run_id() == "945f1d96-9937-4b95-aa3f-addcdd1c8749" + """ + return _FilterBuilder(RunIdFilter) + + @staticmethod + def deleted_at(): + """Filter by run deletion time. + + Examples + -------- + Get runs deleted more than ten minutes ago: + + >>> from datetime import datetime, timedelta + >>> FilterBy.deleted_at() < datetime.now() - timedelta(minutes=10) + + Get non-deleted runs: + + >>> FilterBy.deleted_at() == None + """ + return _FilterBuilder(DeletedAtFilter) + + @staticmethod + def tag(key): + """Filter by run tag. + + Examples + -------- + Get runs with a particular tag: + + >>> FilterBy.tag("key") == "value" + + Get runs where a tag is set, with any value: + + >>> FilterBy.tag("key") != None + """ + return _FilterBuilder(TagFilter, key) + + @staticmethod + def param(key): + """Filter by parameter. + + Examples + -------- + Get runs with a particular parameter value: + + >>> FilterBy.param("key") == "value" + + Params also support filtering by numeric value: + + >>> FilterBy.param("alpha") > 0.2 + """ + return _FilterBuilder(ParamFilter, key) + + @staticmethod + def metric(key): + """Filter by metric. + + Examples + -------- + Get runs with matching metric values: + + >>> FilterBy.metric("accuracy") > 0.9 + + To filter a range of values, combine them with ``&``: + + >>> ( + ... (FilterBy.metric("accuracy") > 0.8) & + ... (FilterBy.metric("accuracy") < 0.9) + ... 
) + """ + return _FilterBuilder(MetricFilter, key) diff --git a/tests/test_experiment.py b/tests/test_experiment.py index 897955a1..03c8ea81 100644 --- a/tests/test_experiment.py +++ b/tests/test_experiment.py @@ -1,200 +1,298 @@ +# Copyright 2018-2019 Faculty Science Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + from datetime import datetime from uuid import uuid4 +import operator import pytest from pytz import UTC +import mock from faculty.clients.experiment import ( ComparisonOperator, + CompoundFilter, + DeletedAtFilter, ExperimentIdFilter, - Experiment, - ExperimentRun, + ExperimentRun as ClientExperimentRun, ExperimentRunStatus, - Metric, - MetricSort, - Param, - SortOrder, - Tag, -) -from faculty.clients.experiment._models import ( - ListExperimentRunsResponse, - Page, - Pagination, + LogicalOperator, + MetricFilter, + ParamFilter, + RunIdFilter, + TagFilter, ) -from faculty.experiment import ( - ExperimentRun as FacultyExperimentRun, - ExperimentRunQueryResult, -) +from faculty.experiment import ExperimentRun, ExperimentRunList, FilterBy -PROJECT_ID = uuid4() -EXPERIMENT_ID = 661 -EXPERIMENT_RUN_ID = uuid4() -EXPERIMENT_RUN_NUMBER = 3 -EXPERIMENT_RUN_NAME = "run name" -PARENT_RUN_ID = uuid4() -RUN_STARTED_AT = datetime(2018, 3, 10, 11, 39, 12, 110000, tzinfo=UTC) -RUN_ENDED_AT = datetime(2018, 3, 10, 11, 39, 15, 110000, tzinfo=UTC) -CREATED_AT = datetime(2018, 3, 10, 11, 32, 6, 247000, tzinfo=UTC) -LAST_UPDATED_AT = datetime(2018, 3, 10, 11, 32, 
30, 172000, tzinfo=UTC) -DELETED_AT = datetime(2018, 3, 10, 11, 37, 42, 482000, tzinfo=UTC) -TAG = Tag(key="tag-key", value="tag-value") -PARAM = Param(key="param-key", value="param-value") -METRIC_KEY = "metric-key" -METRIC_TIMESTAMP = datetime(2018, 3, 12, 16, 20, 22, 122000, tzinfo=UTC) -METRIC = Metric(key=METRIC_KEY, step=1, value=123, timestamp=METRIC_TIMESTAMP) - -EXPERIMENT = Experiment( - id=EXPERIMENT_ID, - name="experiment name", - description="experiment description", - artifact_location="https://example.com", - created_at=CREATED_AT, - last_updated_at=LAST_UPDATED_AT, - deleted_at=DELETED_AT, -) +DATETIMES = [ + datetime(2018, 3, 10, 11, 39, 12, 110000, tzinfo=UTC), + datetime(2018, 3, 10, 11, 32, 6, 247000, tzinfo=UTC), + datetime(2018, 3, 10, 11, 32, 30, 172000, tzinfo=UTC), + datetime(2018, 3, 10, 11, 37, 42, 482000, tzinfo=UTC), +] -FILTER = ExperimentIdFilter(ComparisonOperator.EQUAL_TO, 2) - -SORT = [MetricSort("metric-key", SortOrder.ASC)] - -EXPERIMENT_RUN = ExperimentRun( - id=EXPERIMENT_RUN_ID, - run_number=EXPERIMENT_RUN_NUMBER, - name=EXPERIMENT_RUN_NAME, - parent_run_id=PARENT_RUN_ID, - experiment_id=EXPERIMENT.id, - artifact_location="faculty:", - status=ExperimentRunStatus.RUNNING, - started_at=RUN_STARTED_AT, - ended_at=RUN_ENDED_AT, - deleted_at=DELETED_AT, - tags=[TAG], - params=[PARAM], - metrics=[METRIC], -) -PAGINATION = Pagination(0, 1, None, None) -LIST_EXPERIMENT_RUNS_RESPONSE = ListExperimentRunsResponse( - runs=[EXPERIMENT_RUN], pagination=PAGINATION -) -EXPECTED_RUNS = [ - FacultyExperimentRun( - id=EXPERIMENT_RUN_ID, - run_number=EXPERIMENT_RUN_NUMBER, - name=EXPERIMENT_RUN_NAME, - parent_run_id=PARENT_RUN_ID, - experiment_id=EXPERIMENT.id, - artifact_location="faculty:", + +def mock_client_run(): + return ClientExperimentRun( + **{field: mock.Mock() for field in ClientExperimentRun._fields} + ) + + +def expected_resource_run_for_client_run(run): + return ExperimentRun( + id=run.id, + run_number=run.run_number, + 
experiment_id=run.experiment_id, + name=run.name, + parent_run_id=run.parent_run_id, + artifact_location=run.artifact_location, + status=run.status, + started_at=run.started_at, + ended_at=run.ended_at, + deleted_at=run.deleted_at, + tags=run.tags, + params=run.params, + metrics=run.metrics, + ) + + +def test_experiment_run_query(mocker): + session = mocker.Mock() + get_session_mock = mocker.patch( + "faculty.experiment.get_session", return_value=session + ) + + project_id = mocker.Mock() + resolve_project_id_mock = mocker.patch( + "faculty.experiment.resolve_project_id", return_value=project_id + ) + + client = mocker.Mock() + mocker.patch("faculty.experiment.ExperimentClient", return_value=client) + client_runs = [mock_client_run(), mock_client_run()] + client_response = mocker.Mock() + client_response.runs = client_runs + client_response.pagination.next = None + client.query_runs.return_value = client_response + + filter = mocker.Mock() + sort = mocker.Mock() + + runs = ExperimentRun.query("my project", filter, sort, extra_conf="foo") + assert runs == ExperimentRunList( + [expected_resource_run_for_client_run(run) for run in client_runs] + ) + + get_session_mock.assert_called_once_with(extra_conf="foo") + resolve_project_id_mock.assert_called_once_with(session, "my project") + client.query_runs.assert_called_once_with(project_id, filter, sort) + + +def test_experiment_run_query_multiple_pages(mocker): + session = mocker.Mock() + get_session_mock = mocker.patch( + "faculty.experiment.get_session", return_value=session + ) + + project_id = mocker.Mock() + resolve_project_id_mock = mocker.patch( + "faculty.experiment.resolve_project_id", return_value=project_id + ) + + client = mocker.Mock() + mocker.patch("faculty.experiment.ExperimentClient", return_value=client) + client_response_0 = mocker.Mock() + client_response_0.runs = [mock_client_run(), mock_client_run()] + client_response_1 = mocker.Mock() + client_response_1.runs = [mock_client_run(), mock_client_run()] 
+ client_response_2 = mocker.Mock() + client_response_2.runs = [mock_client_run(), mock_client_run()] + client_response_2.pagination.next = None + client.query_runs.side_effect = [ + client_response_0, + client_response_1, + client_response_2, + ] + all_client_runs = ( + client_response_0.runs + + client_response_1.runs + + client_response_2.runs + ) + + filter = mocker.Mock() + sort = mocker.Mock() + + runs = ExperimentRun.query("my project", filter, sort, extra_conf="foo") + assert runs == ExperimentRunList( + [expected_resource_run_for_client_run(run) for run in all_client_runs] + ) + + get_session_mock.assert_called_once_with(extra_conf="foo") + resolve_project_id_mock.assert_called_once_with(session, "my project") + client.query_runs.assert_has_calls( + [ + mocker.call(project_id, filter, sort), + mocker.call( + project_id, + filter, + sort, + start=client_response_0.pagination.next.start, + limit=client_response_0.pagination.next.limit, + ), + mocker.call( + project_id, + filter, + sort, + start=client_response_1.pagination.next.start, + limit=client_response_1.pagination.next.limit, + ), + ] + ) + + +def test_experiment_run_list_as_dataframe(mocker): + run_0 = mocker.Mock( + experiment_id=1, + id=uuid4(), + run_number=3, + status=ExperimentRunStatus.FINISHED, + started_at=DATETIMES[0], + ended_at=DATETIMES[1], + params=[ + mocker.Mock(key="classic", value="foo"), + mocker.Mock(key="monty", value="spam"), + ], + metrics=[ + mocker.Mock(key="accuracy", value=0.87), + mocker.Mock(key="f1_score", value=0.76), + ], + ) + run_1 = mocker.Mock( + experiment_id=2, + id=uuid4(), + run_number=4, status=ExperimentRunStatus.RUNNING, - started_at=RUN_STARTED_AT, - ended_at=RUN_ENDED_AT, - deleted_at=DELETED_AT, - tags=[TAG], - params=[PARAM], - metrics=[METRIC], + started_at=DATETIMES[2], + ended_at=DATETIMES[3], + params=[ + mocker.Mock(key="classic", value="bar"), + mocker.Mock(key="monty", value="eggs"), + ], + metrics=[ + mocker.Mock(key="accuracy", value=0.91), + 
mocker.Mock(key="f1_score", value=0.72), + ], ) + + runs_df = ExperimentRunList([run_0, run_1]).as_dataframe() + + assert list(runs_df.columns) == [ + ("experiment_id", ""), + ("run_id", ""), + ("run_number", ""), + ("status", ""), + ("started_at", ""), + ("ended_at", ""), + ("params", "classic"), + ("params", "monty"), + ("metrics", "accuracy"), + ("metrics", "f1_score"), + ] + assert (runs_df.experiment_id == [1, 2]).all() + assert (runs_df.run_id == [run_0.id, run_1.id]).all() + assert (runs_df.run_number == [3, 4]).all() + assert (runs_df.status == ["finished", "running"]).all() + assert (runs_df.started_at == [DATETIMES[0], DATETIMES[2]]).all() + assert (runs_df.ended_at == [DATETIMES[1], DATETIMES[3]]).all() + assert (runs_df.params.classic == ["foo", "bar"]).all() + assert (runs_df.params.monty == ["spam", "eggs"]).all() + assert (runs_df.metrics.accuracy == [0.87, 0.91]).all() + assert (runs_df.metrics.f1_score == [0.76, 0.72]).all() + + +FILTER_BY_NO_KEY_CASES = [ + (FilterBy.experiment_id, ExperimentIdFilter), + (FilterBy.run_id, RunIdFilter), + (FilterBy.deleted_at, DeletedAtFilter), +] +FILTER_BY_WITH_KEY_CASES = [ + (FilterBy.tag, TagFilter), + (FilterBy.param, ParamFilter), + (FilterBy.metric, MetricFilter), ] -PAGINATION_MULTIPLE_1 = Pagination(0, 1, None, Page(1, 1)) -LIST_EXPERIMENT_RUNS_RESPONSE_MULTIPLE_1 = ListExperimentRunsResponse( - runs=[EXPERIMENT_RUN], pagination=PAGINATION_MULTIPLE_1 -) -EXPERIMENT_RUN_MULTIPLE_2 = ExperimentRun( - id=7, - run_number=EXPERIMENT_RUN_NUMBER, - name=EXPERIMENT_RUN_NAME, - parent_run_id=PARENT_RUN_ID, - experiment_id=EXPERIMENT.id, - artifact_location="faculty:", - status=ExperimentRunStatus.RUNNING, - started_at=RUN_STARTED_AT, - ended_at=RUN_ENDED_AT, - deleted_at=DELETED_AT, - tags=[TAG], - params=[PARAM], - metrics=[METRIC], -) -PAGINATION_MULTIPLE_2 = Pagination(1, 1, Page(0, 1), None) -LIST_EXPERIMENT_RUNS_RESPONSE_MULTIPLE_2 = ListExperimentRunsResponse( - runs=[EXPERIMENT_RUN_MULTIPLE_2], 
pagination=PAGINATION_MULTIPLE_2 -) -EXPECTED_RUNS_2 = [ - FacultyExperimentRun( - id=EXPERIMENT_RUN_ID, - run_number=EXPERIMENT_RUN_NUMBER, - name=EXPERIMENT_RUN_NAME, - parent_run_id=PARENT_RUN_ID, - experiment_id=EXPERIMENT.id, - artifact_location="faculty:", - status=ExperimentRunStatus.RUNNING, - started_at=RUN_STARTED_AT, - ended_at=RUN_ENDED_AT, - deleted_at=DELETED_AT, - tags=[TAG], - params=[PARAM], - metrics=[METRIC], - ), - FacultyExperimentRun( - id=7, - run_number=EXPERIMENT_RUN_NUMBER, - name=EXPERIMENT_RUN_NAME, - parent_run_id=PARENT_RUN_ID, - experiment_id=EXPERIMENT.id, - artifact_location="faculty:", - status=ExperimentRunStatus.RUNNING, - started_at=RUN_STARTED_AT, - ended_at=RUN_ENDED_AT, - deleted_at=DELETED_AT, - tags=[TAG], - params=[PARAM], - metrics=[METRIC], - ), +OPERATOR_CASES = [ + (lambda x, v: x.defined(v), ComparisonOperator.DEFINED), + (operator.eq, ComparisonOperator.EQUAL_TO), + (operator.ne, ComparisonOperator.NOT_EQUAL_TO), + (operator.gt, ComparisonOperator.GREATER_THAN), + (operator.ge, ComparisonOperator.GREATER_THAN_OR_EQUAL_TO), + (operator.lt, ComparisonOperator.LESS_THAN), + (operator.le, ComparisonOperator.LESS_THAN_OR_EQUAL_TO), ] -@pytest.mark.parametrize( - "query_runs_side_effects,expected_runs", - [ - [[LIST_EXPERIMENT_RUNS_RESPONSE], EXPECTED_RUNS], +@pytest.mark.parametrize("method, filter_cls", FILTER_BY_NO_KEY_CASES) +@pytest.mark.parametrize("python_operator, expected_operator", OPERATOR_CASES) +def test_filter_by_no_key( + mocker, method, filter_cls, python_operator, expected_operator +): + value = mocker.Mock() + filter = python_operator(method(), value) + expected = filter_cls(expected_operator, value) + assert filter == expected + + +@pytest.mark.parametrize("method, filter_cls", FILTER_BY_WITH_KEY_CASES) +@pytest.mark.parametrize("python_operator, expected_operator", OPERATOR_CASES) +def test_filter_by_with_key( + mocker, method, filter_cls, python_operator, expected_operator +): + key = mocker.Mock() + 
value = mocker.Mock() + filter = python_operator(method(key), value) + expected = filter_cls(key, expected_operator, value) + assert filter == expected + + +@pytest.mark.parametrize("method, filter_cls", FILTER_BY_NO_KEY_CASES) +def test_filter_by_one_of_no_key(mocker, method, filter_cls): + values = [mocker.Mock(), mocker.Mock()] + filter = method().one_of(values) + expected = CompoundFilter( + LogicalOperator.OR, [ - [ - LIST_EXPERIMENT_RUNS_RESPONSE_MULTIPLE_1, - LIST_EXPERIMENT_RUNS_RESPONSE_MULTIPLE_2, - ], - EXPECTED_RUNS_2, + filter_cls(ComparisonOperator.EQUAL_TO, values[0]), + filter_cls(ComparisonOperator.EQUAL_TO, values[1]), ], - ], -) -def test_experiment_run_query_single_call( - mocker, query_runs_side_effects, expected_runs -): - experiment_client_mock = mocker.MagicMock() - experiment_client_mock.query_runs = mocker.MagicMock( - side_effect=query_runs_side_effects ) - mocker.patch("faculty.client", return_value=experiment_client_mock) - - response = FacultyExperimentRun.query(PROJECT_ID, FILTER, SORT) - - assert isinstance(response, ExperimentRunQueryResult) - returned_runs = list(response) - for expected_run, returned_run in zip(expected_runs, returned_runs): - assert isinstance(returned_run, FacultyExperimentRun) - assert _are_runs_equal(expected_run, returned_run) - - -def _are_runs_equal(this, that): - return all( - list( - i == j - for i, j in ( - list( - zip( - [getattr(this, attr) for attr in this.__dict__.keys()], - [getattr(that, attr) for attr in that.__dict__.keys()], - ) - ) - ) - ) + assert filter == expected + + +@pytest.mark.parametrize("method, filter_cls", FILTER_BY_WITH_KEY_CASES) +def test_filter_by_one_of_with_key(mocker, method, filter_cls): + key = mocker.Mock() + values = [mocker.Mock(), mocker.Mock()] + filter = method(key).one_of(values) + expected = CompoundFilter( + LogicalOperator.OR, + [ + filter_cls(key, ComparisonOperator.EQUAL_TO, values[0]), + filter_cls(key, ComparisonOperator.EQUAL_TO, values[1]), + ], ) + assert 
filter == expected diff --git a/tox.ini b/tox.ini index 0b5826ae..d5ba4216 100644 --- a/tox.ini +++ b/tox.ini @@ -9,6 +9,7 @@ deps = pytest-mock requests_mock python-dateutil>=2.7 + mock commands = pytest {posargs} [testenv:flake8] From b41a9c92c392da539f61ce53882ba3231b7b7e5c Mon Sep 17 00:00:00 2001 From: Andrew Crozier Date: Wed, 19 Jun 2019 11:28:59 +0100 Subject: [PATCH 16/20] Fix Python 2.7 compatability --- faculty/experiment.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/faculty/experiment.py b/faculty/experiment.py index 6f300635..5dd9e462 100644 --- a/faculty/experiment.py +++ b/faculty/experiment.py @@ -109,7 +109,8 @@ def _get_runs(): client = ExperimentClient(session) response = client.query_runs(project_id, filter, sort) - yield from map(cls._from_client_model, response.runs) + for run in response.runs: + yield cls._from_client_model(run) while response.pagination.next is not None: response = client.query_runs( @@ -119,7 +120,8 @@ def _get_runs(): start=response.pagination.next.start, limit=response.pagination.next.limit, ) - yield from map(cls._from_client_model, response.runs) + for run in response.runs: + yield cls._from_client_model(run) return ExperimentRunList(_get_runs()) From 9577604e9ced33447e1a980daee1b7c26185d6d6 Mon Sep 17 00:00:00 2001 From: Andrew Crozier Date: Fri, 21 Jun 2019 11:07:58 +0100 Subject: [PATCH 17/20] Fix for case when no params or metrics --- faculty/experiment.py | 23 +++++++++--------- tests/test_experiment.py | 50 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 11 deletions(-) diff --git a/faculty/experiment.py b/faculty/experiment.py index 5dd9e462..44be2c6d 100644 --- a/faculty/experiment.py +++ b/faculty/experiment.py @@ -167,18 +167,19 @@ def as_dataframe(self): df.columns = pandas.MultiIndex.from_tuples(df.columns) # Reorder columns and return - return df[ - [ - "experiment_id", - "run_id", - "run_number", - "status", - "started_at", - "ended_at", - "params", 
- "metrics", - ] + column_order = [ + "experiment_id", + "run_id", + "run_number", + "status", + "started_at", + "ended_at", ] + if "params" in df.columns: + column_order.append("params") + if "metrics" in df.columns: + column_order.append("metrics") + return df[column_order] class _FilterBuilder(object): diff --git a/tests/test_experiment.py b/tests/test_experiment.py index 03c8ea81..a54b46bf 100644 --- a/tests/test_experiment.py +++ b/tests/test_experiment.py @@ -224,6 +224,56 @@ def test_experiment_run_list_as_dataframe(mocker): assert (runs_df.metrics.f1_score == [0.76, 0.72]).all() +def test_experiment_run_list_as_dataframe_no_params(mocker): + run = mocker.Mock( + experiment_id=1, + id=uuid4(), + run_number=3, + status=ExperimentRunStatus.FINISHED, + started_at=DATETIMES[0], + ended_at=DATETIMES[1], + params=[], + metrics=[mocker.Mock(key="accuracy", value=0.91)], + ) + + runs_df = ExperimentRunList([run]).as_dataframe() + + assert list(runs_df.columns) == [ + ("experiment_id", ""), + ("run_id", ""), + ("run_number", ""), + ("status", ""), + ("started_at", ""), + ("ended_at", ""), + ("metrics", "accuracy"), + ] + + +def test_experiment_run_list_as_dataframe_no_metrics(mocker): + run = mocker.Mock( + experiment_id=1, + id=uuid4(), + run_number=3, + status=ExperimentRunStatus.FINISHED, + started_at=DATETIMES[0], + ended_at=DATETIMES[1], + params=[mocker.Mock(key="classic", value="bar")], + metrics=[], + ) + + runs_df = ExperimentRunList([run]).as_dataframe() + + assert list(runs_df.columns) == [ + ("experiment_id", ""), + ("run_id", ""), + ("run_number", ""), + ("status", ""), + ("started_at", ""), + ("ended_at", ""), + ("params", "classic"), + ] + + FILTER_BY_NO_KEY_CASES = [ (FilterBy.experiment_id, ExperimentIdFilter), (FilterBy.run_id, RunIdFilter), From 6b933edf0ed7d62c8b11ee1d1e175f0312bb3a68 Mon Sep 17 00:00:00 2001 From: Andrew Crozier Date: Fri, 21 Jun 2019 11:17:42 +0100 Subject: [PATCH 18/20] Fix tests on Python 3.4 --- tests/test_experiment.py | 9 
+++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/test_experiment.py b/tests/test_experiment.py index a54b46bf..63e1f94c 100644 --- a/tests/test_experiment.py +++ b/tests/test_experiment.py @@ -17,6 +17,7 @@ from uuid import uuid4 import operator +import pandas import pytest from pytz import UTC import mock @@ -216,8 +217,12 @@ def test_experiment_run_list_as_dataframe(mocker): assert (runs_df.run_id == [run_0.id, run_1.id]).all() assert (runs_df.run_number == [3, 4]).all() assert (runs_df.status == ["finished", "running"]).all() - assert (runs_df.started_at == [DATETIMES[0], DATETIMES[2]]).all() - assert (runs_df.ended_at == [DATETIMES[1], DATETIMES[3]]).all() + assert ( + runs_df.started_at == pandas.Series([DATETIMES[0], DATETIMES[2]]) + ).all() + assert ( + runs_df.ended_at == pandas.Series([DATETIMES[1], DATETIMES[3]]) + ).all() assert (runs_df.params.classic == ["foo", "bar"]).all() assert (runs_df.params.monty == ["spam", "eggs"]).all() assert (runs_df.metrics.accuracy == [0.87, 0.91]).all() From 705867378260fb9bc52bec12241a0a9b46eea628 Mon Sep 17 00:00:00 2001 From: Andrew Crozier Date: Fri, 21 Jun 2019 12:42:34 +0100 Subject: [PATCH 19/20] Move comment to right place --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index a80b2418..2ff51703 100644 --- a/setup.py +++ b/setup.py @@ -32,10 +32,10 @@ "six", "enum34; python_version<'3.4'", "cachetools", - # Install marshmallow with 'reco' (recommended) extras to ensure a - # compatible version of python-dateutil is available "attrs", "pandas", + # Install marshmallow with 'reco' (recommended) extras to ensure a + # compatible version of python-dateutil is available "marshmallow[reco]==3.0.0rc3", "marshmallow_enum", "marshmallow-oneofschema==2.0.0b2", From e407edd9de4821d27158098fa2e0287e355e6e5d Mon Sep 17 00:00:00 2001 From: Andrew Crozier Date: Fri, 21 Jun 2019 17:12:07 +0100 Subject: [PATCH 20/20] Drop Python 3.4 support --- 
.travis.yml | 2 -- tox.ini | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 1c724c43..3d8ba6c3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,8 +18,6 @@ matrix: env: TOXENV=license - python: 2.7 env: TOXENV=py27 - - python: 3.4 - env: TOXENV=py34 - python: 3.5 env: TOXENV=py35 - python: 3.6 diff --git a/tox.ini b/tox.ini index d5ba4216..9624a1e8 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py27, py34, py35, py36, py37, flake8, black, license +envlist = py27, py35, py36, py37, flake8, black, license [testenv] sitepackages = False