From 729b6db62f19c40ea8ebadea471a111d2b6a98e5 Mon Sep 17 00:00:00 2001 From: Your Name Date: Mon, 13 Jan 2025 15:51:27 +0000 Subject: [PATCH] mollify ruff --- src/inspect_ai/_eval/task/results.py | 2 +- src/inspect_ai/log/_log.py | 2 +- src/inspect_ai/scorer/_metrics/std.py | 5 +++-- src/inspect_ai/scorer/_reducer/reducer.py | 8 +++++++- src/inspect_ai/scorer/_reducer/types.py | 2 +- tests/scorer/test_metric.py | 2 +- tests/scorer/test_metric_stderr.py | 19 ++++++++----------- 7 files changed, 22 insertions(+), 18 deletions(-) diff --git a/src/inspect_ai/_eval/task/results.py b/src/inspect_ai/_eval/task/results.py index e3f80395a..4a8214ed7 100644 --- a/src/inspect_ai/_eval/task/results.py +++ b/src/inspect_ai/_eval/task/results.py @@ -18,7 +18,7 @@ ) from inspect_ai.log._log import EvalSampleReductions from inspect_ai.scorer import Metric, Score, Scorer -from inspect_ai.scorer._metric import SampleScore, ReducedScore +from inspect_ai.scorer._metric import ReducedScore, SampleScore from inspect_ai.scorer._reducer import ScoreReducer, mean_score, reducer_log_name from inspect_ai.scorer._scorer import ( SCORER_METRICS, diff --git a/src/inspect_ai/log/_log.py b/src/inspect_ai/log/_log.py index 3cd690e1a..c57de2355 100644 --- a/src/inspect_ai/log/_log.py +++ b/src/inspect_ai/log/_log.py @@ -28,8 +28,8 @@ from inspect_ai.util._store import Store from inspect_ai.util._store_model import SMT -from ._transcript import Event from ..scorer._metric import ReducedScore +from ._transcript import Event logger = getLogger(__name__) diff --git a/src/inspect_ai/scorer/_metrics/std.py b/src/inspect_ai/scorer/_metrics/std.py index f7d782453..dc760ad2f 100644 --- a/src/inspect_ai/scorer/_metrics/std.py +++ b/src/inspect_ai/scorer/_metrics/std.py @@ -5,7 +5,6 @@ from .._metric import ( Metric, - Score, ReducedScore, ValueToFloat, metric, @@ -50,7 +49,9 @@ def metric(scores: list[ReducedScore]) -> float: @metric def stderr(to_float: ValueToFloat = value_to_float()) -> Metric: - """Clustered standard error of the mean, where each ``ReducedScore``'s children form a cluster. + """Clustered standard error of the mean. + + Where each ``ReducedScore``'s children form a cluster. If ``epochs=1`` such that each ``ReducedScore`` has only one child, clustered standard errors reduce to heteroskedasticity-robust (White) standard errors. diff --git a/src/inspect_ai/scorer/_reducer/reducer.py b/src/inspect_ai/scorer/_reducer/reducer.py index 34d9fa1a5..b81e14d38 100644 --- a/src/inspect_ai/scorer/_reducer/reducer.py +++ b/src/inspect_ai/scorer/_reducer/reducer.py @@ -4,7 +4,13 @@ import numpy as np -from inspect_ai.scorer._metric import Score, Value, ValueToFloat, value_to_float, ReducedScore +from inspect_ai.scorer._metric import ( + ReducedScore, + Score, + Value, + ValueToFloat, + value_to_float, +) from .registry import REDUCER_NAME, score_reducer from .types import ScoreReducer diff --git a/src/inspect_ai/scorer/_reducer/types.py b/src/inspect_ai/scorer/_reducer/types.py index 3be2f8b71..020db2299 100644 --- a/src/inspect_ai/scorer/_reducer/types.py +++ b/src/inspect_ai/scorer/_reducer/types.py @@ -1,6 +1,6 @@ from typing import Protocol, runtime_checkable -from .._metric import Score, ReducedScore +from .._metric import ReducedScore, Score @runtime_checkable diff --git a/tests/scorer/test_metric.py b/tests/scorer/test_metric.py index f71515339..291088884 100644 --- a/tests/scorer/test_metric.py +++ b/tests/scorer/test_metric.py @@ -17,7 +17,7 @@ scorer, std, ) -from inspect_ai.scorer._metric import metric_create, ReducedScore +from inspect_ai.scorer._metric import ReducedScore, metric_create from inspect_ai.scorer._target import Target from inspect_ai.solver._task_state import TaskState diff --git a/tests/scorer/test_metric_stderr.py b/tests/scorer/test_metric_stderr.py index fd1d30045..9d9587990 100644 --- a/tests/scorer/test_metric_stderr.py +++ b/tests/scorer/test_metric_stderr.py @@ -1,9 +1,8 @@ import pytest -from inspect_ai.scorer._metric import Score, ReducedScore +from inspect_ai.scorer._metric import ReducedScore, Score from inspect_ai.scorer._metrics.std import stderr - """ Comparisons to ``statsmodels`` are done using the following code: ```python @@ -27,9 +26,7 @@ def cluster_se(data: pd.DataFrame) -> float: def test_stderr_single_cluster(): - """ - Backward compatibility: previous implementation of stderr returned 0 for a single reduced score. - """ + """Backward compatibility: previous implementation of stderr returned 0 for a single reduced score.""" scores = [ ReducedScore(value=2.5, children=[ Score(value=1.0), @@ -47,8 +44,8 @@ def test_stderr_single_cluster(): def test_stderr_singleton_clusters(): - """ - Test clustered SE with three clusters of size 1 each. + """Test clustered SE with three clusters of size 1 each. + This should reduce to the heteroskedasticity-robust standard error. Statsmodels verification: @@ -112,8 +109,8 @@ def test_stderr_identical_within_varied_between(): def test_stderr_1(): - """ - Statsmodels verification: + """Statsmodels verification. + ```python data = pd.DataFrame({ "y": [1, 1, 0, 0, 1, 0], @@ -137,8 +134,8 @@ def test_stderr_1(): def test_stderr_2(): - """ - Statsmodels verification: + """Statsmodels verification. + ```python data = pd.DataFrame({ "y": [9.0, 4.0, 11.0, 6.0, 13.0, 8.0],