Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

lf.eval.v2: Add an option not to generate example HTML. #389

Merged
merged 1 commit into from
Jan 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 23 additions & 18 deletions langfun/core/eval/v2/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@ def run(
example_ids: list[int] | None = None,
raise_if_has_error: bool = False,
reprocess: bool | list[int] = False,
regenerate_example_html: bool | list[int] = False,
generate_example_html: Literal['new', 'all', 'no'] | list[int] = 'new',
process_timeout: int | None = None,
use_cache: Literal['global', 'per_dataset', 'no'] = 'per_dataset',
note: str | None = None,
Expand Down Expand Up @@ -435,11 +435,13 @@ def run(
meaning that existing checkpoints will be ignored. If a list of
example IDs, it indicates that only the specified examples will be
reprocessed.
regenerate_example_html: A boolean or a list of example IDs. If boolean,
it indicates that whether all the examples to be evaluated will have
their HTML files regenerated. If a list of example IDs, it indicates
that only the specified examples will have their HTML files
regenerated.
generate_example_html: Among 'new', 'all', 'no' or a list of example IDs.
If 'new', generate HTML files for all newly processed examples, and
keep/copy existing HTML files for unchanged examples.
If 'all', generate HTML files for all examples.
If 'no', do not generate HTML files for any examples.
If a list of example IDs, generate HTML files for the specified
examples.
process_timeout: The timeout in seconds for each process. If None, it
will use the default timeout for the runner.
use_cache: Whether to use LLM cache for the experiment.
Expand Down Expand Up @@ -467,7 +469,7 @@ def run(
example_ids=example_ids,
raise_if_has_error=raise_if_has_error,
reprocess=reprocess,
regenerate_example_html=regenerate_example_html,
generate_example_html=generate_example_html,
use_cache=use_cache,
process_timeout=process_timeout,
note=note,
Expand Down Expand Up @@ -837,14 +839,17 @@ class Run(pg.Object, pg.views.html.HtmlTreeView.Extension):
)
] = False

regenerate_example_html: Annotated[
bool | list[int],
generate_example_html: Annotated[
Literal['new', 'all', 'no'] | list[int],
(
'If True, it will regenerate the HTML files for previously processed '
'examples. If a list of integers, the HTML files for the examples of '
'the given IDs will be regenerated'
'If "new", generate HTML files for all newly processed examples, '
'and keep/copy existing HTML files for unchanged examples. '
'If "all", generate HTML files for all examples. '
'If "no", do not generate HTML files for any examples. '
'If a list of example IDs, generate HTML files for the specified '
'examples.'
)
] = False
] = 'new'

filter: Annotated[
Callable[[Experiment], bool] | None,
Expand Down Expand Up @@ -917,17 +922,17 @@ def examples_to_reprocess(self, experiment: Experiment) -> set[int]:
def examples_to_load(self, experiment: Experiment) -> set[int]:
"""Returns the example IDs to load from checkpoint files."""
load_ids = self.examples_to_evaluate(experiment)
if isinstance(self.regenerate_example_html, list):
load_ids |= set(self.regenerate_example_html)
if isinstance(self.generate_example_html, list):
load_ids |= set(self.generate_example_html)
load_ids -= self.examples_to_reprocess(experiment)
return load_ids

def examples_to_load_metadata(self, experiment: Experiment) -> set[int]:
"""Returns the example IDs to load the metadata."""
load_metadata_ids = set()
if isinstance(self.regenerate_example_html, list):
load_metadata_ids = set(self.regenerate_example_html)
elif self.regenerate_example_html:
if isinstance(self.generate_example_html, list):
load_metadata_ids = set(self.generate_example_html)
elif self.generate_example_html == 'all':
load_metadata_ids = self.examples_to_evaluate(experiment)
load_metadata_ids -= self.examples_to_reprocess(experiment)
return load_metadata_ids
Expand Down
25 changes: 21 additions & 4 deletions langfun/core/eval/v2/experiment_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ def test_examples_with_reprocess_some(self):
self.assertEqual(run.examples_to_load(exp), set([3, 5]))
self.assertEqual(run.examples_to_load_metadata(exp), set())

def test_examples_with_regenerate_example_html_all(self):
def test_examples_with_generate_example_html_all(self):
run = Run(
'/root',
RunId.from_id('20241102_0'),
Expand All @@ -346,15 +346,15 @@ def test_examples_with_regenerate_example_html_all(self):
])),
example_ids=[1, 3, 5],
reprocess=[1],
regenerate_example_html=True,
generate_example_html='all',
)
exp = run.experiment.leaf_nodes[0]
self.assertEqual(run.examples_to_evaluate(exp), set([1, 3, 5]))
self.assertEqual(run.examples_to_reprocess(exp), set([1]))
self.assertEqual(run.examples_to_load(exp), set([3, 5]))
self.assertEqual(run.examples_to_load_metadata(exp), set([3, 5]))

def test_examples_with_regenerate_example_html_some(self):
def test_examples_with_generate_example_html_new(self):
run = Run(
'/root',
RunId.from_id('20241102_0'),
Expand All @@ -363,7 +363,24 @@ def test_examples_with_regenerate_example_html_some(self):
])),
example_ids=[1, 3, 5],
reprocess=[1],
regenerate_example_html=[1, 2, 3],
generate_example_html='new',
)
exp = run.experiment.leaf_nodes[0]
self.assertEqual(run.examples_to_evaluate(exp), set([1, 3, 5]))
self.assertEqual(run.examples_to_reprocess(exp), set([1]))
self.assertEqual(run.examples_to_load(exp), set([3, 5]))
self.assertEqual(run.examples_to_load_metadata(exp), set())

def test_examples_with_generate_example_html_some(self):
run = Run(
'/root',
RunId.from_id('20241102_0'),
pg.Ref(Suite([
MyEvaluation(replica_id=0, inputs=sample_inputs(10)),
])),
example_ids=[1, 3, 5],
reprocess=[1],
generate_example_html=[1, 2, 3],
)
exp = run.experiment.leaf_nodes[0]
self.assertEqual(run.examples_to_evaluate(exp), set([1, 3, 5]))
Expand Down
24 changes: 17 additions & 7 deletions langfun/core/eval/v2/reporting.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@ def _save_example_html(
self, runner: Runner, experiment: Experiment, example: Example
) -> None:
"""Saves the example in HTML format."""
current_run = runner.current_run
def _generate():
try:
with pg.timeit() as t:
Expand Down Expand Up @@ -222,14 +223,19 @@ def _generate():
raise e

def _copy():
src_file = runner.current_run.input_path_for(
experiment, f'{example.id}.html'
)
dest_file = runner.current_run.output_path_for(
experiment, f'{example.id}.html'
)
src_file = current_run.input_path_for(experiment, f'{example.id}.html')
dest_file = current_run.output_path_for(experiment, f'{example.id}.html')

if src_file == dest_file:
return

if not pg.io.path_exists(src_file):
experiment.warning(
f'Skip copying \'{example.id}.html\' as '
f'{src_file!r} does not exist.'
)
return

try:
with pg.timeit() as t, pg.io.open(src_file, 'r') as src:
content = src.read()
Expand All @@ -244,7 +250,11 @@ def _copy():
)
raise e

if example.newly_processed or runner.current_run.regenerate_example_html:
generate_example_html = current_run.generate_example_html
if (generate_example_html == 'all'
or (generate_example_html == 'new' and example.newly_processed)
or (isinstance(generate_example_html, list)
and example.id in generate_example_html)):
op = _generate
else:
op = _copy
Expand Down
2 changes: 1 addition & 1 deletion langfun/core/eval/v2/reporting_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def test_example_html_generation_error(self):
)
found_error_log = False
for log_entry in experiment._log_entries:
if log_entry.message.startswith('Failed to copy'):
if log_entry.message.startswith('Skip copying'):
found_error_log = True
break
self.assertTrue(found_error_log)
Expand Down
Loading