Skip to content

Commit

Permalink
add a few tests
Browse files Browse the repository at this point in the history
  • Loading branch information
jbloomAus committed Sep 2, 2024
1 parent bff7fd9 commit 96247d5
Show file tree
Hide file tree
Showing 4 changed files with 20,054 additions and 41 deletions.
74 changes: 37 additions & 37 deletions tests/acceptance/test_neuronpedia_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,40 +324,40 @@ def test_simple_neuronpedia_runner_hook_z_sae():
assert "run_settings.json" in os.listdir(runner.cfg.outputs_dir)


# def test_neuronpedia_runner_prefix_suffix_it_model():

# NP_OUTPUT_FOLDER = "neuronpedia_outputs/test_gemmascope_it_template"
# ACT_CACHE_FOLDER = "cached_activations"
# SAE_SET = "gemma-scope-9b-it-res-canonical"
# SAE_PATH = "layer_9/width_131k/canonical"
# NUM_FEATURES_PER_BATCH = 2
# NUM_BATCHES = 2

# # delete output files if present
# os.system(f"rm -rf {NP_OUTPUT_FOLDER}")
# os.system(f"rm -rf {ACT_CACHE_FOLDER}")

# # # we make two batches of 2 features each
# cfg = NeuronpediaRunnerConfig(
# sae_set=SAE_SET,
# sae_path=SAE_PATH,
# np_set_name="gemma-scope-9b-it-res-canonical",
# from_local_sae=False,
# outputs_dir=NP_OUTPUT_FOLDER,
# sparsity_threshold=1,
# n_prompts_total=5000,
# n_tokens_in_prompt=128,
# n_features_at_a_time=NUM_FEATURES_PER_BATCH,
# n_prompts_in_forward_pass=32,
# start_batch=0,
# end_batch=NUM_BATCHES - 1,
# use_wandb=True,
# shuffle_tokens=False,
# prefix_tokens=[106, 1645, 108],
# suffix_tokens=[107, 108],
# )

# runner = NeuronpediaRunner(cfg)
# runner.run()

# assert "run_settings.json" in os.listdir(runner.cfg.outputs_dir)
def test_neuronpedia_runner_prefix_suffix_it_model():
    """Acceptance test: run NeuronpediaRunner with prefix/suffix tokens and ignored positions.

    Builds a NeuronpediaRunnerConfig for the ``gpt2-small-res-jb`` SAE set at
    ``blocks.0.hook_resid_pre``, runs two batches of two features each, and
    checks that ``run_settings.json`` is written to the output directory.
    """
    # Function-scope import so this block does not depend on the file's import header.
    import shutil

    NP_OUTPUT_FOLDER = "neuronpedia_outputs/test_masking"
    ACT_CACHE_FOLDER = "cached_activations"
    SAE_SET = "gpt2-small-res-jb"
    SAE_PATH = "blocks.0.hook_resid_pre"
    NUM_FEATURES_PER_BATCH = 2
    NUM_BATCHES = 2

    # Delete output files if present, so the final assertion cannot be satisfied
    # by a previous run. shutil.rmtree avoids spawning a shell (no `rm` binary
    # needed, no path interpolation into a command string); ignore_errors=True
    # keeps the "missing directory is fine" behavior of `rm -rf`.
    shutil.rmtree(NP_OUTPUT_FOLDER, ignore_errors=True)
    shutil.rmtree(ACT_CACHE_FOLDER, ignore_errors=True)

    # We make two batches of 2 features each (batch indices 0..NUM_BATCHES - 1).
    cfg = NeuronpediaRunnerConfig(
        sae_set=SAE_SET,
        sae_path=SAE_PATH,
        np_set_name="res-jb",
        from_local_sae=False,
        outputs_dir=NP_OUTPUT_FOLDER,
        sparsity_threshold=1,
        n_prompts_total=5000,
        n_features_at_a_time=NUM_FEATURES_PER_BATCH,
        n_prompts_in_forward_pass=32,
        start_batch=0,
        end_batch=NUM_BATCHES - 1,
        use_wandb=True,
        shuffle_tokens=False,
        # NOTE(review): these token IDs appear to be carried over from a Gemma
        # chat-template configuration; confirm they are intended for gpt2-small.
        prefix_tokens=[106, 1645, 108],
        suffix_tokens=[107, 108],
        # Presumably masks the three prefix positions — verify against the runner.
        ignore_positions=[0, 1, 2],
    )

    runner = NeuronpediaRunner(cfg)
    runner.run()

    assert "run_settings.json" in os.listdir(runner.cfg.outputs_dir)
20,013 changes: 20,012 additions & 1 deletion tests/acceptance/test_simple/batch-0.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion tests/acceptance/test_simple/batch-1.json

Large diffs are not rendered by default.

6 changes: 4 additions & 2 deletions tests/unit/test_neuronpedia_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,9 @@ def test_add_prefix_suffix_to_tokens(neuronpedia_runner: NeuronpediaRunner) -> N
)


def test_add_prefix_suffix_to_tokens_prepend_bos_false(neuronpedia_runner: NeuronpediaRunner) -> None:
def test_add_prefix_suffix_to_tokens_prepend_bos_false(
neuronpedia_runner: NeuronpediaRunner,
) -> None:

# modify the config to add a prefix / suffix
neuronpedia_runner.cfg.prefix_tokens = [101, 102, 103] # Example prefix tokens
Expand All @@ -95,7 +97,7 @@ def test_add_prefix_suffix_to_tokens_prepend_bos_false(neuronpedia_runner: Neuro
assert torch.allclose(tokens[:, -3:].cpu(), torch.tensor([104, 105, 106]))

# assert the first token position is still the bos
assert ~torch.allclose(
assert not torch.allclose(
tokens[:, 0].cpu(),
torch.tensor(
[neuronpedia_runner.model.to_single_token("<|endoftext|>")],
Expand Down

0 comments on commit 96247d5

Please sign in to comment.