From 69eae1846000884cbc231ef2f7d161ac32da559f Mon Sep 17 00:00:00 2001 From: thomasw21 <24695242+thomasw21@users.noreply.github.com> Date: Mon, 4 Jul 2022 20:02:33 +0200 Subject: [PATCH 01/35] Get OPT specific code --- evaluation/results/opt/README.md | 1 + ...nvert_transformers_checkpoint_to_meg_ds.py | 196 ++++++++++++++++++ .../results/opt/run_opt_evaluation.slurm | 121 +++++++++++ 3 files changed, 318 insertions(+) create mode 100644 evaluation/results/opt/README.md create mode 100644 evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py create mode 100644 evaluation/results/opt/run_opt_evaluation.slurm diff --git a/evaluation/results/opt/README.md b/evaluation/results/opt/README.md new file mode 100644 index 00000000..5fb71237 --- /dev/null +++ b/evaluation/results/opt/README.md @@ -0,0 +1 @@ +Utilities requires to run OPT evaluation diff --git a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py new file mode 100644 index 00000000..abb9803c --- /dev/null +++ b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py @@ -0,0 +1,196 @@ +import argparse +import json +import re, os +from functools import partial +from multiprocessing import Pool +from typing import List, Optional, Dict + +import torch +from tqdm import tqdm + + +def get_args(): + parser = argparse.ArgumentParser() + # Required parameters + parser.add_argument( + "--opt_checkpoint_path", + default=None, + type=str, + required=True, + help="Path to the transformers OPT checkpoint path.", + ) + parser.add_argument( + "--opt_sharded_index_path", + default=None, + type=str, + required=True, + help="Path to the transformers OPT checkpoint metadata path.", + ) + parser.add_argument( + "--megatron_dump_folder_path", default=None, type=str, required=True, + help="Path to the output Megatron-DS model." + ) + parser.add_argument( + "--num-proc", default=1, type=int, + ) + return parser.parse_args() + + +def compute_meg_ds_weight_names(num_layers: int): + return { + "layer_01-model_00-model_states.pt": [ + "word_embeddings.weight", + "position_embeddings.weight", + ], + **{ + f"layer_{str(layer_id).zfill(2)}-model_00-model_states.pt": [ + "input_layernorm.weight", + "input_layernorm.bias", + "self_attention.query_key_value.weight", + "self_attention.query_key_value.bias", + "self_attention.dense.weight", + "self_attention.dense.bias", + "post_attention_layernorm.weight", + "post_attention_layernorm.bias", + "mlp.dense_h_to_4h.weight", + "mlp.dense_h_to_4h.bias", + "mlp.dense_4h_to_h.weight", + "mlp.dense_4h_to_h.bias", + ] + for layer_id in range(3, num_layers + 3) + }, + f"layer_{str(num_layers + 5).zfill(2)}-model_00-model_states.pt": [ + "weight", + "bias" + ] + } + +NON_TRANSFORMERS_BLOCK_WEIGHTS = { + "word_embeddings.weight": "decoder.embed_tokens.weight", + "position_embeddings.weight": "decoder.embed_positions.weight", + "weight": "decoder.final_layer_norm.weight", + "bias": "decoder.final_layer_norm.bias" +} +TRANSFORMERS_BLOCK_WEIGHTS = { + "input_layernorm.weight": ["self_attn_layer_norm.weight"], + "input_layernorm.bias": ["self_attn_layer_norm.bias"], + "self_attention.query_key_value.weight": ["self_attn.q_proj.weight", "self_attn.k_proj.weight", "self_attn.v_proj.weight"], + "self_attention.query_key_value.bias": ["self_attn.q_proj.bias", "self_attn.k_proj.bias", "self_attn.v_proj.bias"], + "self_attention.dense.weight": ["self_attn.out_proj.weight"], + "self_attention.dense.bias": ["self_attn.out_proj.bias"], + "post_attention_layernorm.weight": ["final_layer_norm.weight"], + "post_attention_layernorm.bias": ["final_layer_norm.bias"], + "mlp.dense_h_to_4h.weight": ["fc1.weight"], + "mlp.dense_h_to_4h.bias": ["fc1.bias"], + "mlp.dense_4h_to_h.weight": ["fc2.weight"], + "mlp.dense_4h_to_h.bias": ["fc2.bias"] +} +def get_transformers_weight_names(meg_ds_weight: str, layer_id: Optional[int]) -> List[str]: + if layer_id is None: + return [NON_TRANSFORMERS_BLOCK_WEIGHTS[meg_ds_weight]] + else: + return [f"decoder.layers.{layer_id}.{tfrs_block_name}" for tfrs_block_name in TRANSFORMERS_BLOCK_WEIGHTS[meg_ds_weight]] + +def get_layer_id(meg_ds_filename: str, total_num_layers: int) -> Optional[int]: + layer_id = int(re.match(r"layer_(\d*)-model_00-model_states.pt", meg_ds_filename)[1]) - 3 + + if layer_id < 0: + return None + + if layer_id >= total_num_layers: + return None + + return layer_id + +def find_transformers_weights_and_save_meg_ds_weights( + meg_ds_filename: str, + meg_ds_weight_names: List[str], + opt_checkpoint_path: str, + megatron_dump_folder_path:str, + total_num_layers: int, + trfs_weight_map: Dict[str, str] +): + layer_id = get_layer_id(meg_ds_filename, total_num_layers=total_num_layers) + trfs_weight_namess = {meg_ds_weight_name: get_transformers_weight_names(meg_ds_weight_name, layer_id=layer_id) for meg_ds_weight_name in meg_ds_weight_names} + + # Find the path they live in. + trfs_filenames = set(trfs_weight_map[trfs_weight_name] for trfs_weight_names in trfs_weight_namess.values() for trfs_weight_name in trfs_weight_names) + trfs_filename_to_weights = { + trfs_filename: torch.load(os.path.join(opt_checkpoint_path, trfs_filename), map_location="cpu") + for trfs_filename in trfs_filenames + } + + # query those weights + result = { + meg_ds_weight_name: [ + trfs_filename_to_weights[trfs_weight_map[tfrs_weight_name]][tfrs_weight_name] + for tfrs_weight_name in tfrs_weight_names + ] + for meg_ds_weight_name, tfrs_weight_names in trfs_weight_namess.items() + } + + # possibly concatenate + save_path = os.path.join(megatron_dump_folder_path, meg_ds_filename) + with open(save_path, "wb") as fo: + torch.save( + {key: torch.cat(value) for key, value in result.items()}, + fo + ) + + +def convert_opt_checkpoint_to_megatron( + opt_checkpoint_path: str, + megatron_dump_folder_path: str, + opt_index_path: str, + num_proc: int +): + # Get total number of layers + with open(opt_index_path, "r") as fi: + index_file = json.load(fi)["weight_map"] + # Compute total amount of layers + total_amount_of_layers = 0 + for weight_name in index_file.keys(): + match = re.match(r"decoder.layers.(\d*).*", weight_name) + if match is not None: + total_amount_of_layers = max(int(match[1]), total_amount_of_layers) + total_amount_of_layers += 1 + + # Given the total number of layers we can compute exactly each meg_ds params we need to find. + meg_ds_filename_to_meg_ds_weights = compute_meg_ds_weight_names(total_amount_of_layers) + + # Given the needed weights we can query them from the transformers checkpoint + # We have to be smart about it and load a bin file once and get everything. + if num_proc == 1: + for meg_ds_filename, meg_ds_weight_names in tqdm(meg_ds_filename_to_meg_ds_weights.items()): + find_transformers_weights_and_save_meg_ds_weights( + meg_ds_filename=meg_ds_filename, + meg_ds_weight_names=meg_ds_weight_names, + opt_checkpoint_path=opt_checkpoint_path, + megatron_dump_folder_path=megatron_dump_folder_path, + total_num_layers=total_amount_of_layers, + trfs_weight_map=index_file + ) + else: + with Pool(num_proc) as pool: + pool.starmap( + partial( + find_transformers_weights_and_save_meg_ds_weights, + opt_checkpoint_path=opt_checkpoint_path, + megatron_dump_folder_path=megatron_dump_folder_path, + total_num_layers=total_amount_of_layers, + trfs_weight_map=index_file + ), + tqdm(meg_ds_filename_to_meg_ds_weights.items()) + ) + +def main(): + args = get_args() + convert_opt_checkpoint_to_megatron( + opt_checkpoint_path=args.opt_checkpoint_path, + megatron_dump_folder_path=args.megatron_dump_folder_path, + opt_index_path=args.opt_sharded_index_path, + num_proc=args.num_proc + ) + +if __name__ == "__main__": + main() diff --git a/evaluation/results/opt/run_opt_evaluation.slurm b/evaluation/results/opt/run_opt_evaluation.slurm new file mode 100644 index 00000000..b068b3c2 --- /dev/null +++ b/evaluation/results/opt/run_opt_evaluation.slurm @@ -0,0 +1,121 @@ +#!/bin/bash +#SBATCH --job-name=eai-eval-opt +#SBATCH --partition=gpu_p5 +#SBATCH --constraint=a100 +#SBATCH --reservation=hug +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=1 # crucial - only 1 task per dist per node! +#SBATCH --cpus-per-task=64 # number of cores per tasks +#SBATCH --hint=nomultithread # we get physical cores not logical +#SBATCH --gres=gpu:8 # number of gpus +#SBATCH --time 20:00:00 # maximum execution time (HH:MM:SS) +#SBATCH --output=/gpfswork/rech/six/uty16tp/logs/%x-%j.out # output file name +#SBATCH --account=six@a100 + +set -x -e + +source $six_ALL_CCFRWORK/start-py38-pt111 +conda activate thomas_lm_eval + +echo "START TIME: $(date)" + + +CHECKPOINT_PATH=$six_ALL_CCFRSCRATCH/opt/opt-175b-meg-ds +MEGATRON_DEEPSPEED_REPO=/gpfswork/rech/six/uty16tp/code/big_science/Megatron-DeepSpeed +export HF_DATASETS_OFFLINE=1 +export TRANSFORMERS_OFFLINE=1 + +export TRANSFORMERS_CACHE=$six_ALL_CCFRWORK/models +export HF_DATASETS_CACHE=$six_ALL_CCFRWORK/datasets +export HF_MODULES_CACHE=$six_ALL_CCFRWORK/modules +export HF_METRICS_CACHE=$six_ALL_CCFRWORK/metrics + +cd $MEGATRON_DEEPSPEED_REPO + +# Make sure you use the slow version of the tokenizer. +TOKENIZER_NAME_OR_PATH=bigscience/opt + +PP_SIZE=8 +TP_SIZE=1 +SEQ_LEN=2048 + +# different from the training MICRO_BATCH_SIZE - no optim memory, so can do bigger BS +# make as big as it can fit into gpu w/o OOM, but not too close to 100% +EVAL_MICRO_BATCH_SIZE=1 + +#dummy arguments to make megatron happy. +MEGATRON_REQUIRED_ARGS=" \ + --num-layers -1 \ + --hidden-size -1 \ + --num-attention-heads -1 \ + --seq-length -1 \ + --max-position-embeddings -1 \ +" + + +ZERO_STAGE=0 + +config_json="./ds_config.json" + +# Deepspeed figures out GAS dynamically from dynamic GBS via set_train_batch_size() +cat < $config_json +{ + "train_micro_batch_size_per_gpu": 1, + "train_batch_size": 1, + "gradient_clipping": 1.0, + "zero_optimization": { + "stage": $ZERO_STAGE + }, + "bf16": { + "enabled": true + }, + "steps_per_print": 2000, + "wall_clock_breakdown": false +} +EOT + +OPT_FOLDER=$WORK/opt + +CMD="./tasks/eval_harness/evaluate.py \ + --load $CHECKPOINT_PATH \ + --results_path $OPT_FOLDER/eai_results.json \ + --tensor-model-parallel-size $TP_SIZE \ + --pipeline-model-parallel-size $PP_SIZE \ + --tokenizer-type PretrainedFromHF \ + --tokenizer-name-or-path $TOKENIZER_NAME_OR_PATH \ + --micro-batch-size $EVAL_MICRO_BATCH_SIZE \ + --no-load-optim \ + --relu \ + --no-load-rng \ + --fp16 \ + --inference \ + --seq-length $SEQ_LEN \ + --task_list arc_challenge,arc_easy,boolq,copa,headqa,hellaswag,lambada,logiqa,mathqa,mc_taco,mrpc,multirc,openbookqa,piqa,prost,pubmedqa,qnli,qqp,race,rte,sciq,sst,triviaqa,webqs,wic,winogrande,wnli,wsc \ + --deepspeed \ + --deepspeed_config ds_config.json \ + --intermed_results \ + --adaptive_seq_len \ + --micro_bs_multiplier 4 \ + $MEGATRON_REQUIRED_ARGS \ + " + +GPUS_PER_NODE=8 +NNODES=$SLURM_NNODES +MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) +MASTER_PORT=6000 +export LAUNCHER="python -u -m torch.distributed.run \ + --nproc_per_node $GPUS_PER_NODE \ + --nnodes $NNODES \ + --rdzv_endpoint $MASTER_ADDR:$MASTER_PORT \ + --rdzv_backend c10d \ + --max_restarts 0 \ + --tee 3 \ + " + +export CUDA_LAUNCH_BLOCKING=1 + +echo $LAUNCHER $CMD + +export PYTHONPATH=$MEGATRON_DEEPSPEED_REPO + +$LAUNCHER $CMD 2>&1 | tee $OPT_FOLDER/eval-harness.log From ea5fe63d105110063c5fa4edbd3895d270db0271 Mon Sep 17 00:00:00 2001 From: thomasw21 <24695242+thomasw21@users.noreply.github.com> Date: Mon, 4 Jul 2022 21:41:47 +0200 Subject: [PATCH 02/35] Run inference for opt --- .../results/opt/run_opt_evaluation.slurm | 37 ++++++++++++------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/evaluation/results/opt/run_opt_evaluation.slurm b/evaluation/results/opt/run_opt_evaluation.slurm index b068b3c2..b0d3daf3 100644 --- a/evaluation/results/opt/run_opt_evaluation.slurm +++ b/evaluation/results/opt/run_opt_evaluation.slurm @@ -33,23 +33,39 @@ export HF_METRICS_CACHE=$six_ALL_CCFRWORK/metrics cd $MEGATRON_DEEPSPEED_REPO # Make sure you use the slow version of the tokenizer. -TOKENIZER_NAME_OR_PATH=bigscience/opt +# Same tokenizer for 125m and 175b +TOKENIZER_NAME_OR_PATH=/gpfsscratch/rech/six/commun/opt/opt-125m PP_SIZE=8 TP_SIZE=1 + +NHIDDEN=12288 +NLAYERS=96 +NHEADS=96 SEQ_LEN=2048 # different from the training MICRO_BATCH_SIZE - no optim memory, so can do bigger BS # make as big as it can fit into gpu w/o OOM, but not too close to 100% EVAL_MICRO_BATCH_SIZE=1 -#dummy arguments to make megatron happy. -MEGATRON_REQUIRED_ARGS=" \ - --num-layers -1 \ - --hidden-size -1 \ - --num-attention-heads -1 \ - --seq-length -1 \ - --max-position-embeddings -1 \ +MEGATRON_REQUIRED_ARGS=" + --pp-partition-method 'type:transformer' \ + --num-layers $NLAYERS \ + --hidden-size $NHIDDEN \ + --num-attention-heads $NHEADS \ + --seq-length $SEQ_LEN \ + --max-position-embeddings $SEQ_LEN \ + --tokenizer-type PretrainedFromHF \ + --tokenizer-name-or-path $TOKENIZER_NAME_OR_PATH \ + --fp16 \ + --relu \ + --bf16 \ + --seed 42 \ + --position-embedding-type alibi \ + --checkpoint-activations \ + --abort-on-unmet-fused-kernel-constraints \ + --kill-switch-path $KILL_SWITCH_PATH \ + --pad-vocab-size-to 250880 \ " @@ -81,15 +97,10 @@ CMD="./tasks/eval_harness/evaluate.py \ --results_path $OPT_FOLDER/eai_results.json \ --tensor-model-parallel-size $TP_SIZE \ --pipeline-model-parallel-size $PP_SIZE \ - --tokenizer-type PretrainedFromHF \ - --tokenizer-name-or-path $TOKENIZER_NAME_OR_PATH \ --micro-batch-size $EVAL_MICRO_BATCH_SIZE \ --no-load-optim \ - --relu \ --no-load-rng \ - --fp16 \ --inference \ - --seq-length $SEQ_LEN \ --task_list arc_challenge,arc_easy,boolq,copa,headqa,hellaswag,lambada,logiqa,mathqa,mc_taco,mrpc,multirc,openbookqa,piqa,prost,pubmedqa,qnli,qqp,race,rte,sciq,sst,triviaqa,webqs,wic,winogrande,wnli,wsc \ --deepspeed \ --deepspeed_config ds_config.json \ From 5539293a31486bdad0de56ea09ed80492daded64 Mon Sep 17 00:00:00 2001 From: thomasw21 <24695242+thomasw21@users.noreply.github.com> Date: Mon, 4 Jul 2022 21:47:46 +0200 Subject: [PATCH 03/35] Run inference for opt --- .../results/opt/run_opt_evaluation_125m.slurm | 128 ++++++++++++++++++ ...on.slurm => run_opt_evaluation_175b.slurm} | 17 ++- 2 files changed, 136 insertions(+), 9 deletions(-) create mode 100644 evaluation/results/opt/run_opt_evaluation_125m.slurm rename evaluation/results/opt/{run_opt_evaluation.slurm => run_opt_evaluation_175b.slurm} (92%) diff --git a/evaluation/results/opt/run_opt_evaluation_125m.slurm b/evaluation/results/opt/run_opt_evaluation_125m.slurm new file mode 100644 index 00000000..627dc58e --- /dev/null +++ b/evaluation/results/opt/run_opt_evaluation_125m.slurm @@ -0,0 +1,128 @@ +#!/bin/bash +#SBATCH --job-name=eai-eval-opt-125m +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=10 +#SBATCH --hint=nomultithread +#SBATCH --gres=gpu:1 +#SBATCH --time 20:00:00 +#SBATCH --output=/gpfswork/rech/six/uty16tp/logs/%x-%j.out +#SBATCH --account=six@v100 + +set -x -e + +source $six_ALL_CCFRWORK/start-py38-pt111 +conda activate thomas_lm_eval + +echo "START TIME: $(date)" + + +CHECKPOINT_PATH=$six_ALL_CCFRSCRATCH/opt/opt-125m-meg-ds +MEGATRON_DEEPSPEED_REPO=/gpfswork/rech/six/uty16tp/code/big_science/Megatron-DeepSpeed +export HF_DATASETS_OFFLINE=1 +export TRANSFORMERS_OFFLINE=1 + +export TRANSFORMERS_CACHE=$six_ALL_CCFRWORK/models +export HF_DATASETS_CACHE=$six_ALL_CCFRWORK/datasets +export HF_MODULES_CACHE=$six_ALL_CCFRWORK/modules +export HF_METRICS_CACHE=$six_ALL_CCFRWORK/metrics + +cd $MEGATRON_DEEPSPEED_REPO + +# Make sure you use the slow version of the tokenizer. +# Same tokenizer for 125m and 175b +TOKENIZER_NAME_OR_PATH=/gpfsscratch/rech/six/commun/opt/opt-125m + +PP_SIZE=1 +TP_SIZE=1 + +NHIDDEN=768 +NLAYERS=12 +NHEADS=12 +SEQ_LEN=2048 + +# different from the training MICRO_BATCH_SIZE - no optim memory, so can do bigger BS +# make as big as it can fit into gpu w/o OOM, but not too close to 100% +EVAL_MICRO_BATCH_SIZE=1 + +MEGATRON_REQUIRED_ARGS=" + --pp-partition-method 'type:transformer' \ + --num-layers $NLAYERS \ + --hidden-size $NHIDDEN \ + --num-attention-heads $NHEADS \ + --seq-length $SEQ_LEN \ + --max-position-embeddings $SEQ_LEN \ + --tokenizer-type PretrainedFromHF \ + --tokenizer-name-or-path $TOKENIZER_NAME_OR_PATH \ + --fp16 \ + --relu \ + --seed 42 \ +" + + +ZERO_STAGE=0 + +config_json="./ds_config.json" + +# Deepspeed figures out GAS dynamically from dynamic GBS via set_train_batch_size() +cat < $config_json +{ + "train_micro_batch_size_per_gpu": 1, + "train_batch_size": 1, + "gradient_clipping": 1.0, + "zero_optimization": { + "stage": $ZERO_STAGE + }, + "fp16": { + "enabled": true, + "loss_scale": 0, + "loss_scale_window": 500, + "hysteresis": 2, + "min_loss_scale": 1, + "initial_scale_power": 12 + }, + "steps_per_print": 2000, + "wall_clock_breakdown": false +} +EOT + +OPT_FOLDER=$WORK/opt + +CMD="./tasks/eval_harness/evaluate.py \ + --load $CHECKPOINT_PATH \ + --results_path $OPT_FOLDER/eai_results.json \ + --tensor-model-parallel-size $TP_SIZE \ + --pipeline-model-parallel-size $PP_SIZE \ + --micro-batch-size $EVAL_MICRO_BATCH_SIZE \ + --no-load-optim \ + --no-load-rng \ + --inference \ + --task_list arc_challenge,arc_easy,boolq,copa,headqa,hellaswag,lambada,logiqa,mathqa,mc_taco,mrpc,multirc,openbookqa,piqa,prost,pubmedqa,qnli,qqp,race,rte,sciq,sst,triviaqa,webqs,wic,winogrande,wnli,wsc \ + --deepspeed \ + --deepspeed_config ds_config.json \ + --intermed_results \ + --adaptive_seq_len \ + --micro_bs_multiplier 4 \ + $MEGATRON_REQUIRED_ARGS \ + " + +GPUS_PER_NODE=1 +NNODES=$SLURM_NNODES +MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) +MASTER_PORT=6000 +export LAUNCHER="python -u -m torch.distributed.run \ + --nproc_per_node $GPUS_PER_NODE \ + --nnodes $NNODES \ + --rdzv_endpoint $MASTER_ADDR:$MASTER_PORT \ + --rdzv_backend c10d \ + --max_restarts 0 \ + --tee 3 \ + " + +export CUDA_LAUNCH_BLOCKING=1 + +echo $LAUNCHER $CMD + +export PYTHONPATH=$MEGATRON_DEEPSPEED_REPO + +$LAUNCHER $CMD 2>&1 | tee $OPT_FOLDER/eval-harness.log diff --git a/evaluation/results/opt/run_opt_evaluation.slurm b/evaluation/results/opt/run_opt_evaluation_175b.slurm similarity index 92% rename from evaluation/results/opt/run_opt_evaluation.slurm rename to evaluation/results/opt/run_opt_evaluation_175b.slurm index b0d3daf3..d6bf0215 100644 --- a/evaluation/results/opt/run_opt_evaluation.slurm +++ b/evaluation/results/opt/run_opt_evaluation_175b.slurm @@ -1,5 +1,5 @@ #!/bin/bash -#SBATCH --job-name=eai-eval-opt +#SBATCH --job-name=eai-eval-opt-175b #SBATCH --partition=gpu_p5 #SBATCH --constraint=a100 #SBATCH --reservation=hug @@ -59,13 +59,7 @@ MEGATRON_REQUIRED_ARGS=" --tokenizer-name-or-path $TOKENIZER_NAME_OR_PATH \ --fp16 \ --relu \ - --bf16 \ --seed 42 \ - --position-embedding-type alibi \ - --checkpoint-activations \ - --abort-on-unmet-fused-kernel-constraints \ - --kill-switch-path $KILL_SWITCH_PATH \ - --pad-vocab-size-to 250880 \ " @@ -82,8 +76,13 @@ cat < $config_json "zero_optimization": { "stage": $ZERO_STAGE }, - "bf16": { - "enabled": true + "fp16": { + "enabled": true, + "loss_scale": 0, + "loss_scale_window": 500, + "hysteresis": 2, + "min_loss_scale": 1, + "initial_scale_power": 12 }, "steps_per_print": 2000, "wall_clock_breakdown": false From 3d54dd9ca509c738a82919e76e3b19e74ea98c5e Mon Sep 17 00:00:00 2001 From: thomasw21 <24695242+thomasw21@users.noreply.github.com> Date: Mon, 4 Jul 2022 23:20:12 +0200 Subject: [PATCH 04/35] Got it running --- .../opt/convert_transformers_checkpoint_to_meg_ds.py | 5 ++++- evaluation/results/opt/run_opt_evaluation_125m.slurm | 8 +++++--- evaluation/results/opt/run_opt_evaluation_175b.slurm | 8 +++++--- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py index abb9803c..be8fde9b 100644 --- a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py +++ b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py @@ -59,7 +59,7 @@ def compute_meg_ds_weight_names(num_layers: int): ] for layer_id in range(3, num_layers + 3) }, - f"layer_{str(num_layers + 5).zfill(2)}-model_00-model_states.pt": [ + f"layer_{str(num_layers + 4).zfill(2)}-model_00-model_states.pt": [ "weight", "bias" ] @@ -183,6 +183,9 @@ def convert_opt_checkpoint_to_megatron( tqdm(meg_ds_filename_to_meg_ds_weights.items()) ) + # Create dummy mp_rank_00_model_states.pt + torch.save({"mp_world_size": 1, "module": None, "dp_world_size": 1}, os.path.join(megatron_dump_folder_path, "mp_rank_00_model_states.pt")) + def main(): args = get_args() convert_opt_checkpoint_to_megatron( diff --git a/evaluation/results/opt/run_opt_evaluation_125m.slurm b/evaluation/results/opt/run_opt_evaluation_125m.slurm index 627dc58e..0ab36bb6 100644 --- a/evaluation/results/opt/run_opt_evaluation_125m.slurm +++ b/evaluation/results/opt/run_opt_evaluation_125m.slurm @@ -40,23 +40,25 @@ NHIDDEN=768 NLAYERS=12 NHEADS=12 SEQ_LEN=2048 +MAX_POSITION_EMBEDDINGS=2050 # different from the training MICRO_BATCH_SIZE - no optim memory, so can do bigger BS # make as big as it can fit into gpu w/o OOM, but not too close to 100% EVAL_MICRO_BATCH_SIZE=1 MEGATRON_REQUIRED_ARGS=" - --pp-partition-method 'type:transformer' \ --num-layers $NLAYERS \ --hidden-size $NHIDDEN \ --num-attention-heads $NHEADS \ --seq-length $SEQ_LEN \ - --max-position-embeddings $SEQ_LEN \ + --max-position-embeddings $MAX_POSITION_EMBEDDINGS \ --tokenizer-type PretrainedFromHF \ --tokenizer-name-or-path $TOKENIZER_NAME_OR_PATH \ --fp16 \ --relu \ --seed 42 \ + --pad-vocab-size-to 50272 \ + --make-vocab-size-divisible-by 1\ " @@ -86,7 +88,7 @@ cat < $config_json } EOT -OPT_FOLDER=$WORK/opt +OPT_FOLDER=$WORK/opt/opt-125m CMD="./tasks/eval_harness/evaluate.py \ --load $CHECKPOINT_PATH \ diff --git a/evaluation/results/opt/run_opt_evaluation_175b.slurm b/evaluation/results/opt/run_opt_evaluation_175b.slurm index d6bf0215..a2d40139 100644 --- a/evaluation/results/opt/run_opt_evaluation_175b.slurm +++ b/evaluation/results/opt/run_opt_evaluation_175b.slurm @@ -43,23 +43,25 @@ NHIDDEN=12288 NLAYERS=96 NHEADS=96 SEQ_LEN=2048 +MAX_POSITION_EMBEDDINGS=2050 # different from the training MICRO_BATCH_SIZE - no optim memory, so can do bigger BS # make as big as it can fit into gpu w/o OOM, but not too close to 100% EVAL_MICRO_BATCH_SIZE=1 MEGATRON_REQUIRED_ARGS=" - --pp-partition-method 'type:transformer' \ --num-layers $NLAYERS \ --hidden-size $NHIDDEN \ --num-attention-heads $NHEADS \ --seq-length $SEQ_LEN \ - --max-position-embeddings $SEQ_LEN \ + --max-position-embeddings $MAX_POSITION_EMBEDDINGS \ --tokenizer-type PretrainedFromHF \ --tokenizer-name-or-path $TOKENIZER_NAME_OR_PATH \ --fp16 \ --relu \ --seed 42 \ + --pad-vocab-size-to 50272 \ + --make-vocab-size-divisible-by 1\ " @@ -89,7 +91,7 @@ cat < $config_json } EOT -OPT_FOLDER=$WORK/opt +OPT_FOLDER=$WORK/opt/opt-175m CMD="./tasks/eval_harness/evaluate.py \ --load $CHECKPOINT_PATH \ From e2bf1b90a3bf367ca533843dc4270b858bc0999b Mon Sep 17 00:00:00 2001 From: thomasw21 <24695242+thomasw21@users.noreply.github.com> Date: Tue, 5 Jul 2022 00:50:25 +0200 Subject: [PATCH 05/35] Turns out meg-ds has a weird say of merging qkv --- ...nvert_transformers_checkpoint_to_meg_ds.py | 30 +++++++++++++------ 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py index be8fde9b..0aa7e599 100644 --- a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py +++ b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py @@ -108,6 +108,8 @@ def find_transformers_weights_and_save_meg_ds_weights( opt_checkpoint_path: str, megatron_dump_folder_path:str, total_num_layers: int, + num_heads: int, + hidden_size: int, trfs_weight_map: Dict[str, str] ): layer_id = get_layer_id(meg_ds_filename, total_num_layers=total_num_layers) @@ -132,8 +134,15 @@ def find_transformers_weights_and_save_meg_ds_weights( # possibly concatenate save_path = os.path.join(megatron_dump_folder_path, meg_ds_filename) with open(save_path, "wb") as fo: + # qkv are mixed s.t. [q1 k1 v1 q2 k2 v2 ...] with (1,2..) being head_id torch.save( - {key: torch.cat(value) for key, value in result.items()}, + { + key: torch.cat( + value.view(num_heads, 1, hidden_size//num_heads, hidden_size), + dim=1 + ).resize(3 * hidden_size, hidden_size) + for key, value in result.items() + }, fo ) @@ -148,12 +157,11 @@ def convert_opt_checkpoint_to_megatron( with open(opt_index_path, "r") as fi: index_file = json.load(fi)["weight_map"] # Compute total amount of layers - total_amount_of_layers = 0 - for weight_name in index_file.keys(): - match = re.match(r"decoder.layers.(\d*).*", weight_name) - if match is not None: - total_amount_of_layers = max(int(match[1]), total_amount_of_layers) - total_amount_of_layers += 1 + with open(os.path.join(opt_checkpoint_path, "config.json"), "r") as fi: + config = json.load(fi) + total_amount_of_layers = config["num_hidden_layers"] + num_heads = config["num_attention_heads"] + hidden_size = config["hidden_size"] # Given the total number of layers we can compute exactly each meg_ds params we need to find. meg_ds_filename_to_meg_ds_weights = compute_meg_ds_weight_names(total_amount_of_layers) @@ -168,7 +176,9 @@ def convert_opt_checkpoint_to_megatron( opt_checkpoint_path=opt_checkpoint_path, megatron_dump_folder_path=megatron_dump_folder_path, total_num_layers=total_amount_of_layers, - trfs_weight_map=index_file + trfs_weight_map=index_file, + num_heads=num_heads, + hidden_size=hidden_size ) else: with Pool(num_proc) as pool: @@ -178,7 +188,9 @@ def convert_opt_checkpoint_to_megatron( opt_checkpoint_path=opt_checkpoint_path, megatron_dump_folder_path=megatron_dump_folder_path, total_num_layers=total_amount_of_layers, - trfs_weight_map=index_file + trfs_weight_map=index_file, + num_heads=num_heads, + hidden_size=hidden_size ), tqdm(meg_ds_filename_to_meg_ds_weights.items()) ) From bbd3b5d5ee516e9d9fc4e5b6fb5fa6693584e0ef Mon Sep 17 00:00:00 2001 From: thomasw21 <24695242+thomasw21@users.noreply.github.com> Date: Tue, 5 Jul 2022 00:54:42 +0200 Subject: [PATCH 06/35] Woops --- .../opt/convert_transformers_checkpoint_to_meg_ds.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py index 0aa7e599..6f02dcf1 100644 --- a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py +++ b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py @@ -138,10 +138,13 @@ def find_transformers_weights_and_save_meg_ds_weights( torch.save( { key: torch.cat( - value.view(num_heads, 1, hidden_size//num_heads, hidden_size), + [ + value.view(num_heads, 1, hidden_size//num_heads, hidden_size) + for value in values + ], dim=1 ).resize(3 * hidden_size, hidden_size) - for key, value in result.items() + for key, values in result.items() }, fo ) From f2d77a3ec940fe8d8d637307572ca96ccf1098c7 Mon Sep 17 00:00:00 2001 From: thomasw21 <24695242+thomasw21@users.noreply.github.com> Date: Tue, 5 Jul 2022 00:57:42 +0200 Subject: [PATCH 07/35] Woops --- ...nvert_transformers_checkpoint_to_meg_ds.py | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py index 6f02dcf1..14f4810d 100644 --- a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py +++ b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py @@ -102,6 +102,19 @@ def get_layer_id(meg_ds_filename: str, total_num_layers: int) -> Optional[int]: return layer_id +def merge_layers(layers, num_heads: int, hidden_size: int): + if len(layers): + return layers[0] + else: + # We merge QKV + return torch.cat( + [ + layer.view(num_heads, 1, hidden_size // num_heads, hidden_size) + for layer in layers + ], + dim=1 + ).resize(3 * hidden_size, hidden_size) + def find_transformers_weights_and_save_meg_ds_weights( meg_ds_filename: str, meg_ds_weight_names: List[str], @@ -137,13 +150,7 @@ def find_transformers_weights_and_save_meg_ds_weights( # qkv are mixed s.t. [q1 k1 v1 q2 k2 v2 ...] with (1,2..) being head_id torch.save( { - key: torch.cat( - [ - value.view(num_heads, 1, hidden_size//num_heads, hidden_size) - for value in values - ], - dim=1 - ).resize(3 * hidden_size, hidden_size) + key: merge_layers(values, num_heads=num_heads, hidden_size=hidden_size) for key, values in result.items() }, fo From a852d048108d09a256d0f8e4201c1c1414f36def Mon Sep 17 00:00:00 2001 From: thomasw21 <24695242+thomasw21@users.noreply.github.com> Date: Tue, 5 Jul 2022 01:02:11 +0200 Subject: [PATCH 08/35] Woops --- ...convert_transformers_checkpoint_to_meg_ds.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py index 14f4810d..2a8fb9b7 100644 --- a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py +++ b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py @@ -107,13 +107,16 @@ def merge_layers(layers, num_heads: int, hidden_size: int): return layers[0] else: # We merge QKV - return torch.cat( - [ - layer.view(num_heads, 1, hidden_size // num_heads, hidden_size) - for layer in layers - ], - dim=1 - ).resize(3 * hidden_size, hidden_size) + return torch.reshape( + torch.cat( + [ + layer.view(num_heads, 1, hidden_size // num_heads, hidden_size) + for layer in layers + ], + dim=1 + ), + (3 * hidden_size, hidden_size) + ) def find_transformers_weights_and_save_meg_ds_weights( meg_ds_filename: str, From e1d131ed8d8060e68b208d92ac0042382af665b4 Mon Sep 17 00:00:00 2001 From: thomasw21 <24695242+thomasw21@users.noreply.github.com> Date: Tue, 5 Jul 2022 01:04:32 +0200 Subject: [PATCH 09/35] Woops --- .../results/opt/convert_transformers_checkpoint_to_meg_ds.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py index 2a8fb9b7..fe88418b 100644 --- a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py +++ b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py @@ -103,7 +103,7 @@ def get_layer_id(meg_ds_filename: str, total_num_layers: int) -> Optional[int]: return layer_id def merge_layers(layers, num_heads: int, hidden_size: int): - if len(layers): + if len(layers) == 0: return layers[0] else: # We merge QKV From fa9c4f5ba618d0b741b7e4588bbd94fec31f5d00 Mon Sep 17 00:00:00 2001 From: thomasw21 <24695242+thomasw21@users.noreply.github.com> Date: Tue, 5 Jul 2022 01:05:07 +0200 Subject: [PATCH 10/35] Woops --- .../results/opt/convert_transformers_checkpoint_to_meg_ds.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py index fe88418b..29d647b0 100644 --- a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py +++ b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py @@ -103,7 +103,7 @@ def get_layer_id(meg_ds_filename: str, total_num_layers: int) -> Optional[int]: return layer_id def merge_layers(layers, num_heads: int, hidden_size: int): - if len(layers) == 0: + if len(layers) == 1: return layers[0] else: # We merge QKV From 65cef670601f79d8d6f8fb187061b67f77dc0298 Mon Sep 17 00:00:00 2001 From: thomasw21 <24695242+thomasw21@users.noreply.github.com> Date: Tue, 5 Jul 2022 01:07:42 +0200 Subject: [PATCH 11/35] Woops --- ...nvert_transformers_checkpoint_to_meg_ds.py | 34 +++++++++++++------ 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py index 29d647b0..2fcc3972 100644 --- a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py +++ b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py @@ -107,16 +107,30 @@ def merge_layers(layers, num_heads: int, hidden_size: int): return layers[0] else: # We merge QKV - return torch.reshape( - torch.cat( - [ - layer.view(num_heads, 1, hidden_size // num_heads, hidden_size) - for layer in layers - ], - dim=1 - ), - (3 * hidden_size, hidden_size) - ) + if len(layers[0].shape) == 1: + # bias + return torch.reshape( + torch.cat( + [ + layer.view(num_heads, 1, hidden_size // num_heads) + for layer in layers + ], + dim=1 + ), + (3 * hidden_size, ) + ) + else: + #weight + return torch.reshape( + torch.cat( + [ + layer.view(num_heads, 1, hidden_size // num_heads, hidden_size) + for layer in layers + ], + dim=1 + ), + (3 * hidden_size, hidden_size) + ) def find_transformers_weights_and_save_meg_ds_weights( meg_ds_filename: str, From 8ddfba990f28ffe2ac20af6bc02379ab6927aca3 Mon Sep 17 00:00:00 2001 From: thomasw21 <24695242+thomasw21@users.noreply.github.com> Date: Tue, 5 Jul 2022 01:24:49 +0200 Subject: [PATCH 12/35] Turns out this is the last combination --- .../results/opt/convert_transformers_checkpoint_to_meg_ds.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py index 2fcc3972..dab032d6 100644 --- a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py +++ b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py @@ -112,7 +112,7 @@ def merge_layers(layers, num_heads: int, hidden_size: int): return torch.reshape( torch.cat( [ - layer.view(num_heads, 1, hidden_size // num_heads) + layer.view(num_heads, hidden_size // num_heads, 1) for layer in layers ], dim=1 @@ -124,7 +124,7 @@ def merge_layers(layers, num_heads: int, hidden_size: int): return torch.reshape( torch.cat( [ - layer.view(num_heads, 1, hidden_size // num_heads, hidden_size) + layer.view(num_heads, hidden_size // num_heads, 1, hidden_size) for layer in layers ], dim=1 From cc465ca334f0cd12c09658433dda561748d51a63 Mon Sep 17 00:00:00 2001 From: thomasw21 <24695242+thomasw21@users.noreply.github.com> Date: Tue, 5 Jul 2022 01:45:19 +0200 Subject: [PATCH 13/35] Hope this is better --- .../results/opt/convert_transformers_checkpoint_to_meg_ds.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py index dab032d6..2fcc3972 100644 --- a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py +++ b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py @@ -112,7 +112,7 @@ def merge_layers(layers, num_heads: int, hidden_size: int): return torch.reshape( torch.cat( [ - layer.view(num_heads, hidden_size // num_heads, 1) + layer.view(num_heads, 1, hidden_size // num_heads) for layer in layers ], dim=1 @@ -124,7 +124,7 @@ def merge_layers(layers, num_heads: int, hidden_size: int): return torch.reshape( torch.cat( [ - layer.view(num_heads, hidden_size // num_heads, 1, hidden_size) + layer.view(num_heads, 1, hidden_size // num_heads, hidden_size) for layer in layers ], dim=1 From 2c9bea5542f348baa20c6efb8c9c851eaab5ae20 Mon Sep 17 00:00:00 2001 From: thomasw21 <24695242+thomasw21@users.noreply.github.com> Date: Tue, 5 Jul 2022 02:52:59 +0200 Subject: [PATCH 14/35] Update conversion script --- .../opt/convert_transformers_checkpoint_to_meg_ds.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py index 2fcc3972..62d083fa 100644 --- a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py +++ b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py @@ -62,6 +62,9 @@ def compute_meg_ds_weight_names(num_layers: int): f"layer_{str(num_layers + 4).zfill(2)}-model_00-model_states.pt": [ "weight", "bias" + ], + f"layer_{str(num_layers + 5).zfill(2)}-model_00-model_states.pt": [ + "lm_head.weight", ] } @@ -69,7 +72,8 @@ def compute_meg_ds_weight_names(num_layers: int): "word_embeddings.weight": "decoder.embed_tokens.weight", "position_embeddings.weight": "decoder.embed_positions.weight", "weight": "decoder.final_layer_norm.weight", - "bias": "decoder.final_layer_norm.bias" + "bias": "decoder.final_layer_norm.bias", + "lm_head.weight": "lm_head.weight" } TRANSFORMERS_BLOCK_WEIGHTS = { "input_layernorm.weight": ["self_attn_layer_norm.weight"], From 357f22ffa179ca5f4f0014d8d87bb3297f5793e2 Mon Sep 17 00:00:00 2001 From: thomasw21 <24695242+thomasw21@users.noreply.github.com> Date: Tue, 5 Jul 2022 03:05:57 +0200 Subject: [PATCH 15/35] Revert "Update conversion script" This reverts commit 2c9bea5542f348baa20c6efb8c9c851eaab5ae20. --- .../opt/convert_transformers_checkpoint_to_meg_ds.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py index 62d083fa..2fcc3972 100644 --- a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py +++ b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py @@ -62,9 +62,6 @@ def compute_meg_ds_weight_names(num_layers: int): f"layer_{str(num_layers + 4).zfill(2)}-model_00-model_states.pt": [ "weight", "bias" - ], - f"layer_{str(num_layers + 5).zfill(2)}-model_00-model_states.pt": [ - "lm_head.weight", ] } @@ -72,8 +69,7 @@ def compute_meg_ds_weight_names(num_layers: int): "word_embeddings.weight": "decoder.embed_tokens.weight", "position_embeddings.weight": "decoder.embed_positions.weight", "weight": "decoder.final_layer_norm.weight", - "bias": "decoder.final_layer_norm.bias", - "lm_head.weight": "lm_head.weight" + "bias": "decoder.final_layer_norm.bias" } TRANSFORMERS_BLOCK_WEIGHTS = { "input_layernorm.weight": ["self_attn_layer_norm.weight"], From 80016c2b14593600f2236c8320f8e20dbe96f45f Mon Sep 17 00:00:00 2001 From: thomasw21 <24695242+thomasw21@users.noreply.github.com> Date: Tue, 5 Jul 2022 11:27:27 +0200 Subject: [PATCH 16/35] Add checkpoint version --- .../opt/convert_transformers_checkpoint_to_meg_ds.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py index 2fcc3972..aa497086 100644 --- a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py +++ b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py @@ -223,7 +223,15 @@ def convert_opt_checkpoint_to_megatron( ) # Create dummy mp_rank_00_model_states.pt - torch.save({"mp_world_size": 1, "module": None, "dp_world_size": 1}, os.path.join(megatron_dump_folder_path, "mp_rank_00_model_states.pt")) + torch.save( + { + "mp_world_size": 1, + "module": None, + "dp_world_size": 1, + "checkpoint_version": 3 + }, + os.path.join(megatron_dump_folder_path, "mp_rank_00_model_states.pt") + ) def main(): args = get_args() From ebb84d8fcf8b9d1e721e9b3482423e9e9394d216 Mon Sep 17 00:00:00 2001 From: thomasw21 <24695242+thomasw21@users.noreply.github.com> Date: Tue, 5 Jul 2022 11:55:44 +0200 Subject: [PATCH 17/35] Add iteration --- .../results/opt/convert_transformers_checkpoint_to_meg_ds.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py index aa497086..b49dae9e 100644 --- a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py +++ b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py @@ -228,7 +228,8 @@ def convert_opt_checkpoint_to_megatron( "mp_world_size": 1, "module": None, "dp_world_size": 1, - "checkpoint_version": 3 + "checkpoint_version": 3, + "iteration": 0 }, os.path.join(megatron_dump_folder_path, "mp_rank_00_model_states.pt") ) From b213b1af8a390483524276b71599e41f1be61f2e Mon Sep 17 00:00:00 2001 From: thomasw21 <24695242+thomasw21@users.noreply.github.com> Date: Tue, 5 Jul 2022 14:36:39 +0200 Subject: [PATCH 18/35] I have Meg-DS --- evaluation/results/opt/run_opt_evaluation_125m.slurm | 1 + evaluation/results/opt/run_opt_evaluation_175b.slurm | 1 + 2 files changed, 2 insertions(+) diff --git a/evaluation/results/opt/run_opt_evaluation_125m.slurm b/evaluation/results/opt/run_opt_evaluation_125m.slurm index 0ab36bb6..d3964792 100644 --- a/evaluation/results/opt/run_opt_evaluation_125m.slurm +++ b/evaluation/results/opt/run_opt_evaluation_125m.slurm @@ -59,6 +59,7 @@ MEGATRON_REQUIRED_ARGS=" --seed 42 \ --pad-vocab-size-to 50272 \ --make-vocab-size-divisible-by 1\ + --no-bias-gelu-fusion\ " diff --git a/evaluation/results/opt/run_opt_evaluation_175b.slurm b/evaluation/results/opt/run_opt_evaluation_175b.slurm index a2d40139..bc7bbab7 100644 --- a/evaluation/results/opt/run_opt_evaluation_175b.slurm +++ b/evaluation/results/opt/run_opt_evaluation_175b.slurm @@ -62,6 +62,7 @@ MEGATRON_REQUIRED_ARGS=" --seed 42 \ --pad-vocab-size-to 50272 \ --make-vocab-size-divisible-by 1\ + --no-bias-gelu-fusion\ " From 87418444c86aa77de582e7dd9dd0e5fb431794d5 Mon Sep 17 00:00:00 2001 From: thomasw21 <24695242+thomasw21@users.noreply.github.com> Date: Tue, 5 Jul 2022 15:37:49 +0200 Subject: [PATCH 19/35] Launch huge array --- .../opt/run_opt_bs_evaluation_125m.slurm | 172 ++++++++++++++ .../opt/run_opt_bs_evaluation_175b.slurm | 218 ++++++++++++++++++ .../results/opt/run_opt_evaluation_125m.slurm | 47 +++- .../results/opt/run_opt_evaluation_175b.slurm | 47 +++- 4 files changed, 478 insertions(+), 6 deletions(-) create mode 100644 evaluation/results/opt/run_opt_bs_evaluation_125m.slurm create mode 100644 evaluation/results/opt/run_opt_bs_evaluation_175b.slurm diff --git a/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm b/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm new file mode 100644 index 00000000..b5ca83f3 --- /dev/null +++ b/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm @@ -0,0 +1,172 @@ +#!/bin/bash +#SBATCH --job-name=bs-eval-opt-125m +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=10 +#SBATCH --hint=nomultithread +#SBATCH --gres=gpu:1 +#SBATCH --time 20:00:00 +#SBATCH --output=/gpfswork/rech/six/uty16tp/logs/%x-%j.out +#SBATCH --account=six@v100 +#SBATCH --array=0-70 + +set -x -e + +source $six_ALL_CCFRWORK/start-py38-pt111 +conda activate muennighofflmeval + +echo "START TIME: $(date)" + + +CHECKPOINT_PATH=$six_ALL_CCFRSCRATCH/opt/opt-125m-meg-ds +MEGATRON_DEEPSPEED_REPO=/gpfswork/rech/six/uty16tp/code/big_science/Megatron-DeepSpeed +export HF_DATASETS_OFFLINE=1 +export TRANSFORMERS_OFFLINE=1 + +export TRANSFORMERS_CACHE=$six_ALL_CCFRWORK/models +export HF_DATASETS_CACHE=$six_ALL_CCFRWORK/datasetseval +export HF_MODULES_CACHE=$six_ALL_CCFRWORK/modules +export HF_METRICS_CACHE=$six_ALL_CCFRWORK/metrics + +cd $MEGATRON_DEEPSPEED_REPO + +# Make sure you use the slow version of the tokenizer. +# Same tokenizer for 125m and 175b +TOKENIZER_NAME_OR_PATH=/gpfsscratch/rech/six/commun/opt/opt-125m + +PP_SIZE=1 +TP_SIZE=1 + +NHIDDEN=768 +NLAYERS=12 +NHEADS=12 +SEQ_LEN=2048 +MAX_POSITION_EMBEDDINGS=2050 + +# different from the training MICRO_BATCH_SIZE - no optim memory, so can do bigger BS +# make as big as it can fit into gpu w/o OOM, but not too close to 100% +EVAL_MICRO_BATCH_SIZE=1 + +MEGATRON_REQUIRED_ARGS=" + --num-layers $NLAYERS \ + --hidden-size $NHIDDEN \ + --num-attention-heads $NHEADS \ + --seq-length $SEQ_LEN \ + --max-position-embeddings $MAX_POSITION_EMBEDDINGS \ + --tokenizer-type PretrainedFromHF \ + --tokenizer-name-or-path $TOKENIZER_NAME_OR_PATH \ + --fp16 \ + --relu \ + --seed 42 \ + --pad-vocab-size-to 50272 \ + --make-vocab-size-divisible-by 1\ + --no-bias-gelu-fusion\ +" + + +ZERO_STAGE=0 + +config_json="./ds_config.json" + +# Deepspeed figures out GAS dynamically from dynamic GBS via set_train_batch_size() +cat < $config_json +{ + "train_micro_batch_size_per_gpu": 1, + "train_batch_size": 1, + "gradient_clipping": 1.0, + "zero_optimization": { + "stage": $ZERO_STAGE + }, + "fp16": { + "enabled": true, + "loss_scale": 0, + "loss_scale_window": 500, + "hysteresis": 2, + "min_loss_scale": 1, + "initial_scale_power": 12 + }, + "steps_per_print": 2000, + "wall_clock_breakdown": false +} +EOT + +# --task_list GEM/web_nlg_en,GEM/web_nlg_en_challenge_test_numbers,GEM/web_nlg_en_challenge_test_scramble,GEM/web_nlg_en_challenge_validation_sample,GEM/web_nlg_ru,GEM/web_nlg_ru_challenge_test_scramble,GEM/web_nlg_ru_challenge_validation_sample,GEM/wiki_auto_asset_turk_challenge_test_asset_backtranslation,GEM/wiki_auto_asset_turk_challenge_test_asset_bfp02,GEM/wiki_auto_asset_turk_challenge_test_asset_bfp05,GEM/wiki_auto_asset_turk_challenge_test_asset_nopunc,GEM/wiki_auto_asset_turk_challenge_test_turk_backtranslation,GEM/wiki_auto_asset_turk_challenge_test_turk_bfp02,GEM/wiki_auto_asset_turk_challenge_test_turk_bfp05,GEM/wiki_auto_asset_turk_challenge_test_turk_nopunc,GEM/wiki_auto_asset_turk_test_asset,GEM/wiki_auto_asset_turk_test_turk,GEM/wiki_lingua_ar,GEM/wiki_lingua_cs,GEM/wiki_lingua_de,GEM/wiki_lingua_en,GEM/wiki_lingua_es,GEM/wiki_lingua_fr,GEM/wiki_lingua_hi,GEM/wiki_lingua_id,GEM/wiki_lingua_it,GEM/wiki_lingua_ja,GEM/wiki_lingua_ko,GEM/wiki_lingua_nl,GEM/wiki_lingua_pt,GEM/wiki_lingua_ru,GEM/wiki_lingua_th,GEM/wiki_lingua_tr,GEM/wiki_lingua_vi,GEM/wiki_lingua_zh,gem_xsum,gem_xsum_challenge_sample,gem_xsum_challenge_test_backtranslation,gem_xsum_challenge_test_bfp_02,gem_xsum_challenge_test_bfp_05,gem_xsum_challenge_test_covid,gem_xsum_challenge_test_nopunc,axb,axg,boolq,cb,cola,copa,crows_pairs_english,crows_pairs_french,diabla,e2e_nlg_cleaned,mnli,mnli_mismatched,multirc,piaf,qqp,rte,sst,tydiqa_primary,tydiqa_secondary,wic,wsc,wnli,wino_bias_type1_anti,wino_bias_type1_pro,wino_bias_type2_anti,wino_bias_type2_pro,xquad_ar,xquad_en\ +TASKS=( +arc_challenge +arc_easy +boolq +copa +headqa +hellaswag +lambada +logiqa +mathqa +mc_taco +mrpc +multirc +openbookqa +piqa +prost +pubmedqa +qnli +qqp +race +rte +sciq +sst +triviaqa +webqs +wic +winogrande +wnli +wsc +) + +if [ "${#TASKS[@]}" -eq "$SLURM_ARRAY_TASK_COUNT" ]; + then + echo "Please update the array size as the it doesn't correspond to the number of models we want to evaluate. Array size: $SLURM_ARRAY_TASK_COUNT, number of models: ${#TASKS[@]}" + exit 1 +fi +TASK=${TASKS[$SLURM_ARRAY_TASK_ID]} + +OPT_FOLDER=$WORK/opt/opt-125m/$TASK +mkdir -p $OPT_FOLDER + +CMD="./tasks/eval_harness/evaluate.py \ + --load $CHECKPOINT_PATH \ + --results_path $OPT_FOLDER/bs_results.json \ + --tensor-model-parallel-size $TP_SIZE \ + --pipeline-model-parallel-size $PP_SIZE \ + --micro-batch-size $EVAL_MICRO_BATCH_SIZE \ + --no-load-optim \ + --no-load-rng \ + --inference \ + --task_list $TASK\ + --deepspeed \ + --deepspeed_config ds_config.json \ + --intermed_results \ + --adaptive_seq_len \ + --micro_bs_multiplier 4 \ + $MEGATRON_REQUIRED_ARGS \ + " + +GPUS_PER_NODE=1 +NNODES=$SLURM_NNODES +MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) +MASTER_PORT=6000 +export LAUNCHER="python -u -m torch.distributed.run \ + --nproc_per_node $GPUS_PER_NODE \ + --nnodes $NNODES \ + --rdzv_endpoint $MASTER_ADDR:$MASTER_PORT \ + --rdzv_backend c10d \ + --max_restarts 0 \ + --tee 3 \ + " + +export CUDA_LAUNCH_BLOCKING=1 + +echo $LAUNCHER $CMD + +export PYTHONPATH=$MEGATRON_DEEPSPEED_REPO + +$LAUNCHER $CMD 2>&1 | tee $OPT_FOLDER/bs-eval-harness.log diff --git a/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm b/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm new file mode 100644 index 00000000..85ed952d --- /dev/null +++ b/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm @@ -0,0 +1,218 @@ +#!/bin/bash +#SBATCH --job-name=bs-eval-opt-175b +#SBATCH --partition=gpu_p5 +#SBATCH --constraint=a100 +#SBATCH --reservation=hug +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=1 # crucial - only 1 task per dist per node! +#SBATCH --cpus-per-task=64 # number of cores per tasks +#SBATCH --hint=nomultithread # we get physical cores not logical +#SBATCH --gres=gpu:8 # number of gpus +#SBATCH --time 20:00:00 # maximum execution time (HH:MM:SS) +#SBATCH --output=/gpfswork/rech/six/uty16tp/logs/%x-%j.out # output file name +#SBATCH --account=six@a100 +#SBATCH --array=0-70 + +set -x -e + +source $six_ALL_CCFRWORK/start-py38-pt111 +conda activate muennighofflmeval + +echo "START TIME: $(date)" + + +CHECKPOINT_PATH=$six_ALL_CCFRSCRATCH/opt/opt-175b-meg-ds +MEGATRON_DEEPSPEED_REPO=/gpfswork/rech/six/uty16tp/code/big_science/Megatron-DeepSpeed +export HF_DATASETS_OFFLINE=1 +export TRANSFORMERS_OFFLINE=1 + +export TRANSFORMERS_CACHE=$six_ALL_CCFRWORK/models +export HF_DATASETS_CACHE=$six_ALL_CCFRWORK/datasetseval +export HF_MODULES_CACHE=$six_ALL_CCFRWORK/modules +export HF_METRICS_CACHE=$six_ALL_CCFRWORK/metrics + +cd $MEGATRON_DEEPSPEED_REPO + +# Make sure you use the slow version of the tokenizer. +# Same tokenizer for 125m and 175b +TOKENIZER_NAME_OR_PATH=/gpfsscratch/rech/six/commun/opt/opt-125m + +PP_SIZE=8 +TP_SIZE=1 + +NHIDDEN=12288 +NLAYERS=96 +NHEADS=96 +SEQ_LEN=2048 +MAX_POSITION_EMBEDDINGS=2050 + +# different from the training MICRO_BATCH_SIZE - no optim memory, so can do bigger BS +# make as big as it can fit into gpu w/o OOM, but not too close to 100% +EVAL_MICRO_BATCH_SIZE=1 + +MEGATRON_REQUIRED_ARGS=" + --num-layers $NLAYERS \ + --hidden-size $NHIDDEN \ + --num-attention-heads $NHEADS \ + --seq-length $SEQ_LEN \ + --max-position-embeddings $MAX_POSITION_EMBEDDINGS \ + --tokenizer-type PretrainedFromHF \ + --tokenizer-name-or-path $TOKENIZER_NAME_OR_PATH \ + --fp16 \ + --relu \ + --seed 42 \ + --pad-vocab-size-to 50272 \ + --make-vocab-size-divisible-by 1\ + --no-bias-gelu-fusion\ +" + + +ZERO_STAGE=0 + +config_json="./ds_config.json" + +# Deepspeed figures out GAS dynamically from dynamic GBS via set_train_batch_size() +cat < $config_json +{ + "train_micro_batch_size_per_gpu": 1, + "train_batch_size": 1, + "gradient_clipping": 1.0, + "zero_optimization": { + "stage": $ZERO_STAGE + }, + "fp16": { + "enabled": true, + "loss_scale": 0, + "loss_scale_window": 500, + "hysteresis": 2, + "min_loss_scale": 1, + "initial_scale_power": 12 + }, + "steps_per_print": 2000, + "wall_clock_breakdown": false +} +EOT + +# --task_list GEM/web_nlg_en,GEM/web_nlg_en_challenge_test_numbers,GEM/web_nlg_en_challenge_test_scramble,GEM/web_nlg_en_challenge_validation_sample,GEM/web_nlg_ru,GEM/web_nlg_ru_challenge_test_scramble,GEM/web_nlg_ru_challenge_validation_sample,GEM/wiki_auto_asset_turk_challenge_test_asset_backtranslation,GEM/wiki_auto_asset_turk_challenge_test_asset_bfp02,GEM/wiki_auto_asset_turk_challenge_test_asset_bfp05,GEM/wiki_auto_asset_turk_challenge_test_asset_nopunc,GEM/wiki_auto_asset_turk_challenge_test_turk_backtranslation,GEM/wiki_auto_asset_turk_challenge_test_turk_bfp02,GEM/wiki_auto_asset_turk_challenge_test_turk_bfp05,GEM/wiki_auto_asset_turk_challenge_test_turk_nopunc,GEM/wiki_auto_asset_turk_test_asset,GEM/wiki_auto_asset_turk_test_turk,GEM/wiki_lingua_ar,GEM/wiki_lingua_cs,GEM/wiki_lingua_de,GEM/wiki_lingua_en,GEM/wiki_lingua_es,GEM/wiki_lingua_fr,GEM/wiki_lingua_hi,GEM/wiki_lingua_id,GEM/wiki_lingua_it,GEM/wiki_lingua_ja,GEM/wiki_lingua_ko,GEM/wiki_lingua_nl,GEM/wiki_lingua_pt,GEM/wiki_lingua_ru,GEM/wiki_lingua_th,GEM/wiki_lingua_tr,GEM/wiki_lingua_vi,GEM/wiki_lingua_zh,gem_xsum,gem_xsum_challenge_sample,gem_xsum_challenge_test_backtranslation,gem_xsum_challenge_test_bfp_02,gem_xsum_challenge_test_bfp_05,gem_xsum_challenge_test_covid,gem_xsum_challenge_test_nopunc,axb,axg,boolq,cb,cola,copa,crows_pairs_english,crows_pairs_french,diabla,e2e_nlg_cleaned,mnli,mnli_mismatched,multirc,piaf,qqp,rte,sst,tydiqa_primary,tydiqa_secondary,wic,wsc,wnli,wino_bias_type1_anti,wino_bias_type1_pro,wino_bias_type2_anti,wino_bias_type2_pro,xquad_ar,xquad_en\ + +TASKS=( +GEM/web_nlg_en +GEM/web_nlg_en_challenge_test_numbers +GEM/web_nlg_en_challenge_test_scramble +GEM/web_nlg_en_challenge_validation_sample +GEM/web_nlg_ru +GEM/web_nlg_ru_challenge_test_scramble +GEM/web_nlg_ru_challenge_validation_sample +GEM/wiki_auto_asset_turk_challenge_test_asset_backtranslation +GEM/wiki_auto_asset_turk_challenge_test_asset_bfp02 +GEM/wiki_auto_asset_turk_challenge_test_asset_bfp05 +GEM/wiki_auto_asset_turk_challenge_test_asset_nopunc +GEM/wiki_auto_asset_turk_challenge_test_turk_backtranslation +GEM/wiki_auto_asset_turk_challenge_test_turk_bfp02 +GEM/wiki_auto_asset_turk_challenge_test_turk_bfp05 +GEM/wiki_auto_asset_turk_challenge_test_turk_nopunc +GEM/wiki_auto_asset_turk_test_asset +GEM/wiki_auto_asset_turk_test_turk +GEM/wiki_lingua_ar +GEM/wiki_lingua_cs +GEM/wiki_lingua_de +GEM/wiki_lingua_en +GEM/wiki_lingua_es +GEM/wiki_lingua_fr +GEM/wiki_lingua_hi +GEM/wiki_lingua_id +GEM/wiki_lingua_it +GEM/wiki_lingua_ja +GEM/wiki_lingua_ko +GEM/wiki_lingua_nl +GEM/wiki_lingua_pt +GEM/wiki_lingua_ru +GEM/wiki_lingua_th +GEM/wiki_lingua_tr +GEM/wiki_lingua_vi +GEM/wiki_lingua_zh +gem_xsum +gem_xsum_challenge_sample +gem_xsum_challenge_test_backtranslation +gem_xsum_challenge_test_bfp_02 +gem_xsum_challenge_test_bfp_05 +gem_xsum_challenge_test_covid +gem_xsum_challenge_test_nopunc +axb +axg +boolq +cb +cola +copa +crows_pairs_english +crows_pairs_french +diabla +e2e_nlg_cleaned +mnli +mnli_mismatched +multirc +piaf +qqp +rte +sst +tydiqa_primary +tydiqa_secondary +wic +wsc +wnli +wino_bias_type1_anti +wino_bias_type1_pro +wino_bias_type2_anti +wino_bias_type2_pro +xquad_ar +xquad_en +) + +if [ "${#TASKS[@]}" -eq "$SLURM_ARRAY_TASK_COUNT" ]; + then + echo "Please update the array size as the it doesn't correspond to the number of models we want to evaluate. Array size: $SLURM_ARRAY_TASK_COUNT, number of models: ${#TASKS[@]}" + exit 1 +fi +TASK=${TASKS[$SLURM_ARRAY_TASK_ID]} + +OPT_FOLDER=$WORK/opt/opt-175b/$TASK +mkdir -p $OPT_FOLDER + +CMD="./tasks/eval_harness/evaluate.py \ + --load $CHECKPOINT_PATH \ + --results_path $OPT_FOLDER/eai_results.json \ + --tensor-model-parallel-size $TP_SIZE \ + --pipeline-model-parallel-size $PP_SIZE \ + --micro-batch-size $EVAL_MICRO_BATCH_SIZE \ + --no-load-optim \ + --no-load-rng \ + --inference \ + --task_list $TASK\ + --deepspeed \ + --deepspeed_config ds_config.json \ + --intermed_results \ + --adaptive_seq_len \ + --micro_bs_multiplier 4 \ + $MEGATRON_REQUIRED_ARGS \ + " + +GPUS_PER_NODE=8 +NNODES=$SLURM_NNODES +MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) +MASTER_PORT=6000 +export LAUNCHER="python -u -m torch.distributed.run \ + --nproc_per_node $GPUS_PER_NODE \ + --nnodes $NNODES \ + --rdzv_endpoint $MASTER_ADDR:$MASTER_PORT \ + --rdzv_backend c10d \ + --max_restarts 0 \ + --tee 3 \ + " + +export CUDA_LAUNCH_BLOCKING=1 + +echo $LAUNCHER $CMD + +export PYTHONPATH=$MEGATRON_DEEPSPEED_REPO + +$LAUNCHER $CMD 2>&1 | tee $OPT_FOLDER/eval-harness.log diff --git a/evaluation/results/opt/run_opt_evaluation_125m.slurm b/evaluation/results/opt/run_opt_evaluation_125m.slurm index d3964792..a8d9129e 100644 --- a/evaluation/results/opt/run_opt_evaluation_125m.slurm +++ b/evaluation/results/opt/run_opt_evaluation_125m.slurm @@ -8,11 +8,12 @@ #SBATCH --time 20:00:00 #SBATCH --output=/gpfswork/rech/six/uty16tp/logs/%x-%j.out #SBATCH --account=six@v100 +#SBATCH --array=0-28 set -x -e source $six_ALL_CCFRWORK/start-py38-pt111 -conda activate thomas_lm_eval +# conda activate thomas_lm_eval echo "START TIME: $(date)" @@ -89,7 +90,47 @@ cat < $config_json } EOT -OPT_FOLDER=$WORK/opt/opt-125m +# --task_list arc_challenge,arc_easy,boolq,copa,headqa,hellaswag,lambada,logiqa,mathqa,mc_taco,mrpc,multirc,openbookqa,piqa,prost,pubmedqa,qnli,qqp,race,rte,sciq,sst,triviaqa,webqs,wic,winogrande,wnli,wsc \ +TASKS=( +arc_challenge +arc_easy +boolq +copa +headqa +hellaswag +lambada +logiqa +mathqa +mc_taco +mrpc +multirc +openbookqa +piqa +prost +pubmedqa +qnli +qqp +race +rte +sciq +sst +triviaqa +webqs +wic +winogrande +wnli +wsc +) + +if [ "${#TASKS[@]}" -eq "$SLURM_ARRAY_TASK_COUNT" ]; + then + echo "Please update the array size as the it doesn't correspond to the number of models we want to evaluate. Array size: $SLURM_ARRAY_TASK_COUNT, number of models: ${#TASKS[@]}" + exit 1 +fi +TASK=${TASKS[$SLURM_ARRAY_TASK_ID]} + +OPT_FOLDER=$WORK/opt/opt-125m/$TASK +mkdir -p $OPT_FOLDER CMD="./tasks/eval_harness/evaluate.py \ --load $CHECKPOINT_PATH \ @@ -100,7 +141,7 @@ CMD="./tasks/eval_harness/evaluate.py \ --no-load-optim \ --no-load-rng \ --inference \ - --task_list arc_challenge,arc_easy,boolq,copa,headqa,hellaswag,lambada,logiqa,mathqa,mc_taco,mrpc,multirc,openbookqa,piqa,prost,pubmedqa,qnli,qqp,race,rte,sciq,sst,triviaqa,webqs,wic,winogrande,wnli,wsc \ + --task_list $TASK\ --deepspeed \ --deepspeed_config ds_config.json \ --intermed_results \ diff --git a/evaluation/results/opt/run_opt_evaluation_175b.slurm b/evaluation/results/opt/run_opt_evaluation_175b.slurm index bc7bbab7..bd1eb5cc 100644 --- a/evaluation/results/opt/run_opt_evaluation_175b.slurm +++ b/evaluation/results/opt/run_opt_evaluation_175b.slurm @@ -11,11 +11,12 @@ #SBATCH --time 20:00:00 # maximum execution time (HH:MM:SS) #SBATCH --output=/gpfswork/rech/six/uty16tp/logs/%x-%j.out # output file name #SBATCH --account=six@a100 +#SBATCH --array=0-28 set -x -e source $six_ALL_CCFRWORK/start-py38-pt111 -conda activate thomas_lm_eval +# conda activate thomas_lm_eval echo "START TIME: $(date)" @@ -92,7 +93,47 @@ cat < $config_json } EOT -OPT_FOLDER=$WORK/opt/opt-175m +# --task_list arc_challenge,arc_easy,boolq,copa,headqa,hellaswag,lambada,logiqa,mathqa,mc_taco,mrpc,multirc,openbookqa,piqa,prost,pubmedqa,qnli,qqp,race,rte,sciq,sst,triviaqa,webqs,wic,winogrande,wnli,wsc \ +TASKS=( +arc_challenge +arc_easy +boolq +copa +headqa +hellaswag +lambada +logiqa +mathqa +mc_taco +mrpc +multirc +openbookqa +piqa +prost +pubmedqa +qnli +qqp +race +rte +sciq +sst +triviaqa +webqs +wic +winogrande +wnli +wsc +) + +if [ "${#TASKS[@]}" -eq "$SLURM_ARRAY_TASK_COUNT" ]; + then + echo "Please update the array size as the it doesn't correspond to the number of models we want to evaluate. Array size: $SLURM_ARRAY_TASK_COUNT, number of models: ${#TASKS[@]}" + exit 1 +fi +TASK=${TASKS[$SLURM_ARRAY_TASK_ID]} + +OPT_FOLDER=$WORK/opt/opt-175b/$TASK +mkdir -p $OPT_FOLDER CMD="./tasks/eval_harness/evaluate.py \ --load $CHECKPOINT_PATH \ @@ -103,7 +144,7 @@ CMD="./tasks/eval_harness/evaluate.py \ --no-load-optim \ --no-load-rng \ --inference \ - --task_list arc_challenge,arc_easy,boolq,copa,headqa,hellaswag,lambada,logiqa,mathqa,mc_taco,mrpc,multirc,openbookqa,piqa,prost,pubmedqa,qnli,qqp,race,rte,sciq,sst,triviaqa,webqs,wic,winogrande,wnli,wsc \ + --task_list $TASK\ --deepspeed \ --deepspeed_config ds_config.json \ --intermed_results \ From 87cf98012c0988c9a93a662044a7921857ddec89 Mon Sep 17 00:00:00 2001 From: thomasw21 <24695242+thomasw21@users.noreply.github.com> Date: Tue, 5 Jul 2022 16:05:07 +0200 Subject: [PATCH 20/35] Actually I need this env to opt tokenizer --- evaluation/results/opt/run_opt_evaluation_125m.slurm | 3 ++- evaluation/results/opt/run_opt_evaluation_175b.slurm | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/evaluation/results/opt/run_opt_evaluation_125m.slurm b/evaluation/results/opt/run_opt_evaluation_125m.slurm index a8d9129e..8baadcf1 100644 --- a/evaluation/results/opt/run_opt_evaluation_125m.slurm +++ b/evaluation/results/opt/run_opt_evaluation_125m.slurm @@ -13,7 +13,8 @@ set -x -e source $six_ALL_CCFRWORK/start-py38-pt111 -# conda activate thomas_lm_eval +# Required in order to load the opt tokenizer +conda activate thomas_lm_eval echo "START TIME: $(date)" diff --git a/evaluation/results/opt/run_opt_evaluation_175b.slurm b/evaluation/results/opt/run_opt_evaluation_175b.slurm index bd1eb5cc..1b7bf14e 100644 --- a/evaluation/results/opt/run_opt_evaluation_175b.slurm +++ b/evaluation/results/opt/run_opt_evaluation_175b.slurm @@ -16,7 +16,8 @@ set -x -e source $six_ALL_CCFRWORK/start-py38-pt111 -# conda activate thomas_lm_eval +# Required in order to load the opt tokenizer +conda activate thomas_lm_eval echo "START TIME: $(date)" From b6a652edc733593825fb4b6f0daa346b390539c0 Mon Sep 17 00:00:00 2001 From: thomasw21 <24695242+thomasw21@users.noreply.github.com> Date: Tue, 5 Jul 2022 16:48:55 +0200 Subject: [PATCH 21/35] Update config --- evaluation/results/opt/run_opt_bs_evaluation_175b.slurm | 3 ++- evaluation/results/opt/run_opt_evaluation_125m.slurm | 2 +- evaluation/results/opt/run_opt_evaluation_175b.slurm | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm b/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm index 85ed952d..1d2072b6 100644 --- a/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm +++ b/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm @@ -192,7 +192,8 @@ CMD="./tasks/eval_harness/evaluate.py \ --deepspeed_config ds_config.json \ --intermed_results \ --adaptive_seq_len \ - --micro_bs_multiplier 4 \ + --micro_bs_multiplier 16 \ + --offloadearly \ $MEGATRON_REQUIRED_ARGS \ " diff --git a/evaluation/results/opt/run_opt_evaluation_125m.slurm b/evaluation/results/opt/run_opt_evaluation_125m.slurm index 8baadcf1..0189ff31 100644 --- a/evaluation/results/opt/run_opt_evaluation_125m.slurm +++ b/evaluation/results/opt/run_opt_evaluation_125m.slurm @@ -147,7 +147,7 @@ CMD="./tasks/eval_harness/evaluate.py \ --deepspeed_config ds_config.json \ --intermed_results \ --adaptive_seq_len \ - --micro_bs_multiplier 4 \ + --micro_bs_multiplier 8 \ $MEGATRON_REQUIRED_ARGS \ " diff --git a/evaluation/results/opt/run_opt_evaluation_175b.slurm b/evaluation/results/opt/run_opt_evaluation_175b.slurm index 1b7bf14e..662779c6 100644 --- a/evaluation/results/opt/run_opt_evaluation_175b.slurm +++ b/evaluation/results/opt/run_opt_evaluation_175b.slurm @@ -150,7 +150,8 @@ CMD="./tasks/eval_harness/evaluate.py \ --deepspeed_config ds_config.json \ --intermed_results \ --adaptive_seq_len \ - --micro_bs_multiplier 4 \ + --micro_bs_multiplier 16 \ + --offloadearly \ $MEGATRON_REQUIRED_ARGS \ " From 9ce1ed1105d00506eecba1ab13cbb31dc8fea5b6 Mon Sep 17 00:00:00 2001 From: thomasw21 <24695242+thomasw21@users.noreply.github.com> Date: Tue, 5 Jul 2022 18:23:33 +0200 Subject: [PATCH 22/35] Woops --- .../results/opt/concatenate_all_results.py | 58 +++++++++++++++++++ .../opt/run_opt_bs_evaluation_125m.slurm | 2 +- .../opt/run_opt_bs_evaluation_175b.slurm | 2 +- 3 files changed, 60 insertions(+), 2 deletions(-) create mode 100644 evaluation/results/opt/concatenate_all_results.py diff --git a/evaluation/results/opt/concatenate_all_results.py b/evaluation/results/opt/concatenate_all_results.py new file mode 100644 index 00000000..62316c10 --- /dev/null +++ b/evaluation/results/opt/concatenate_all_results.py @@ -0,0 +1,58 @@ +import argparse +import json +import re +from pathlib import Path + + +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--results-dir", required=True, type=Path, help="Path to the list of results") + parser.add_argument("--concatenate-output-file", required=True, type=Path, help="Path to store the final output file") + return parser.parse_args() + +def main(): + args = get_args() + + # Get all json files + json_files = [] + for folder in args.results_dir.iterdir(): + if folder.is_file(): + continue + for file in folder.iterdir(): + if file.is_dir(): + continue + match = re.match( + r"(?:eai|bs)_results_lm-eval_opt-175b-meg-ds_(?:\d{4})-(?:\d{2})-(?:\d{2})-(?:\d{2})-(?:\d{2})-(?:\d{2})\.json", + file.name, + ) + + if match is None: + continue + else: + # TODO @thomasw21 some folder can have multiple results we should take the latest + json_files.append(file) + break + + # Merge all json files + final_result = { + "results": {}, + "versions": {} + } + for file in json_files: + with open(file, "r") as fi: + task_result = json.load(fi) + + for key, value in task_result["results"].items(): + final_result["results"][key] = value + + for key, value in task_result["versions"].items(): + final_result["versions"][key] = value + + # Save result + with open(args.concatenate_output_file, "w") as fo: + json.dump(final_result, fo) + + pass + +if __name__ == "__main__": + main() diff --git a/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm b/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm index b5ca83f3..55903511 100644 --- a/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm +++ b/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm @@ -132,7 +132,7 @@ TASK=${TASKS[$SLURM_ARRAY_TASK_ID]} OPT_FOLDER=$WORK/opt/opt-125m/$TASK mkdir -p $OPT_FOLDER -CMD="./tasks/eval_harness/evaluate.py \ +CMD="./tasks/eval_harness/evaluate_bsevalharness.py \ --load $CHECKPOINT_PATH \ --results_path $OPT_FOLDER/bs_results.json \ --tensor-model-parallel-size $TP_SIZE \ diff --git a/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm b/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm index 1d2072b6..b2466f0a 100644 --- a/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm +++ b/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm @@ -178,7 +178,7 @@ TASK=${TASKS[$SLURM_ARRAY_TASK_ID]} OPT_FOLDER=$WORK/opt/opt-175b/$TASK mkdir -p $OPT_FOLDER -CMD="./tasks/eval_harness/evaluate.py \ +CMD="./tasks/eval_harness/evaluate_bsevalharness.py \ --load $CHECKPOINT_PATH \ --results_path $OPT_FOLDER/eai_results.json \ --tensor-model-parallel-size $TP_SIZE \ From 42303dcee77ff712a2e681f031bcfe9f12bf32de Mon Sep 17 00:00:00 2001 From: thomasw21 <24695242+thomasw21@users.noreply.github.com> Date: Tue, 5 Jul 2022 18:33:47 +0200 Subject: [PATCH 23/35] Array take end --- evaluation/results/opt/run_opt_bs_evaluation_125m.slurm | 2 +- evaluation/results/opt/run_opt_bs_evaluation_175b.slurm | 2 +- evaluation/results/opt/run_opt_evaluation_125m.slurm | 2 +- evaluation/results/opt/run_opt_evaluation_175b.slurm | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm b/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm index 55903511..d6a70416 100644 --- a/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm +++ b/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm @@ -8,7 +8,7 @@ #SBATCH --time 20:00:00 #SBATCH --output=/gpfswork/rech/six/uty16tp/logs/%x-%j.out #SBATCH --account=six@v100 -#SBATCH --array=0-70 +#SBATCH --array=0-69 set -x -e diff --git a/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm b/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm index b2466f0a..729b5484 100644 --- a/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm +++ b/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm @@ -11,7 +11,7 @@ #SBATCH --time 20:00:00 # maximum execution time (HH:MM:SS) #SBATCH --output=/gpfswork/rech/six/uty16tp/logs/%x-%j.out # output file name #SBATCH --account=six@a100 -#SBATCH --array=0-70 +#SBATCH --array=0-69 set -x -e diff --git a/evaluation/results/opt/run_opt_evaluation_125m.slurm b/evaluation/results/opt/run_opt_evaluation_125m.slurm index 0189ff31..2ef24b40 100644 --- a/evaluation/results/opt/run_opt_evaluation_125m.slurm +++ b/evaluation/results/opt/run_opt_evaluation_125m.slurm @@ -8,7 +8,7 @@ #SBATCH --time 20:00:00 #SBATCH --output=/gpfswork/rech/six/uty16tp/logs/%x-%j.out #SBATCH --account=six@v100 -#SBATCH --array=0-28 +#SBATCH --array=0-27 set -x -e diff --git a/evaluation/results/opt/run_opt_evaluation_175b.slurm b/evaluation/results/opt/run_opt_evaluation_175b.slurm index 662779c6..7726799a 100644 --- a/evaluation/results/opt/run_opt_evaluation_175b.slurm +++ b/evaluation/results/opt/run_opt_evaluation_175b.slurm @@ -11,7 +11,7 @@ #SBATCH --time 20:00:00 # maximum execution time (HH:MM:SS) #SBATCH --output=/gpfswork/rech/six/uty16tp/logs/%x-%j.out # output file name #SBATCH --account=six@a100 -#SBATCH --array=0-28 +#SBATCH --array=0-27 set -x -e From 2ac4d41a77935acacce011422be3c15009ab54ee Mon Sep 17 00:00:00 2001 From: thomasw21 <24695242+thomasw21@users.noreply.github.com> Date: Wed, 6 Jul 2022 15:08:44 +0200 Subject: [PATCH 24/35] Add bloom evaluation scripts --- .../bloom/run_bloom_bs_evaluation_176b.slurm | 202 ++++++++++++++++++ .../bloom/run_opt_evaluation_175b.slurm | 160 ++++++++++++++ 2 files changed, 362 insertions(+) create mode 100644 evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm create mode 100644 evaluation/results/bloom/run_opt_evaluation_175b.slurm diff --git a/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm b/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm new file mode 100644 index 00000000..63cd1d4f --- /dev/null +++ b/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm @@ -0,0 +1,202 @@ +#!/bin/bash +#SBATCH --job-name=bs-eval-bloom-176b +#SBATCH --partition=gpu_p5 +#SBATCH --constraint=a100 +#SBATCH --reservation=hug +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=1 # crucial - only 1 task per dist per node! +#SBATCH --cpus-per-task=64 # number of cores per tasks +#SBATCH --hint=nomultithread # we get physical cores not logical +#SBATCH --gres=gpu:8 # number of gpus +#SBATCH --time 20:00:00 # maximum execution time (HH:MM:SS) +#SBATCH --output=/gpfswork/rech/six/uty16tp/logs/%x-%j.out # output file name +#SBATCH --account=six@a100 +#SBATCH --array=0-69 + +set -x -e + +source $six_ALL_CCFRWORK/start-py38-pt111 +conda activate muennighofflmeval + +echo "START TIME: $(date)" + + +CHECKPOINT_PATH=$six_ALL_CCFRSCRATCH/checkpoints/tr11-176B-ml/checkpoints/main/global_step95000 +MEGATRON_DEEPSPEED_REPO=/gpfswork/rech/six/uty16tp/code/big_science/Megatron-DeepSpeed-bloom +export HF_DATASETS_OFFLINE=1 +export TRANSFORMERS_OFFLINE=1 + +export TRANSFORMERS_CACHE=$six_ALL_CCFRWORK/models +export HF_DATASETS_CACHE=$six_ALL_CCFRWORK/datasetseval +export HF_MODULES_CACHE=$six_ALL_CCFRWORK/modules +export HF_METRICS_CACHE=$six_ALL_CCFRWORK/metrics + +cd $MEGATRON_DEEPSPEED_REPO + +# Make sure you use the slow version of the tokenizer. +# Same tokenizer for 125m and 175b +TOKENIZER_NAME_OR_PATH=bigscience/tokenizer + +PP_SIZE=8 +TP_SIZE=1 +SEQ_LEN=2048 + +# different from the training MICRO_BATCH_SIZE - no optim memory, so can do bigger BS +# make as big as it can fit into gpu w/o OOM, but not too close to 100% +EVAL_MICRO_BATCH_SIZE=1 + +#dummy arguments to make megatron happy. +MEGATRON_REQUIRED_ARGS=" \ + --num-layers -1 \ + --hidden-size -1 \ + --num-attention-heads -1 \ + --seq-length -1 \ + --max-position-embeddings -1 \ +" + + +ZERO_STAGE=0 + +config_json="./ds_config.json" + +# Deepspeed figures out GAS dynamically from dynamic GBS via set_train_batch_size() +cat < $config_json +{ + "train_micro_batch_size_per_gpu": 1, + "train_batch_size": 1, + "gradient_clipping": 1.0, + "zero_optimization": { + "stage": $ZERO_STAGE + }, + "bf16": { + "enabled": true + }, + "steps_per_print": 2000, + "wall_clock_breakdown": false +} +EOT + +# --task_list GEM/web_nlg_en,GEM/web_nlg_en_challenge_test_numbers,GEM/web_nlg_en_challenge_test_scramble,GEM/web_nlg_en_challenge_validation_sample,GEM/web_nlg_ru,GEM/web_nlg_ru_challenge_test_scramble,GEM/web_nlg_ru_challenge_validation_sample,GEM/wiki_auto_asset_turk_challenge_test_asset_backtranslation,GEM/wiki_auto_asset_turk_challenge_test_asset_bfp02,GEM/wiki_auto_asset_turk_challenge_test_asset_bfp05,GEM/wiki_auto_asset_turk_challenge_test_asset_nopunc,GEM/wiki_auto_asset_turk_challenge_test_turk_backtranslation,GEM/wiki_auto_asset_turk_challenge_test_turk_bfp02,GEM/wiki_auto_asset_turk_challenge_test_turk_bfp05,GEM/wiki_auto_asset_turk_challenge_test_turk_nopunc,GEM/wiki_auto_asset_turk_test_asset,GEM/wiki_auto_asset_turk_test_turk,GEM/wiki_lingua_ar,GEM/wiki_lingua_cs,GEM/wiki_lingua_de,GEM/wiki_lingua_en,GEM/wiki_lingua_es,GEM/wiki_lingua_fr,GEM/wiki_lingua_hi,GEM/wiki_lingua_id,GEM/wiki_lingua_it,GEM/wiki_lingua_ja,GEM/wiki_lingua_ko,GEM/wiki_lingua_nl,GEM/wiki_lingua_pt,GEM/wiki_lingua_ru,GEM/wiki_lingua_th,GEM/wiki_lingua_tr,GEM/wiki_lingua_vi,GEM/wiki_lingua_zh,gem_xsum,gem_xsum_challenge_sample,gem_xsum_challenge_test_backtranslation,gem_xsum_challenge_test_bfp_02,gem_xsum_challenge_test_bfp_05,gem_xsum_challenge_test_covid,gem_xsum_challenge_test_nopunc,axb,axg,boolq,cb,cola,copa,crows_pairs_english,crows_pairs_french,diabla,e2e_nlg_cleaned,mnli,mnli_mismatched,multirc,piaf,qqp,rte,sst,tydiqa_primary,tydiqa_secondary,wic,wsc,wnli,wino_bias_type1_anti,wino_bias_type1_pro,wino_bias_type2_anti,wino_bias_type2_pro,xquad_ar,xquad_en\ + +TASKS=( +GEM/web_nlg_en +GEM/web_nlg_en_challenge_test_numbers +GEM/web_nlg_en_challenge_test_scramble +GEM/web_nlg_en_challenge_validation_sample +GEM/web_nlg_ru +GEM/web_nlg_ru_challenge_test_scramble +GEM/web_nlg_ru_challenge_validation_sample +GEM/wiki_auto_asset_turk_challenge_test_asset_backtranslation +GEM/wiki_auto_asset_turk_challenge_test_asset_bfp02 +GEM/wiki_auto_asset_turk_challenge_test_asset_bfp05 +GEM/wiki_auto_asset_turk_challenge_test_asset_nopunc +GEM/wiki_auto_asset_turk_challenge_test_turk_backtranslation +GEM/wiki_auto_asset_turk_challenge_test_turk_bfp02 +GEM/wiki_auto_asset_turk_challenge_test_turk_bfp05 +GEM/wiki_auto_asset_turk_challenge_test_turk_nopunc +GEM/wiki_auto_asset_turk_test_asset +GEM/wiki_auto_asset_turk_test_turk +GEM/wiki_lingua_ar +GEM/wiki_lingua_cs +GEM/wiki_lingua_de +GEM/wiki_lingua_en +GEM/wiki_lingua_es +GEM/wiki_lingua_fr +GEM/wiki_lingua_hi +GEM/wiki_lingua_id +GEM/wiki_lingua_it +GEM/wiki_lingua_ja +GEM/wiki_lingua_ko +GEM/wiki_lingua_nl +GEM/wiki_lingua_pt +GEM/wiki_lingua_ru +GEM/wiki_lingua_th +GEM/wiki_lingua_tr +GEM/wiki_lingua_vi +GEM/wiki_lingua_zh +gem_xsum +gem_xsum_challenge_sample +gem_xsum_challenge_test_backtranslation +gem_xsum_challenge_test_bfp_02 +gem_xsum_challenge_test_bfp_05 +gem_xsum_challenge_test_covid +gem_xsum_challenge_test_nopunc +axb +axg +boolq +cb +cola +copa +crows_pairs_english +crows_pairs_french +diabla +e2e_nlg_cleaned +mnli +mnli_mismatched +multirc +piaf +qqp +rte +sst +tydiqa_primary +tydiqa_secondary +wic +wsc +wnli +wino_bias_type1_anti +wino_bias_type1_pro +wino_bias_type2_anti +wino_bias_type2_pro +xquad_ar +xquad_en +) + +if [ "${#TASKS[@]}" -eq "$SLURM_ARRAY_TASK_COUNT" ]; + then + echo "Please update the array size as the it doesn't correspond to the number of models we want to evaluate. Array size: $SLURM_ARRAY_TASK_COUNT, number of models: ${#TASKS[@]}" + exit 1 +fi +TASK=${TASKS[$SLURM_ARRAY_TASK_ID]} + +BLOOM_FOLDER=$WORK/bloom/bloom-176b/$TASK +mkdir -p $OPT_FOLDER + +CMD="./tasks/eval_harness/evaluate_bsevalharness.py \ + --load $CHECKPOINT_PATH \ + --results_path $BLOOM_FOLDER/eai_results.json \ + --tensor-model-parallel-size $TP_SIZE \ + --pipeline-model-parallel-size $PP_SIZE \ + --micro-batch-size $EVAL_MICRO_BATCH_SIZE \ + --no-load-optim \ + --no-load-rng \ + --inference \ + --task_list $TASK\ + --deepspeed \ + --deepspeed_config ds_config.json \ + --intermed_results \ + --adaptive_seq_len \ + --micro_bs_multiplier 16 \ + --offloadearly \ + $MEGATRON_REQUIRED_ARGS \ + " + +GPUS_PER_NODE=8 +NNODES=$SLURM_NNODES +MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) +MASTER_PORT=6000 +export LAUNCHER="python -u -m torch.distributed.run \ + --nproc_per_node $GPUS_PER_NODE \ + --nnodes $NNODES \ + --rdzv_endpoint $MASTER_ADDR:$MASTER_PORT \ + --rdzv_backend c10d \ + --max_restarts 0 \ + --tee 3 \ + " + +export CUDA_LAUNCH_BLOCKING=1 + +echo $LAUNCHER $CMD + +export PYTHONPATH=$MEGATRON_DEEPSPEED_REPO + +$LAUNCHER $CMD 2>&1 | tee $BLOOM_FOLDER/eval-harness.log diff --git a/evaluation/results/bloom/run_opt_evaluation_175b.slurm b/evaluation/results/bloom/run_opt_evaluation_175b.slurm new file mode 100644 index 00000000..9084d9bd --- /dev/null +++ b/evaluation/results/bloom/run_opt_evaluation_175b.slurm @@ -0,0 +1,160 @@ +#!/bin/bash +#SBATCH --job-name=eai-eval-bloom-176b +#SBATCH --partition=gpu_p5 +#SBATCH --constraint=a100 +#SBATCH --reservation=hug +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=1 # crucial - only 1 task per dist per node! +#SBATCH --cpus-per-task=64 # number of cores per tasks +#SBATCH --hint=nomultithread # we get physical cores not logical +#SBATCH --gres=gpu:8 # number of gpus +#SBATCH --time 20:00:00 # maximum execution time (HH:MM:SS) +#SBATCH --output=/gpfswork/rech/six/uty16tp/logs/%x-%j.out # output file name +#SBATCH --account=six@a100 +#SBATCH --array=0-27 + +set -x -e + +source $six_ALL_CCFRWORK/start-py38-pt111 +# Required in order to load the opt tokenizer +conda activate thomas_lm_eval + +echo "START TIME: $(date)" + + +CHECKPOINT_PATH=$six_ALL_CCFRSCRATCH/opt/opt-175b-meg-ds +MEGATRON_DEEPSPEED_REPO=/gpfswork/rech/six/uty16tp/code/big_science/Megatron-DeepSpeed-bloom +export HF_DATASETS_OFFLINE=1 +export TRANSFORMERS_OFFLINE=1 + +export TRANSFORMERS_CACHE=$six_ALL_CCFRWORK/models +export HF_DATASETS_CACHE=$six_ALL_CCFRWORK/datasets +export HF_MODULES_CACHE=$six_ALL_CCFRWORK/modules +export HF_METRICS_CACHE=$six_ALL_CCFRWORK/metrics + +cd $MEGATRON_DEEPSPEED_REPO + +# Make sure you use the slow version of the tokenizer. +# Same tokenizer for 125m and 175b +TOKENIZER_NAME_OR_PATH=bigscience/tokenizer + +PP_SIZE=8 +TP_SIZE=1 +SEQ_LEN=2048 + +# different from the training MICRO_BATCH_SIZE - no optim memory, so can do bigger BS +# make as big as it can fit into gpu w/o OOM, but not too close to 100% +EVAL_MICRO_BATCH_SIZE=1 + +#dummy arguments to make megatron happy. +MEGATRON_REQUIRED_ARGS=" \ + --num-layers -1 \ + --hidden-size -1 \ + --num-attention-heads -1 \ + --seq-length -1 \ + --max-position-embeddings -1 \ +" + + +ZERO_STAGE=0 + +config_json="./ds_config.json" + +# Deepspeed figures out GAS dynamically from dynamic GBS via set_train_batch_size() +cat < $config_json +{ + "train_micro_batch_size_per_gpu": 1, + "train_batch_size": 1, + "gradient_clipping": 1.0, + "zero_optimization": { + "stage": $ZERO_STAGE + }, + "bf16": { + "enabled": true + }, + "steps_per_print": 2000, + "wall_clock_breakdown": false +} +EOT + +# --task_list arc_challenge,arc_easy,boolq,copa,headqa,hellaswag,lambada,logiqa,mathqa,mc_taco,mrpc,multirc,openbookqa,piqa,prost,pubmedqa,qnli,qqp,race,rte,sciq,sst,triviaqa,webqs,wic,winogrande,wnli,wsc \ +TASKS=( +arc_challenge +arc_easy +boolq +copa +headqa +hellaswag +lambada +logiqa +mathqa +mc_taco +mrpc +multirc +openbookqa +piqa +prost +pubmedqa +qnli +qqp +race +rte +sciq +sst +triviaqa +webqs +wic +winogrande +wnli +wsc +) + +if [ "${#TASKS[@]}" -eq "$SLURM_ARRAY_TASK_COUNT" ]; + then + echo "Please update the array size as the it doesn't correspond to the number of models we want to evaluate. Array size: $SLURM_ARRAY_TASK_COUNT, number of models: ${#TASKS[@]}" + exit 1 +fi +TASK=${TASKS[$SLURM_ARRAY_TASK_ID]} + +BLOOM_FOLDER=$WORK/bloom/bloom-176b/$TASK +mkdir -p $OPT_FOLDER + +CMD="./tasks/eval_harness/evaluate.py \ + --load $CHECKPOINT_PATH \ + --results_path $BLOOM_FOLDER/eai_results.json \ + --tensor-model-parallel-size $TP_SIZE \ + --pipeline-model-parallel-size $PP_SIZE \ + --micro-batch-size $EVAL_MICRO_BATCH_SIZE \ + --no-load-optim \ + --no-load-rng \ + --inference \ + --task_list $TASK\ + --deepspeed \ + --deepspeed_config ds_config.json \ + --intermed_results \ + --adaptive_seq_len \ + --micro_bs_multiplier 16 \ + --offloadearly \ + $MEGATRON_REQUIRED_ARGS \ + " + +GPUS_PER_NODE=8 +NNODES=$SLURM_NNODES +MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) +MASTER_PORT=6000 +export LAUNCHER="python -u -m torch.distributed.run \ + --nproc_per_node $GPUS_PER_NODE \ + --nnodes $NNODES \ + --rdzv_endpoint $MASTER_ADDR:$MASTER_PORT \ + --rdzv_backend c10d \ + --max_restarts 0 \ + --tee 3 \ + " + +export CUDA_LAUNCH_BLOCKING=1 + +echo $LAUNCHER $CMD + +export PYTHONPATH=$MEGATRON_DEEPSPEED_REPO + +$LAUNCHER $CMD 2>&1 | tee $BLOOM_FOLDER/eval-harness.log From 9cc3e7b31669b6167e3b951d5a43e4fec8dd27bb Mon Sep 17 00:00:00 2001 From: thomasw21 <24695242+thomasw21@users.noreply.github.com> Date: Wed, 6 Jul 2022 15:11:37 +0200 Subject: [PATCH 25/35] Rename --- ..._opt_evaluation_175b.slurm => run_bloom_evaluation_175b.slurm} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename evaluation/results/bloom/{run_opt_evaluation_175b.slurm => run_bloom_evaluation_175b.slurm} (100%) diff --git a/evaluation/results/bloom/run_opt_evaluation_175b.slurm b/evaluation/results/bloom/run_bloom_evaluation_175b.slurm similarity index 100% rename from evaluation/results/bloom/run_opt_evaluation_175b.slurm rename to evaluation/results/bloom/run_bloom_evaluation_175b.slurm From dbaa0db17ea4e5ea29514811db9ee66ebeca5a1a Mon Sep 17 00:00:00 2001 From: thomasw21 <24695242+thomasw21@users.noreply.github.com> Date: Wed, 6 Jul 2022 15:16:02 +0200 Subject: [PATCH 26/35] Woops --- evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm | 2 +- evaluation/results/bloom/run_bloom_evaluation_175b.slurm | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm b/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm index 63cd1d4f..061d6982 100644 --- a/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm +++ b/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm @@ -159,7 +159,7 @@ fi TASK=${TASKS[$SLURM_ARRAY_TASK_ID]} BLOOM_FOLDER=$WORK/bloom/bloom-176b/$TASK -mkdir -p $OPT_FOLDER +mkdir -p $BLOOM_FOLDER CMD="./tasks/eval_harness/evaluate_bsevalharness.py \ --load $CHECKPOINT_PATH \ diff --git a/evaluation/results/bloom/run_bloom_evaluation_175b.slurm b/evaluation/results/bloom/run_bloom_evaluation_175b.slurm index 9084d9bd..d71dc57c 100644 --- a/evaluation/results/bloom/run_bloom_evaluation_175b.slurm +++ b/evaluation/results/bloom/run_bloom_evaluation_175b.slurm @@ -117,7 +117,7 @@ fi TASK=${TASKS[$SLURM_ARRAY_TASK_ID]} BLOOM_FOLDER=$WORK/bloom/bloom-176b/$TASK -mkdir -p $OPT_FOLDER +mkdir -p $BLOOM_FOLDER CMD="./tasks/eval_harness/evaluate.py \ --load $CHECKPOINT_PATH \ From 1f9ccfcda07e2bcdac5a2420b39d9e7c2d64d6fb Mon Sep 17 00:00:00 2001 From: thomasw21 <24695242+thomasw21@users.noreply.github.com> Date: Wed, 6 Jul 2022 15:32:03 +0200 Subject: [PATCH 27/35] prevent tokenizer parallelism --- evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm | 1 + evaluation/results/bloom/run_bloom_evaluation_175b.slurm | 1 + 2 files changed, 2 insertions(+) diff --git a/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm b/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm index 061d6982..3790d81d 100644 --- a/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm +++ b/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm @@ -30,6 +30,7 @@ export TRANSFORMERS_CACHE=$six_ALL_CCFRWORK/models export HF_DATASETS_CACHE=$six_ALL_CCFRWORK/datasetseval export HF_MODULES_CACHE=$six_ALL_CCFRWORK/modules export HF_METRICS_CACHE=$six_ALL_CCFRWORK/metrics +export TOKENIZERS_PARALLELISM=false cd $MEGATRON_DEEPSPEED_REPO diff --git a/evaluation/results/bloom/run_bloom_evaluation_175b.slurm b/evaluation/results/bloom/run_bloom_evaluation_175b.slurm index d71dc57c..99ea0a16 100644 --- a/evaluation/results/bloom/run_bloom_evaluation_175b.slurm +++ b/evaluation/results/bloom/run_bloom_evaluation_175b.slurm @@ -31,6 +31,7 @@ export TRANSFORMERS_CACHE=$six_ALL_CCFRWORK/models export HF_DATASETS_CACHE=$six_ALL_CCFRWORK/datasets export HF_MODULES_CACHE=$six_ALL_CCFRWORK/modules export HF_METRICS_CACHE=$six_ALL_CCFRWORK/metrics +export TOKENIZERS_PARALLELISM=false cd $MEGATRON_DEEPSPEED_REPO From f94c925f4f477f921ef9e00c9c231bc92ad8a3e5 Mon Sep 17 00:00:00 2001 From: thomasw21 <24695242+thomasw21@users.noreply.github.com> Date: Wed, 6 Jul 2022 15:45:05 +0200 Subject: [PATCH 28/35] Woops overrided value --- evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm | 4 ++-- evaluation/results/opt/run_opt_bs_evaluation_175b.slurm | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm b/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm index 3790d81d..ad81f77a 100644 --- a/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm +++ b/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm @@ -164,7 +164,7 @@ mkdir -p $BLOOM_FOLDER CMD="./tasks/eval_harness/evaluate_bsevalharness.py \ --load $CHECKPOINT_PATH \ - --results_path $BLOOM_FOLDER/eai_results.json \ + --results_path $BLOOM_FOLDER/bs_results.json \ --tensor-model-parallel-size $TP_SIZE \ --pipeline-model-parallel-size $PP_SIZE \ --micro-batch-size $EVAL_MICRO_BATCH_SIZE \ @@ -200,4 +200,4 @@ echo $LAUNCHER $CMD export PYTHONPATH=$MEGATRON_DEEPSPEED_REPO -$LAUNCHER $CMD 2>&1 | tee $BLOOM_FOLDER/eval-harness.log +$LAUNCHER $CMD 2>&1 | tee $BLOOM_FOLDER/bs-eval-harness.log diff --git a/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm b/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm index 729b5484..c9c8c808 100644 --- a/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm +++ b/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm @@ -180,7 +180,7 @@ mkdir -p $OPT_FOLDER CMD="./tasks/eval_harness/evaluate_bsevalharness.py \ --load $CHECKPOINT_PATH \ - --results_path $OPT_FOLDER/eai_results.json \ + --results_path $OPT_FOLDER/bs_results.json \ --tensor-model-parallel-size $TP_SIZE \ --pipeline-model-parallel-size $PP_SIZE \ --micro-batch-size $EVAL_MICRO_BATCH_SIZE \ @@ -216,4 +216,4 @@ echo $LAUNCHER $CMD export PYTHONPATH=$MEGATRON_DEEPSPEED_REPO -$LAUNCHER $CMD 2>&1 | tee $OPT_FOLDER/eval-harness.log +$LAUNCHER $CMD 2>&1 | tee $OPT_FOLDER/bs-eval-harness.log From e282bf293e1195df6f48a122f4ab45a103ee2f79 Mon Sep 17 00:00:00 2001 From: thomasw21 <24695242+thomasw21@users.noreply.github.com> Date: Wed, 6 Jul 2022 17:31:26 +0200 Subject: [PATCH 29/35] Fix 125m opt evaluation script --- .../opt/run_opt_bs_evaluation_125m.slurm | 83 ++++++++++++++----- 1 file changed, 63 insertions(+), 20 deletions(-) diff --git a/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm b/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm index d6a70416..815dc083 100644 --- a/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm +++ b/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm @@ -91,35 +91,78 @@ cat < $config_json EOT # --task_list GEM/web_nlg_en,GEM/web_nlg_en_challenge_test_numbers,GEM/web_nlg_en_challenge_test_scramble,GEM/web_nlg_en_challenge_validation_sample,GEM/web_nlg_ru,GEM/web_nlg_ru_challenge_test_scramble,GEM/web_nlg_ru_challenge_validation_sample,GEM/wiki_auto_asset_turk_challenge_test_asset_backtranslation,GEM/wiki_auto_asset_turk_challenge_test_asset_bfp02,GEM/wiki_auto_asset_turk_challenge_test_asset_bfp05,GEM/wiki_auto_asset_turk_challenge_test_asset_nopunc,GEM/wiki_auto_asset_turk_challenge_test_turk_backtranslation,GEM/wiki_auto_asset_turk_challenge_test_turk_bfp02,GEM/wiki_auto_asset_turk_challenge_test_turk_bfp05,GEM/wiki_auto_asset_turk_challenge_test_turk_nopunc,GEM/wiki_auto_asset_turk_test_asset,GEM/wiki_auto_asset_turk_test_turk,GEM/wiki_lingua_ar,GEM/wiki_lingua_cs,GEM/wiki_lingua_de,GEM/wiki_lingua_en,GEM/wiki_lingua_es,GEM/wiki_lingua_fr,GEM/wiki_lingua_hi,GEM/wiki_lingua_id,GEM/wiki_lingua_it,GEM/wiki_lingua_ja,GEM/wiki_lingua_ko,GEM/wiki_lingua_nl,GEM/wiki_lingua_pt,GEM/wiki_lingua_ru,GEM/wiki_lingua_th,GEM/wiki_lingua_tr,GEM/wiki_lingua_vi,GEM/wiki_lingua_zh,gem_xsum,gem_xsum_challenge_sample,gem_xsum_challenge_test_backtranslation,gem_xsum_challenge_test_bfp_02,gem_xsum_challenge_test_bfp_05,gem_xsum_challenge_test_covid,gem_xsum_challenge_test_nopunc,axb,axg,boolq,cb,cola,copa,crows_pairs_english,crows_pairs_french,diabla,e2e_nlg_cleaned,mnli,mnli_mismatched,multirc,piaf,qqp,rte,sst,tydiqa_primary,tydiqa_secondary,wic,wsc,wnli,wino_bias_type1_anti,wino_bias_type1_pro,wino_bias_type2_anti,wino_bias_type2_pro,xquad_ar,xquad_en\ + TASKS=( -arc_challenge -arc_easy +GEM/web_nlg_en +GEM/web_nlg_en_challenge_test_numbers +GEM/web_nlg_en_challenge_test_scramble +GEM/web_nlg_en_challenge_validation_sample +GEM/web_nlg_ru +GEM/web_nlg_ru_challenge_test_scramble +GEM/web_nlg_ru_challenge_validation_sample +GEM/wiki_auto_asset_turk_challenge_test_asset_backtranslation +GEM/wiki_auto_asset_turk_challenge_test_asset_bfp02 +GEM/wiki_auto_asset_turk_challenge_test_asset_bfp05 +GEM/wiki_auto_asset_turk_challenge_test_asset_nopunc +GEM/wiki_auto_asset_turk_challenge_test_turk_backtranslation +GEM/wiki_auto_asset_turk_challenge_test_turk_bfp02 +GEM/wiki_auto_asset_turk_challenge_test_turk_bfp05 +GEM/wiki_auto_asset_turk_challenge_test_turk_nopunc +GEM/wiki_auto_asset_turk_test_asset +GEM/wiki_auto_asset_turk_test_turk +GEM/wiki_lingua_ar +GEM/wiki_lingua_cs +GEM/wiki_lingua_de +GEM/wiki_lingua_en +GEM/wiki_lingua_es +GEM/wiki_lingua_fr +GEM/wiki_lingua_hi +GEM/wiki_lingua_id +GEM/wiki_lingua_it +GEM/wiki_lingua_ja +GEM/wiki_lingua_ko +GEM/wiki_lingua_nl +GEM/wiki_lingua_pt +GEM/wiki_lingua_ru +GEM/wiki_lingua_th +GEM/wiki_lingua_tr +GEM/wiki_lingua_vi +GEM/wiki_lingua_zh +gem_xsum +gem_xsum_challenge_sample +gem_xsum_challenge_test_backtranslation +gem_xsum_challenge_test_bfp_02 +gem_xsum_challenge_test_bfp_05 +gem_xsum_challenge_test_covid +gem_xsum_challenge_test_nopunc +axb +axg boolq +cb +cola copa -headqa -hellaswag -lambada -logiqa -mathqa -mc_taco -mrpc +crows_pairs_english +crows_pairs_french +diabla +e2e_nlg_cleaned +mnli +mnli_mismatched multirc -openbookqa -piqa -prost -pubmedqa -qnli +piaf qqp -race rte -sciq sst -triviaqa -webqs +tydiqa_primary +tydiqa_secondary wic -winogrande -wnli wsc +wnli +wino_bias_type1_anti +wino_bias_type1_pro +wino_bias_type2_anti +wino_bias_type2_pro +xquad_ar +xquad_en ) if [ "${#TASKS[@]}" -eq "$SLURM_ARRAY_TASK_COUNT" ]; From 21a5865c29377c143f0c99abc05f5c9c12ad87c9 Mon Sep 17 00:00:00 2001 From: thomasw21 <24695242+thomasw21@users.noreply.github.com> Date: Wed, 6 Jul 2022 22:56:58 +0200 Subject: [PATCH 30/35] Woops --- evaluation/results/bloom/run_bloom_evaluation_175b.slurm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evaluation/results/bloom/run_bloom_evaluation_175b.slurm b/evaluation/results/bloom/run_bloom_evaluation_175b.slurm index 99ea0a16..3ba5622e 100644 --- a/evaluation/results/bloom/run_bloom_evaluation_175b.slurm +++ b/evaluation/results/bloom/run_bloom_evaluation_175b.slurm @@ -22,7 +22,7 @@ conda activate thomas_lm_eval echo "START TIME: $(date)" -CHECKPOINT_PATH=$six_ALL_CCFRSCRATCH/opt/opt-175b-meg-ds +CHECKPOINT_PATH=$six_ALL_CCFRSCRATCH/checkpoints/tr11-176B-ml/checkpoints/main/global_step95000 MEGATRON_DEEPSPEED_REPO=/gpfswork/rech/six/uty16tp/code/big_science/Megatron-DeepSpeed-bloom export HF_DATASETS_OFFLINE=1 export TRANSFORMERS_OFFLINE=1 From 04fbe66b07462041068b4b2a41d1d0cb8d7a1f6a Mon Sep 17 00:00:00 2001 From: thomasw21 <24695242+thomasw21@users.noreply.github.com> Date: Thu, 7 Jul 2022 09:09:37 +0200 Subject: [PATCH 31/35] Add all flores ppl --- .../bloom/run_bloom_bs_evaluation_176b.slurm | 104 +++++++++++++++++- .../bloom/run_bloom_evaluation_175b.slurm | 7 +- .../opt/run_opt_bs_evaluation_175b.slurm | 104 +++++++++++++++++- .../results/opt/run_opt_evaluation_175b.slurm | 7 +- 4 files changed, 218 insertions(+), 4 deletions(-) diff --git a/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm b/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm index ad81f77a..21a1ee37 100644 --- a/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm +++ b/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm @@ -11,7 +11,7 @@ #SBATCH --time 20:00:00 # maximum execution time (HH:MM:SS) #SBATCH --output=/gpfswork/rech/six/uty16tp/logs/%x-%j.out # output file name #SBATCH --account=six@a100 -#SBATCH --array=0-69 +#SBATCH --array=0-171 set -x -e @@ -150,6 +150,108 @@ wino_bias_type2_anti wino_bias_type2_pro xquad_ar xquad_en +gsarti/flores_101_afr +gsarti/flores_101_amh +gsarti/flores_101_ara +gsarti/flores_101_hye +gsarti/flores_101_asm +gsarti/flores_101_ast +gsarti/flores_101_azj +gsarti/flores_101_bel +gsarti/flores_101_ben +gsarti/flores_101_bos +gsarti/flores_101_bul +gsarti/flores_101_mya +gsarti/flores_101_cat +gsarti/flores_101_ceb +gsarti/flores_101_zho_simpl +gsarti/flores_101_zho_trad +gsarti/flores_101_hrv +gsarti/flores_101_ces +gsarti/flores_101_dan +gsarti/flores_101_nld +gsarti/flores_101_eng +gsarti/flores_101_est +gsarti/flores_101_tgl +gsarti/flores_101_fin +gsarti/flores_101_fra +gsarti/flores_101_ful +gsarti/flores_101_glg +gsarti/flores_101_lug +gsarti/flores_101_kat +gsarti/flores_101_deu +gsarti/flores_101_ell +gsarti/flores_101_guj +gsarti/flores_101_hau +gsarti/flores_101_heb +gsarti/flores_101_hin +gsarti/flores_101_hun +gsarti/flores_101_isl +gsarti/flores_101_ibo +gsarti/flores_101_ind +gsarti/flores_101_gle +gsarti/flores_101_ita +gsarti/flores_101_jpn +gsarti/flores_101_jav +gsarti/flores_101_kea +gsarti/flores_101_kam +gsarti/flores_101_kan +gsarti/flores_101_kaz +gsarti/flores_101_khm +gsarti/flores_101_kor +gsarti/flores_101_kir +gsarti/flores_101_lao +gsarti/flores_101_lav +gsarti/flores_101_lin +gsarti/flores_101_lit +gsarti/flores_101_luo +gsarti/flores_101_ltz +gsarti/flores_101_mkd +gsarti/flores_101_msa +gsarti/flores_101_mal +gsarti/flores_101_mlt +gsarti/flores_101_mri +gsarti/flores_101_mar +gsarti/flores_101_mon +gsarti/flores_101_npi +gsarti/flores_101_nso +gsarti/flores_101_nob +gsarti/flores_101_nya +gsarti/flores_101_oci +gsarti/flores_101_ory +gsarti/flores_101_orm +gsarti/flores_101_pus +gsarti/flores_101_fas +gsarti/flores_101_pol +gsarti/flores_101_por +gsarti/flores_101_pan +gsarti/flores_101_ron +gsarti/flores_101_rus +gsarti/flores_101_srp +gsarti/flores_101_sna +gsarti/flores_101_snd +gsarti/flores_101_slk +gsarti/flores_101_slv +gsarti/flores_101_som +gsarti/flores_101_ckb +gsarti/flores_101_spa +gsarti/flores_101_swh +gsarti/flores_101_swe +gsarti/flores_101_tgk +gsarti/flores_101_tam +gsarti/flores_101_tel +gsarti/flores_101_tha +gsarti/flores_101_tur +gsarti/flores_101_ukr +gsarti/flores_101_umb +gsarti/flores_101_urd +gsarti/flores_101_uzb +gsarti/flores_101_vie +gsarti/flores_101_cym +gsarti/flores_101_wol +gsarti/flores_101_xho +gsarti/flores_101_yor +gsarti/flores_101_zul ) if [ "${#TASKS[@]}" -eq "$SLURM_ARRAY_TASK_COUNT" ]; diff --git a/evaluation/results/bloom/run_bloom_evaluation_175b.slurm b/evaluation/results/bloom/run_bloom_evaluation_175b.slurm index 3ba5622e..bca13d0b 100644 --- a/evaluation/results/bloom/run_bloom_evaluation_175b.slurm +++ b/evaluation/results/bloom/run_bloom_evaluation_175b.slurm @@ -11,7 +11,7 @@ #SBATCH --time 20:00:00 # maximum execution time (HH:MM:SS) #SBATCH --output=/gpfswork/rech/six/uty16tp/logs/%x-%j.out # output file name #SBATCH --account=six@a100 -#SBATCH --array=0-27 +#SBATCH --array=0-32 set -x -e @@ -108,6 +108,11 @@ wic winogrande wnli wsc +lambada_mt_en +lambada_mt_fr +lambada_mt_de +lambada_mt_it +lambada_mt_es ) if [ "${#TASKS[@]}" -eq "$SLURM_ARRAY_TASK_COUNT" ]; diff --git a/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm b/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm index c9c8c808..e4520864 100644 --- a/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm +++ b/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm @@ -11,7 +11,7 @@ #SBATCH --time 20:00:00 # maximum execution time (HH:MM:SS) #SBATCH --output=/gpfswork/rech/six/uty16tp/logs/%x-%j.out # output file name #SBATCH --account=six@a100 -#SBATCH --array=0-69 +#SBATCH --array=0-171 set -x -e @@ -166,6 +166,108 @@ wino_bias_type2_anti wino_bias_type2_pro xquad_ar xquad_en +gsarti/flores_101_afr +gsarti/flores_101_amh +gsarti/flores_101_ara +gsarti/flores_101_hye +gsarti/flores_101_asm +gsarti/flores_101_ast +gsarti/flores_101_azj +gsarti/flores_101_bel +gsarti/flores_101_ben +gsarti/flores_101_bos +gsarti/flores_101_bul +gsarti/flores_101_mya +gsarti/flores_101_cat +gsarti/flores_101_ceb +gsarti/flores_101_zho_simpl +gsarti/flores_101_zho_trad +gsarti/flores_101_hrv +gsarti/flores_101_ces +gsarti/flores_101_dan +gsarti/flores_101_nld +gsarti/flores_101_eng +gsarti/flores_101_est +gsarti/flores_101_tgl +gsarti/flores_101_fin +gsarti/flores_101_fra +gsarti/flores_101_ful +gsarti/flores_101_glg +gsarti/flores_101_lug +gsarti/flores_101_kat +gsarti/flores_101_deu +gsarti/flores_101_ell +gsarti/flores_101_guj +gsarti/flores_101_hau +gsarti/flores_101_heb +gsarti/flores_101_hin +gsarti/flores_101_hun +gsarti/flores_101_isl +gsarti/flores_101_ibo +gsarti/flores_101_ind +gsarti/flores_101_gle +gsarti/flores_101_ita +gsarti/flores_101_jpn +gsarti/flores_101_jav +gsarti/flores_101_kea +gsarti/flores_101_kam +gsarti/flores_101_kan +gsarti/flores_101_kaz +gsarti/flores_101_khm +gsarti/flores_101_kor +gsarti/flores_101_kir +gsarti/flores_101_lao +gsarti/flores_101_lav +gsarti/flores_101_lin +gsarti/flores_101_lit +gsarti/flores_101_luo +gsarti/flores_101_ltz +gsarti/flores_101_mkd +gsarti/flores_101_msa +gsarti/flores_101_mal +gsarti/flores_101_mlt +gsarti/flores_101_mri +gsarti/flores_101_mar +gsarti/flores_101_mon +gsarti/flores_101_npi +gsarti/flores_101_nso +gsarti/flores_101_nob +gsarti/flores_101_nya +gsarti/flores_101_oci +gsarti/flores_101_ory +gsarti/flores_101_orm +gsarti/flores_101_pus +gsarti/flores_101_fas +gsarti/flores_101_pol +gsarti/flores_101_por +gsarti/flores_101_pan +gsarti/flores_101_ron +gsarti/flores_101_rus +gsarti/flores_101_srp +gsarti/flores_101_sna +gsarti/flores_101_snd +gsarti/flores_101_slk +gsarti/flores_101_slv +gsarti/flores_101_som +gsarti/flores_101_ckb +gsarti/flores_101_spa +gsarti/flores_101_swh +gsarti/flores_101_swe +gsarti/flores_101_tgk +gsarti/flores_101_tam +gsarti/flores_101_tel +gsarti/flores_101_tha +gsarti/flores_101_tur +gsarti/flores_101_ukr +gsarti/flores_101_umb +gsarti/flores_101_urd +gsarti/flores_101_uzb +gsarti/flores_101_vie +gsarti/flores_101_cym +gsarti/flores_101_wol +gsarti/flores_101_xho +gsarti/flores_101_yor +gsarti/flores_101_zul ) if [ "${#TASKS[@]}" -eq "$SLURM_ARRAY_TASK_COUNT" ]; diff --git a/evaluation/results/opt/run_opt_evaluation_175b.slurm b/evaluation/results/opt/run_opt_evaluation_175b.slurm index 7726799a..10a7ea2d 100644 --- a/evaluation/results/opt/run_opt_evaluation_175b.slurm +++ b/evaluation/results/opt/run_opt_evaluation_175b.slurm @@ -11,7 +11,7 @@ #SBATCH --time 20:00:00 # maximum execution time (HH:MM:SS) #SBATCH --output=/gpfswork/rech/six/uty16tp/logs/%x-%j.out # output file name #SBATCH --account=six@a100 -#SBATCH --array=0-27 +#SBATCH --array=0-32 set -x -e @@ -124,6 +124,11 @@ wic winogrande wnli wsc +lambada_mt_en +lambada_mt_fr +lambada_mt_de +lambada_mt_it +lambada_mt_es ) if [ "${#TASKS[@]}" -eq "$SLURM_ARRAY_TASK_COUNT" ]; From 2eba492605813a36a31959bc6e31e8087cfe6b11 Mon Sep 17 00:00:00 2001 From: thomasw21 <24695242+thomasw21@users.noreply.github.com> Date: Thu, 7 Jul 2022 09:11:32 +0200 Subject: [PATCH 32/35] Comment out quite annoying check --- .../results/bloom/run_bloom_bs_evaluation_176b.slurm | 10 +++++----- .../results/bloom/run_bloom_evaluation_175b.slurm | 10 +++++----- .../results/opt/run_opt_bs_evaluation_125m.slurm | 10 +++++----- .../results/opt/run_opt_bs_evaluation_175b.slurm | 10 +++++----- evaluation/results/opt/run_opt_evaluation_125m.slurm | 10 +++++----- evaluation/results/opt/run_opt_evaluation_175b.slurm | 10 +++++----- 6 files changed, 30 insertions(+), 30 deletions(-) diff --git a/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm b/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm index 21a1ee37..5e24f21d 100644 --- a/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm +++ b/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm @@ -254,11 +254,11 @@ gsarti/flores_101_yor gsarti/flores_101_zul ) -if [ "${#TASKS[@]}" -eq "$SLURM_ARRAY_TASK_COUNT" ]; - then - echo "Please update the array size as the it doesn't correspond to the number of models we want to evaluate. Array size: $SLURM_ARRAY_TASK_COUNT, number of models: ${#TASKS[@]}" - exit 1 -fi +#if [ "${#TASKS[@]}" -ne "$SLURM_ARRAY_TASK_COUNT" ]; +# then +# echo "Please update the array size as the it doesn't correspond to the number of models we want to evaluate. Array size: $SLURM_ARRAY_TASK_COUNT, number of models: ${#TASKS[@]}" +# exit 1 +#fi TASK=${TASKS[$SLURM_ARRAY_TASK_ID]} BLOOM_FOLDER=$WORK/bloom/bloom-176b/$TASK diff --git a/evaluation/results/bloom/run_bloom_evaluation_175b.slurm b/evaluation/results/bloom/run_bloom_evaluation_175b.slurm index bca13d0b..679ae7d2 100644 --- a/evaluation/results/bloom/run_bloom_evaluation_175b.slurm +++ b/evaluation/results/bloom/run_bloom_evaluation_175b.slurm @@ -115,11 +115,11 @@ lambada_mt_it lambada_mt_es ) -if [ "${#TASKS[@]}" -eq "$SLURM_ARRAY_TASK_COUNT" ]; - then - echo "Please update the array size as the it doesn't correspond to the number of models we want to evaluate. Array size: $SLURM_ARRAY_TASK_COUNT, number of models: ${#TASKS[@]}" - exit 1 -fi +#if [ "${#TASKS[@]}" -ne "$SLURM_ARRAY_TASK_COUNT" ]; +# then +# echo "Please update the array size as the it doesn't correspond to the number of models we want to evaluate. Array size: $SLURM_ARRAY_TASK_COUNT, number of models: ${#TASKS[@]}" +# exit 1 +#fi TASK=${TASKS[$SLURM_ARRAY_TASK_ID]} BLOOM_FOLDER=$WORK/bloom/bloom-176b/$TASK diff --git a/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm b/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm index 815dc083..bf4afde9 100644 --- a/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm +++ b/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm @@ -165,11 +165,11 @@ xquad_ar xquad_en ) -if [ "${#TASKS[@]}" -eq "$SLURM_ARRAY_TASK_COUNT" ]; - then - echo "Please update the array size as the it doesn't correspond to the number of models we want to evaluate. Array size: $SLURM_ARRAY_TASK_COUNT, number of models: ${#TASKS[@]}" - exit 1 -fi +#if [ "${#TASKS[@]}" -ne "$SLURM_ARRAY_TASK_COUNT" ]; +# then +# echo "Please update the array size as the it doesn't correspond to the number of models we want to evaluate. Array size: $SLURM_ARRAY_TASK_COUNT, number of models: ${#TASKS[@]}" +# exit 1 +#fi TASK=${TASKS[$SLURM_ARRAY_TASK_ID]} OPT_FOLDER=$WORK/opt/opt-125m/$TASK diff --git a/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm b/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm index e4520864..097581be 100644 --- a/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm +++ b/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm @@ -270,11 +270,11 @@ gsarti/flores_101_yor gsarti/flores_101_zul ) -if [ "${#TASKS[@]}" -eq "$SLURM_ARRAY_TASK_COUNT" ]; - then - echo "Please update the array size as the it doesn't correspond to the number of models we want to evaluate. Array size: $SLURM_ARRAY_TASK_COUNT, number of models: ${#TASKS[@]}" - exit 1 -fi +#if [ "${#TASKS[@]}" -ne "$SLURM_ARRAY_TASK_COUNT" ]; +# then +# echo "Please update the array size as the it doesn't correspond to the number of models we want to evaluate. Array size: $SLURM_ARRAY_TASK_COUNT, number of models: ${#TASKS[@]}" +# exit 1 +#fi TASK=${TASKS[$SLURM_ARRAY_TASK_ID]} OPT_FOLDER=$WORK/opt/opt-175b/$TASK diff --git a/evaluation/results/opt/run_opt_evaluation_125m.slurm b/evaluation/results/opt/run_opt_evaluation_125m.slurm index 2ef24b40..cc657325 100644 --- a/evaluation/results/opt/run_opt_evaluation_125m.slurm +++ b/evaluation/results/opt/run_opt_evaluation_125m.slurm @@ -123,11 +123,11 @@ wnli wsc ) -if [ "${#TASKS[@]}" -eq "$SLURM_ARRAY_TASK_COUNT" ]; - then - echo "Please update the array size as the it doesn't correspond to the number of models we want to evaluate. Array size: $SLURM_ARRAY_TASK_COUNT, number of models: ${#TASKS[@]}" - exit 1 -fi +#if [ "${#TASKS[@]}" -ne "$SLURM_ARRAY_TASK_COUNT" ]; +# then +# echo "Please update the array size as the it doesn't correspond to the number of models we want to evaluate. Array size: $SLURM_ARRAY_TASK_COUNT, number of models: ${#TASKS[@]}" +# exit 1 +#fi TASK=${TASKS[$SLURM_ARRAY_TASK_ID]} OPT_FOLDER=$WORK/opt/opt-125m/$TASK diff --git a/evaluation/results/opt/run_opt_evaluation_175b.slurm b/evaluation/results/opt/run_opt_evaluation_175b.slurm index 10a7ea2d..515564cd 100644 --- a/evaluation/results/opt/run_opt_evaluation_175b.slurm +++ b/evaluation/results/opt/run_opt_evaluation_175b.slurm @@ -131,11 +131,11 @@ lambada_mt_it lambada_mt_es ) -if [ "${#TASKS[@]}" -eq "$SLURM_ARRAY_TASK_COUNT" ]; - then - echo "Please update the array size as the it doesn't correspond to the number of models we want to evaluate. Array size: $SLURM_ARRAY_TASK_COUNT, number of models: ${#TASKS[@]}" - exit 1 -fi +#if [ "${#TASKS[@]}" -ne "$SLURM_ARRAY_TASK_COUNT" ]; +# then +# echo "Please update the array size as the it doesn't correspond to the number of models we want to evaluate. Array size: $SLURM_ARRAY_TASK_COUNT, number of models: ${#TASKS[@]}" +# exit 1 +#fi TASK=${TASKS[$SLURM_ARRAY_TASK_ID]} OPT_FOLDER=$WORK/opt/opt-175b/$TASK From ceb785b020195888cc42cbb3c3b2b217b22487e6 Mon Sep 17 00:00:00 2001 From: thomasw21 <24695242+thomasw21@users.noreply.github.com> Date: Thu, 7 Jul 2022 15:36:49 +0200 Subject: [PATCH 33/35] Fix the script to work to query both EAI and BS results --- .../results/opt/concatenate_all_results.py | 79 ++++++++++++++----- 1 file changed, 60 insertions(+), 19 deletions(-) diff --git a/evaluation/results/opt/concatenate_all_results.py b/evaluation/results/opt/concatenate_all_results.py index 62316c10..21f67424 100644 --- a/evaluation/results/opt/concatenate_all_results.py +++ b/evaluation/results/opt/concatenate_all_results.py @@ -2,6 +2,8 @@ import json import re from pathlib import Path +from re import Pattern +from typing import List, Dict def get_args(): @@ -10,28 +12,46 @@ def get_args(): parser.add_argument("--concatenate-output-file", required=True, type=Path, help="Path to store the final output file") return parser.parse_args() -def main(): - args = get_args() - - # Get all json files +MODEL = "opt-175b-meg-ds" +# MODEL = "global_step95000" +RESULTS_REGEX = re.compile(rf"(eai|bs)_results_lm-eval_{MODEL}_(\d{4}-\d{2}-\d{2}-\d{2}-\d{2}-\d{2})_backup\.json") +def get_all_files_that_match_results_in_folder(root_folder: Path, regex: Pattern) -> List[Path]: json_files = [] - for folder in args.results_dir.iterdir(): - if folder.is_file(): - continue - for file in folder.iterdir(): - if file.is_dir(): - continue - match = re.match( - r"(?:eai|bs)_results_lm-eval_opt-175b-meg-ds_(?:\d{4})-(?:\d{2})-(?:\d{2})-(?:\d{2})-(?:\d{2})-(?:\d{2})\.json", - file.name, - ) + for folder in root_folder.iterdir(): + if folder.is_dir(): + json_files += get_all_files_that_match_results_in_folder(folder) + else: + # it's actually a file + file = folder + + match = RESULTS_REGEX.match(file.name) if match is None: continue else: - # TODO @thomasw21 some folder can have multiple results we should take the latest json_files.append(file) - break + return json_files + +def sort_dict(dictionary: Dict) -> Dict: + results = {} + + for key, value in sorted(dictionary.items(), key=lambda item: item[1]): + new_value = value + + if isinstance(value, dict): + new_value = sort_dict(new_value) + elif isinstance(value, list): + new_value = sorted(value) + + results[key] = new_value + + return results + +def main(): + args = get_args() + + # Get all json files + json_files = get_all_files_that_match_results_in_folder(args.results_dir) # Merge all json files final_result = { @@ -42,15 +62,36 @@ def main(): with open(file, "r") as fi: task_result = json.load(fi) - for key, value in task_result["results"].items(): - final_result["results"][key] = value + match = RESULTS_REGEX.match(file.name) + assert match is not None + prefix = match.group(1) + datetime_string = match.group(2) + + if prefix == "eai": + results_key = "results" + elif prefix == "bs": + results_key = "table_results" + else: + raise ValueError(f"Unsupported key: {prefix}") + + for key, value in task_result[results_key].items(): + if key not in final_result["results"]: + final_result["results"][key] = { + datetime_string: value + } + else: + assert datetime_string not in final_result["results"][key] + final_result["results"][key][datetime_string] = value for key, value in task_result["versions"].items(): final_result["versions"][key] = value + # We sort dict, better for serialization + final_result = sort_dict(final_result) + # Save result with open(args.concatenate_output_file, "w") as fo: - json.dump(final_result, fo) + json.dump(final_result, fo, indent=2) pass From c99efd8a3933601c3b214163411476eeaf50324c Mon Sep 17 00:00:00 2001 From: Niklas Muennighoff Date: Fri, 15 Jul 2022 12:21:33 +0200 Subject: [PATCH 34/35] Update evaluation/results/opt/concatenate_all_results.py Co-authored-by: Thomas Wang <24695242+thomasw21@users.noreply.github.com> --- evaluation/results/opt/concatenate_all_results.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evaluation/results/opt/concatenate_all_results.py b/evaluation/results/opt/concatenate_all_results.py index 21f67424..dc2ff023 100644 --- a/evaluation/results/opt/concatenate_all_results.py +++ b/evaluation/results/opt/concatenate_all_results.py @@ -35,7 +35,7 @@ def get_all_files_that_match_results_in_folder(root_folder: Path, regex: Pattern def sort_dict(dictionary: Dict) -> Dict: results = {} - for key, value in sorted(dictionary.items(), key=lambda item: item[1]): + for key, value in sorted(dictionary.items(), key=lambda item: item[0]): new_value = value if isinstance(value, dict): From 3b701d0bce66a471c2a40a0ce069edbf195b5559 Mon Sep 17 00:00:00 2001 From: Thomas Wang <24695242+thomasw21@users.noreply.github.com> Date: Fri, 15 Jul 2022 22:26:09 +0200 Subject: [PATCH 35/35] Update evaluation/results/opt/concatenate_all_results.py Co-authored-by: Niklas Muennighoff --- evaluation/results/opt/concatenate_all_results.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evaluation/results/opt/concatenate_all_results.py b/evaluation/results/opt/concatenate_all_results.py index dc2ff023..4e8fe616 100644 --- a/evaluation/results/opt/concatenate_all_results.py +++ b/evaluation/results/opt/concatenate_all_results.py @@ -15,7 +15,7 @@ def get_args(): MODEL = "opt-175b-meg-ds" # MODEL = "global_step95000" RESULTS_REGEX = re.compile(rf"(eai|bs)_results_lm-eval_{MODEL}_(\d{4}-\d{2}-\d{2}-\d{2}-\d{2}-\d{2})_backup\.json") -def get_all_files_that_match_results_in_folder(root_folder: Path, regex: Pattern) -> List[Path]: +def get_all_files_that_match_results_in_folder(root_folder: Path) -> List[Path]: json_files = [] for folder in root_folder.iterdir(): if folder.is_dir():