From 69eae1846000884cbc231ef2f7d161ac32da559f Mon Sep 17 00:00:00 2001
From: thomasw21 <24695242+thomasw21@users.noreply.github.com>
Date: Mon, 4 Jul 2022 20:02:33 +0200
Subject: [PATCH 01/35] Get OPT specific code

---
 evaluation/results/opt/README.md              |   1 +
 ...nvert_transformers_checkpoint_to_meg_ds.py | 196 ++++++++++++++++++
 .../results/opt/run_opt_evaluation.slurm      | 121 +++++++++++
 3 files changed, 318 insertions(+)
 create mode 100644 evaluation/results/opt/README.md
 create mode 100644 evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
 create mode 100644 evaluation/results/opt/run_opt_evaluation.slurm

diff --git a/evaluation/results/opt/README.md b/evaluation/results/opt/README.md
new file mode 100644
index 00000000..5fb71237
--- /dev/null
+++ b/evaluation/results/opt/README.md
@@ -0,0 +1 @@
+Utilities required to run OPT evaluation
diff --git a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
new file mode 100644
index 00000000..abb9803c
--- /dev/null
+++ b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
@@ -0,0 +1,196 @@
+import argparse
+import json
+import re, os
+from functools import partial
+from multiprocessing import Pool
+from typing import List, Optional, Dict
+
+import torch
+from tqdm import tqdm
+
+
+def get_args():
+    parser = argparse.ArgumentParser()
+    # Required parameters
+    parser.add_argument(
+        "--opt_checkpoint_path",
+        default=None,
+        type=str,
+        required=True,
+        help="Path to the transformers OPT checkpoint path.",
+    )
+    parser.add_argument(
+        "--opt_sharded_index_path",
+        default=None,
+        type=str,
+        required=True,
+        help="Path to the transformers OPT checkpoint metadata path.",
+    )
+    parser.add_argument(
+        "--megatron_dump_folder_path", default=None, type=str, required=True,
+        help="Path to the output Megatron-DS model."
+    )
+    parser.add_argument(
+        "--num-proc", default=1, type=int,
+    )
+    return parser.parse_args()
+
+
+def compute_meg_ds_weight_names(num_layers: int):
+    return {
+        "layer_01-model_00-model_states.pt": [
+            "word_embeddings.weight",
+            "position_embeddings.weight",
+        ],
+        **{
+            f"layer_{str(layer_id).zfill(2)}-model_00-model_states.pt": [
+                "input_layernorm.weight",
+                "input_layernorm.bias",
+                "self_attention.query_key_value.weight",
+                "self_attention.query_key_value.bias",
+                "self_attention.dense.weight",
+                "self_attention.dense.bias",
+                "post_attention_layernorm.weight",
+                "post_attention_layernorm.bias",
+                "mlp.dense_h_to_4h.weight",
+                "mlp.dense_h_to_4h.bias",
+                "mlp.dense_4h_to_h.weight",
+                "mlp.dense_4h_to_h.bias",
+            ]
+            for layer_id in range(3, num_layers + 3)
+        },
+        f"layer_{str(num_layers + 5).zfill(2)}-model_00-model_states.pt": [
+            "weight",
+            "bias"
+        ]
+    }
+
+NON_TRANSFORMERS_BLOCK_WEIGHTS = {
+    "word_embeddings.weight": "decoder.embed_tokens.weight",
+    "position_embeddings.weight": "decoder.embed_positions.weight",
+    "weight": "decoder.final_layer_norm.weight",
+    "bias": "decoder.final_layer_norm.bias"
+}
+TRANSFORMERS_BLOCK_WEIGHTS = {
+    "input_layernorm.weight": ["self_attn_layer_norm.weight"],
+    "input_layernorm.bias": ["self_attn_layer_norm.bias"],
+    "self_attention.query_key_value.weight": ["self_attn.q_proj.weight", "self_attn.k_proj.weight", "self_attn.v_proj.weight"],
+    "self_attention.query_key_value.bias": ["self_attn.q_proj.bias", "self_attn.k_proj.bias", "self_attn.v_proj.bias"],
+    "self_attention.dense.weight": ["self_attn.out_proj.weight"],
+    "self_attention.dense.bias": ["self_attn.out_proj.bias"],
+    "post_attention_layernorm.weight": ["final_layer_norm.weight"],
+    "post_attention_layernorm.bias": ["final_layer_norm.bias"],
+    "mlp.dense_h_to_4h.weight": ["fc1.weight"],
+    "mlp.dense_h_to_4h.bias": ["fc1.bias"],
+    "mlp.dense_4h_to_h.weight": ["fc2.weight"],
+    "mlp.dense_4h_to_h.bias": ["fc2.bias"]
+}
+def get_transformers_weight_names(meg_ds_weight: str, layer_id: Optional[int]) -> List[str]:
+    if layer_id is None:
+        return [NON_TRANSFORMERS_BLOCK_WEIGHTS[meg_ds_weight]]
+    else:
+        return [f"decoder.layers.{layer_id}.{tfrs_block_name}" for tfrs_block_name in TRANSFORMERS_BLOCK_WEIGHTS[meg_ds_weight]]
+
+def get_layer_id(meg_ds_filename: str, total_num_layers: int) -> Optional[int]:
+    layer_id = int(re.match(r"layer_(\d*)-model_00-model_states.pt", meg_ds_filename)[1]) - 3
+
+    if layer_id < 0:
+        return None
+
+    if layer_id >= total_num_layers:
+        return None
+
+    return layer_id
+
+def find_transformers_weights_and_save_meg_ds_weights(
+    meg_ds_filename: str,
+    meg_ds_weight_names: List[str],
+    opt_checkpoint_path: str,
+    megatron_dump_folder_path:str,
+    total_num_layers: int,
+    trfs_weight_map: Dict[str, str]
+):
+    layer_id = get_layer_id(meg_ds_filename, total_num_layers=total_num_layers)
+    trfs_weight_namess = {meg_ds_weight_name: get_transformers_weight_names(meg_ds_weight_name, layer_id=layer_id) for meg_ds_weight_name in meg_ds_weight_names}
+
+    # Find the path they live in.
+    trfs_filenames = set(trfs_weight_map[trfs_weight_name] for trfs_weight_names in trfs_weight_namess.values() for trfs_weight_name in trfs_weight_names)
+    trfs_filename_to_weights = {
+        trfs_filename: torch.load(os.path.join(opt_checkpoint_path, trfs_filename), map_location="cpu")
+        for trfs_filename in trfs_filenames
+    }
+
+    # query those weights
+    result = {
+        meg_ds_weight_name: [
+            trfs_filename_to_weights[trfs_weight_map[tfrs_weight_name]][tfrs_weight_name]
+            for tfrs_weight_name in tfrs_weight_names
+        ]
+        for meg_ds_weight_name, tfrs_weight_names in trfs_weight_namess.items()
+    }
+
+    # possibly concatenate
+    save_path = os.path.join(megatron_dump_folder_path, meg_ds_filename)
+    with open(save_path, "wb") as fo:
+        torch.save(
+            {key: torch.cat(value) for key, value in result.items()},
+            fo
+        )
+
+
+def convert_opt_checkpoint_to_megatron(
+    opt_checkpoint_path: str,
+    megatron_dump_folder_path: str,
+    opt_index_path: str,
+    num_proc: int
+):
+    # Get total number of layers
+    with open(opt_index_path, "r") as fi:
+        index_file = json.load(fi)["weight_map"]
+    # Compute total amount of layers
+    total_amount_of_layers = 0
+    for weight_name in index_file.keys():
+        match = re.match(r"decoder.layers.(\d*).*", weight_name)
+        if match is not None:
+            total_amount_of_layers = max(int(match[1]), total_amount_of_layers)
+    total_amount_of_layers += 1
+
+    # Given the total number of layers we can compute exactly each meg_ds params we need to find.
+    meg_ds_filename_to_meg_ds_weights = compute_meg_ds_weight_names(total_amount_of_layers)
+
+    # Given the needed weights we can query them from the transformers checkpoint
+    # We have to be smart about it and load a bin file once and get everything.
+    if num_proc == 1:
+        for meg_ds_filename, meg_ds_weight_names in tqdm(meg_ds_filename_to_meg_ds_weights.items()):
+            find_transformers_weights_and_save_meg_ds_weights(
+                meg_ds_filename=meg_ds_filename,
+                meg_ds_weight_names=meg_ds_weight_names,
+                opt_checkpoint_path=opt_checkpoint_path,
+                megatron_dump_folder_path=megatron_dump_folder_path,
+                total_num_layers=total_amount_of_layers,
+                trfs_weight_map=index_file
+            )
+    else:
+        with Pool(num_proc) as pool:
+            pool.starmap(
+                partial(
+                    find_transformers_weights_and_save_meg_ds_weights,
+                    opt_checkpoint_path=opt_checkpoint_path,
+                    megatron_dump_folder_path=megatron_dump_folder_path,
+                    total_num_layers=total_amount_of_layers,
+                    trfs_weight_map=index_file
+                ),
+                tqdm(meg_ds_filename_to_meg_ds_weights.items())
+            )
+
+def main():
+    args = get_args()
+    convert_opt_checkpoint_to_megatron(
+        opt_checkpoint_path=args.opt_checkpoint_path,
+        megatron_dump_folder_path=args.megatron_dump_folder_path,
+        opt_index_path=args.opt_sharded_index_path,
+        num_proc=args.num_proc
+    )
+
+if __name__ == "__main__":
+    main()
diff --git a/evaluation/results/opt/run_opt_evaluation.slurm b/evaluation/results/opt/run_opt_evaluation.slurm
new file mode 100644
index 00000000..b068b3c2
--- /dev/null
+++ b/evaluation/results/opt/run_opt_evaluation.slurm
@@ -0,0 +1,121 @@
+#!/bin/bash
+#SBATCH --job-name=eai-eval-opt
+#SBATCH --partition=gpu_p5
+#SBATCH --constraint=a100
+#SBATCH --reservation=hug
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1          # crucial - only 1 task per dist per node!
+#SBATCH --cpus-per-task=64           # number of cores per tasks
+#SBATCH --hint=nomultithread         # we get physical cores not logical
+#SBATCH --gres=gpu:8                 # number of gpus
+#SBATCH --time 20:00:00              # maximum execution time (HH:MM:SS)
+#SBATCH --output=/gpfswork/rech/six/uty16tp/logs/%x-%j.out           # output file name
+#SBATCH --account=six@a100
+
+set -x -e
+
+source $six_ALL_CCFRWORK/start-py38-pt111
+conda activate thomas_lm_eval
+
+echo "START TIME: $(date)"
+
+
+CHECKPOINT_PATH=$six_ALL_CCFRSCRATCH/opt/opt-175b-meg-ds
+MEGATRON_DEEPSPEED_REPO=/gpfswork/rech/six/uty16tp/code/big_science/Megatron-DeepSpeed
+export HF_DATASETS_OFFLINE=1
+export TRANSFORMERS_OFFLINE=1
+
+export TRANSFORMERS_CACHE=$six_ALL_CCFRWORK/models
+export HF_DATASETS_CACHE=$six_ALL_CCFRWORK/datasets
+export HF_MODULES_CACHE=$six_ALL_CCFRWORK/modules
+export HF_METRICS_CACHE=$six_ALL_CCFRWORK/metrics
+
+cd $MEGATRON_DEEPSPEED_REPO
+
+# Make sure you use the slow version of the tokenizer.
+TOKENIZER_NAME_OR_PATH=bigscience/opt
+
+PP_SIZE=8
+TP_SIZE=1
+SEQ_LEN=2048
+
+# different from the training MICRO_BATCH_SIZE - no optim memory, so can do bigger BS
+# make as big as it can fit into gpu w/o OOM, but not too close to 100%
+EVAL_MICRO_BATCH_SIZE=1
+
+#dummy arguments to make megatron happy.
+MEGATRON_REQUIRED_ARGS=" \
+    --num-layers -1 \
+    --hidden-size -1 \
+    --num-attention-heads -1 \
+    --seq-length -1  \
+    --max-position-embeddings -1 \
+"
+
+
+ZERO_STAGE=0
+
+config_json="./ds_config.json"
+
+# Deepspeed figures out GAS dynamically from dynamic GBS via set_train_batch_size()
+cat <<EOT > $config_json
+{
+  "train_micro_batch_size_per_gpu": 1,
+  "train_batch_size": 1,
+  "gradient_clipping": 1.0,
+  "zero_optimization": {
+    "stage": $ZERO_STAGE
+  },
+  "bf16": {
+    "enabled": true
+  },
+  "steps_per_print": 2000,
+  "wall_clock_breakdown": false
+}
+EOT
+
+OPT_FOLDER=$WORK/opt
+
+CMD="./tasks/eval_harness/evaluate.py  \
+    --load $CHECKPOINT_PATH \
+    --results_path $OPT_FOLDER/eai_results.json \
+    --tensor-model-parallel-size $TP_SIZE  \
+    --pipeline-model-parallel-size $PP_SIZE \
+    --tokenizer-type PretrainedFromHF \
+    --tokenizer-name-or-path $TOKENIZER_NAME_OR_PATH \
+    --micro-batch-size $EVAL_MICRO_BATCH_SIZE \
+    --no-load-optim \
+    --relu \
+    --no-load-rng \
+    --fp16 \
+    --inference \
+    --seq-length $SEQ_LEN \
+    --task_list arc_challenge,arc_easy,boolq,copa,headqa,hellaswag,lambada,logiqa,mathqa,mc_taco,mrpc,multirc,openbookqa,piqa,prost,pubmedqa,qnli,qqp,race,rte,sciq,sst,triviaqa,webqs,wic,winogrande,wnli,wsc \
+    --deepspeed \
+    --deepspeed_config ds_config.json \
+    --intermed_results \
+    --adaptive_seq_len \
+    --micro_bs_multiplier 4 \
+    $MEGATRON_REQUIRED_ARGS \
+    "
+
+GPUS_PER_NODE=8
+NNODES=$SLURM_NNODES
+MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1)
+MASTER_PORT=6000
+export LAUNCHER="python -u -m torch.distributed.run \
+    --nproc_per_node $GPUS_PER_NODE \
+    --nnodes $NNODES \
+    --rdzv_endpoint $MASTER_ADDR:$MASTER_PORT \
+    --rdzv_backend c10d \
+    --max_restarts 0 \
+    --tee 3 \
+    "
+
+export CUDA_LAUNCH_BLOCKING=1
+
+echo $LAUNCHER $CMD
+
+export PYTHONPATH=$MEGATRON_DEEPSPEED_REPO
+
+$LAUNCHER $CMD 2>&1 | tee $OPT_FOLDER/eval-harness.log

From ea5fe63d105110063c5fa4edbd3895d270db0271 Mon Sep 17 00:00:00 2001
From: thomasw21 <24695242+thomasw21@users.noreply.github.com>
Date: Mon, 4 Jul 2022 21:41:47 +0200
Subject: [PATCH 02/35] Run inference for opt

---
 .../results/opt/run_opt_evaluation.slurm      | 37 ++++++++++++-------
 1 file changed, 24 insertions(+), 13 deletions(-)

diff --git a/evaluation/results/opt/run_opt_evaluation.slurm b/evaluation/results/opt/run_opt_evaluation.slurm
index b068b3c2..b0d3daf3 100644
--- a/evaluation/results/opt/run_opt_evaluation.slurm
+++ b/evaluation/results/opt/run_opt_evaluation.slurm
@@ -33,23 +33,39 @@ export HF_METRICS_CACHE=$six_ALL_CCFRWORK/metrics
 cd $MEGATRON_DEEPSPEED_REPO
 
 # Make sure you use the slow version of the tokenizer.
-TOKENIZER_NAME_OR_PATH=bigscience/opt
+# Same tokenizer for 125m and 175b
+TOKENIZER_NAME_OR_PATH=/gpfsscratch/rech/six/commun/opt/opt-125m
 
 PP_SIZE=8
 TP_SIZE=1
+
+NHIDDEN=12288
+NLAYERS=96
+NHEADS=96
 SEQ_LEN=2048
 
 # different from the training MICRO_BATCH_SIZE - no optim memory, so can do bigger BS
 # make as big as it can fit into gpu w/o OOM, but not too close to 100%
 EVAL_MICRO_BATCH_SIZE=1
 
-#dummy arguments to make megatron happy.
-MEGATRON_REQUIRED_ARGS=" \
-    --num-layers -1 \
-    --hidden-size -1 \
-    --num-attention-heads -1 \
-    --seq-length -1  \
-    --max-position-embeddings -1 \
+MEGATRON_REQUIRED_ARGS="
+    --pp-partition-method 'type:transformer' \
+    --num-layers $NLAYERS \
+    --hidden-size $NHIDDEN \
+    --num-attention-heads $NHEADS \
+    --seq-length $SEQ_LEN \
+    --max-position-embeddings $SEQ_LEN \
+    --tokenizer-type PretrainedFromHF \
+    --tokenizer-name-or-path $TOKENIZER_NAME_OR_PATH \
+    --fp16 \
+    --relu \
+    --bf16 \
+    --seed 42 \
+    --position-embedding-type alibi \
+    --checkpoint-activations \
+    --abort-on-unmet-fused-kernel-constraints \
+    --kill-switch-path $KILL_SWITCH_PATH \
+    --pad-vocab-size-to 250880 \
 "
 
 
@@ -81,15 +97,10 @@ CMD="./tasks/eval_harness/evaluate.py  \
     --results_path $OPT_FOLDER/eai_results.json \
     --tensor-model-parallel-size $TP_SIZE  \
     --pipeline-model-parallel-size $PP_SIZE \
-    --tokenizer-type PretrainedFromHF \
-    --tokenizer-name-or-path $TOKENIZER_NAME_OR_PATH \
     --micro-batch-size $EVAL_MICRO_BATCH_SIZE \
     --no-load-optim \
-    --relu \
     --no-load-rng \
-    --fp16 \
     --inference \
-    --seq-length $SEQ_LEN \
     --task_list arc_challenge,arc_easy,boolq,copa,headqa,hellaswag,lambada,logiqa,mathqa,mc_taco,mrpc,multirc,openbookqa,piqa,prost,pubmedqa,qnli,qqp,race,rte,sciq,sst,triviaqa,webqs,wic,winogrande,wnli,wsc \
     --deepspeed \
     --deepspeed_config ds_config.json \

From 5539293a31486bdad0de56ea09ed80492daded64 Mon Sep 17 00:00:00 2001
From: thomasw21 <24695242+thomasw21@users.noreply.github.com>
Date: Mon, 4 Jul 2022 21:47:46 +0200
Subject: [PATCH 03/35] Run inference for opt

---
 .../results/opt/run_opt_evaluation_125m.slurm | 128 ++++++++++++++++++
 ...on.slurm => run_opt_evaluation_175b.slurm} |  17 ++-
 2 files changed, 136 insertions(+), 9 deletions(-)
 create mode 100644 evaluation/results/opt/run_opt_evaluation_125m.slurm
 rename evaluation/results/opt/{run_opt_evaluation.slurm => run_opt_evaluation_175b.slurm} (92%)

diff --git a/evaluation/results/opt/run_opt_evaluation_125m.slurm b/evaluation/results/opt/run_opt_evaluation_125m.slurm
new file mode 100644
index 00000000..627dc58e
--- /dev/null
+++ b/evaluation/results/opt/run_opt_evaluation_125m.slurm
@@ -0,0 +1,128 @@
+#!/bin/bash
+#SBATCH --job-name=eai-eval-opt-125m
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --cpus-per-task=10
+#SBATCH --hint=nomultithread
+#SBATCH --gres=gpu:1
+#SBATCH --time 20:00:00
+#SBATCH --output=/gpfswork/rech/six/uty16tp/logs/%x-%j.out
+#SBATCH --account=six@v100
+
+set -x -e
+
+source $six_ALL_CCFRWORK/start-py38-pt111
+conda activate thomas_lm_eval
+
+echo "START TIME: $(date)"
+
+
+CHECKPOINT_PATH=$six_ALL_CCFRSCRATCH/opt/opt-125m-meg-ds
+MEGATRON_DEEPSPEED_REPO=/gpfswork/rech/six/uty16tp/code/big_science/Megatron-DeepSpeed
+export HF_DATASETS_OFFLINE=1
+export TRANSFORMERS_OFFLINE=1
+
+export TRANSFORMERS_CACHE=$six_ALL_CCFRWORK/models
+export HF_DATASETS_CACHE=$six_ALL_CCFRWORK/datasets
+export HF_MODULES_CACHE=$six_ALL_CCFRWORK/modules
+export HF_METRICS_CACHE=$six_ALL_CCFRWORK/metrics
+
+cd $MEGATRON_DEEPSPEED_REPO
+
+# Make sure you use the slow version of the tokenizer.
+# Same tokenizer for 125m and 175b
+TOKENIZER_NAME_OR_PATH=/gpfsscratch/rech/six/commun/opt/opt-125m
+
+PP_SIZE=1
+TP_SIZE=1
+
+NHIDDEN=768
+NLAYERS=12
+NHEADS=12
+SEQ_LEN=2048
+
+# different from the training MICRO_BATCH_SIZE - no optim memory, so can do bigger BS
+# make as big as it can fit into gpu w/o OOM, but not too close to 100%
+EVAL_MICRO_BATCH_SIZE=1
+
+MEGATRON_REQUIRED_ARGS="
+    --pp-partition-method 'type:transformer' \
+    --num-layers $NLAYERS \
+    --hidden-size $NHIDDEN \
+    --num-attention-heads $NHEADS \
+    --seq-length $SEQ_LEN \
+    --max-position-embeddings $SEQ_LEN \
+    --tokenizer-type PretrainedFromHF \
+    --tokenizer-name-or-path $TOKENIZER_NAME_OR_PATH \
+    --fp16 \
+    --relu \
+    --seed 42 \
+"
+
+
+ZERO_STAGE=0
+
+config_json="./ds_config.json"
+
+# Deepspeed figures out GAS dynamically from dynamic GBS via set_train_batch_size()
+cat <<EOT > $config_json
+{
+  "train_micro_batch_size_per_gpu": 1,
+  "train_batch_size": 1,
+  "gradient_clipping": 1.0,
+  "zero_optimization": {
+    "stage": $ZERO_STAGE
+  },
+  "fp16": {
+    "enabled": true,
+    "loss_scale": 0,
+    "loss_scale_window": 500,
+    "hysteresis": 2,
+    "min_loss_scale": 1,
+    "initial_scale_power": 12
+  },
+  "steps_per_print": 2000,
+  "wall_clock_breakdown": false
+}
+EOT
+
+OPT_FOLDER=$WORK/opt
+
+CMD="./tasks/eval_harness/evaluate.py  \
+    --load $CHECKPOINT_PATH \
+    --results_path $OPT_FOLDER/eai_results.json \
+    --tensor-model-parallel-size $TP_SIZE  \
+    --pipeline-model-parallel-size $PP_SIZE \
+    --micro-batch-size $EVAL_MICRO_BATCH_SIZE \
+    --no-load-optim \
+    --no-load-rng \
+    --inference \
+    --task_list arc_challenge,arc_easy,boolq,copa,headqa,hellaswag,lambada,logiqa,mathqa,mc_taco,mrpc,multirc,openbookqa,piqa,prost,pubmedqa,qnli,qqp,race,rte,sciq,sst,triviaqa,webqs,wic,winogrande,wnli,wsc \
+    --deepspeed \
+    --deepspeed_config ds_config.json \
+    --intermed_results \
+    --adaptive_seq_len \
+    --micro_bs_multiplier 4 \
+    $MEGATRON_REQUIRED_ARGS \
+    "
+
+GPUS_PER_NODE=1
+NNODES=$SLURM_NNODES
+MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1)
+MASTER_PORT=6000
+export LAUNCHER="python -u -m torch.distributed.run \
+    --nproc_per_node $GPUS_PER_NODE \
+    --nnodes $NNODES \
+    --rdzv_endpoint $MASTER_ADDR:$MASTER_PORT \
+    --rdzv_backend c10d \
+    --max_restarts 0 \
+    --tee 3 \
+    "
+
+export CUDA_LAUNCH_BLOCKING=1
+
+echo $LAUNCHER $CMD
+
+export PYTHONPATH=$MEGATRON_DEEPSPEED_REPO
+
+$LAUNCHER $CMD 2>&1 | tee $OPT_FOLDER/eval-harness.log
diff --git a/evaluation/results/opt/run_opt_evaluation.slurm b/evaluation/results/opt/run_opt_evaluation_175b.slurm
similarity index 92%
rename from evaluation/results/opt/run_opt_evaluation.slurm
rename to evaluation/results/opt/run_opt_evaluation_175b.slurm
index b0d3daf3..d6bf0215 100644
--- a/evaluation/results/opt/run_opt_evaluation.slurm
+++ b/evaluation/results/opt/run_opt_evaluation_175b.slurm
@@ -1,5 +1,5 @@
 #!/bin/bash
-#SBATCH --job-name=eai-eval-opt
+#SBATCH --job-name=eai-eval-opt-175b
 #SBATCH --partition=gpu_p5
 #SBATCH --constraint=a100
 #SBATCH --reservation=hug
@@ -59,13 +59,7 @@ MEGATRON_REQUIRED_ARGS="
     --tokenizer-name-or-path $TOKENIZER_NAME_OR_PATH \
     --fp16 \
     --relu \
-    --bf16 \
     --seed 42 \
-    --position-embedding-type alibi \
-    --checkpoint-activations \
-    --abort-on-unmet-fused-kernel-constraints \
-    --kill-switch-path $KILL_SWITCH_PATH \
-    --pad-vocab-size-to 250880 \
 "
 
 
@@ -82,8 +76,13 @@ cat <<EOT > $config_json
   "zero_optimization": {
     "stage": $ZERO_STAGE
   },
-  "bf16": {
-    "enabled": true
+  "fp16": {
+    "enabled": true,
+    "loss_scale": 0,
+    "loss_scale_window": 500,
+    "hysteresis": 2,
+    "min_loss_scale": 1,
+    "initial_scale_power": 12
   },
   "steps_per_print": 2000,
   "wall_clock_breakdown": false

From 3d54dd9ca509c738a82919e76e3b19e74ea98c5e Mon Sep 17 00:00:00 2001
From: thomasw21 <24695242+thomasw21@users.noreply.github.com>
Date: Mon, 4 Jul 2022 23:20:12 +0200
Subject: [PATCH 04/35] Got it running

---
 .../opt/convert_transformers_checkpoint_to_meg_ds.py      | 5 ++++-
 evaluation/results/opt/run_opt_evaluation_125m.slurm      | 8 +++++---
 evaluation/results/opt/run_opt_evaluation_175b.slurm      | 8 +++++---
 3 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
index abb9803c..be8fde9b 100644
--- a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
+++ b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
@@ -59,7 +59,7 @@ def compute_meg_ds_weight_names(num_layers: int):
             ]
             for layer_id in range(3, num_layers + 3)
         },
-        f"layer_{str(num_layers + 5).zfill(2)}-model_00-model_states.pt": [
+        f"layer_{str(num_layers + 4).zfill(2)}-model_00-model_states.pt": [
             "weight",
             "bias"
         ]
@@ -183,6 +183,9 @@ def convert_opt_checkpoint_to_megatron(
                 tqdm(meg_ds_filename_to_meg_ds_weights.items())
             )
 
+    # Create dummy mp_rank_00_model_states.pt
+    torch.save({"mp_world_size": 1, "module": None, "dp_world_size": 1}, os.path.join(megatron_dump_folder_path, "mp_rank_00_model_states.pt"))
+
 def main():
     args = get_args()
     convert_opt_checkpoint_to_megatron(
diff --git a/evaluation/results/opt/run_opt_evaluation_125m.slurm b/evaluation/results/opt/run_opt_evaluation_125m.slurm
index 627dc58e..0ab36bb6 100644
--- a/evaluation/results/opt/run_opt_evaluation_125m.slurm
+++ b/evaluation/results/opt/run_opt_evaluation_125m.slurm
@@ -40,23 +40,25 @@ NHIDDEN=768
 NLAYERS=12
 NHEADS=12
 SEQ_LEN=2048
+MAX_POSITION_EMBEDDINGS=2050
 
 # different from the training MICRO_BATCH_SIZE - no optim memory, so can do bigger BS
 # make as big as it can fit into gpu w/o OOM, but not too close to 100%
 EVAL_MICRO_BATCH_SIZE=1
 
 MEGATRON_REQUIRED_ARGS="
-    --pp-partition-method 'type:transformer' \
     --num-layers $NLAYERS \
     --hidden-size $NHIDDEN \
     --num-attention-heads $NHEADS \
     --seq-length $SEQ_LEN \
-    --max-position-embeddings $SEQ_LEN \
+    --max-position-embeddings $MAX_POSITION_EMBEDDINGS \
     --tokenizer-type PretrainedFromHF \
     --tokenizer-name-or-path $TOKENIZER_NAME_OR_PATH \
     --fp16 \
     --relu \
     --seed 42 \
+    --pad-vocab-size-to 50272 \
+    --make-vocab-size-divisible-by 1\
 "
 
 
@@ -86,7 +88,7 @@ cat <<EOT > $config_json
 }
 EOT
 
-OPT_FOLDER=$WORK/opt
+OPT_FOLDER=$WORK/opt/opt-125m
 
 CMD="./tasks/eval_harness/evaluate.py  \
     --load $CHECKPOINT_PATH \
diff --git a/evaluation/results/opt/run_opt_evaluation_175b.slurm b/evaluation/results/opt/run_opt_evaluation_175b.slurm
index d6bf0215..a2d40139 100644
--- a/evaluation/results/opt/run_opt_evaluation_175b.slurm
+++ b/evaluation/results/opt/run_opt_evaluation_175b.slurm
@@ -43,23 +43,25 @@ NHIDDEN=12288
 NLAYERS=96
 NHEADS=96
 SEQ_LEN=2048
+MAX_POSITION_EMBEDDINGS=2050
 
 # different from the training MICRO_BATCH_SIZE - no optim memory, so can do bigger BS
 # make as big as it can fit into gpu w/o OOM, but not too close to 100%
 EVAL_MICRO_BATCH_SIZE=1
 
 MEGATRON_REQUIRED_ARGS="
-    --pp-partition-method 'type:transformer' \
     --num-layers $NLAYERS \
     --hidden-size $NHIDDEN \
     --num-attention-heads $NHEADS \
     --seq-length $SEQ_LEN \
-    --max-position-embeddings $SEQ_LEN \
+    --max-position-embeddings $MAX_POSITION_EMBEDDINGS \
     --tokenizer-type PretrainedFromHF \
     --tokenizer-name-or-path $TOKENIZER_NAME_OR_PATH \
     --fp16 \
     --relu \
     --seed 42 \
+    --pad-vocab-size-to 50272 \
+    --make-vocab-size-divisible-by 1\
 "
 
 
@@ -89,7 +91,7 @@ cat <<EOT > $config_json
 }
 EOT
 
-OPT_FOLDER=$WORK/opt
+OPT_FOLDER=$WORK/opt/opt-175m
 
 CMD="./tasks/eval_harness/evaluate.py  \
     --load $CHECKPOINT_PATH \

From e2bf1b90a3bf367ca533843dc4270b858bc0999b Mon Sep 17 00:00:00 2001
From: thomasw21 <24695242+thomasw21@users.noreply.github.com>
Date: Tue, 5 Jul 2022 00:50:25 +0200
Subject: [PATCH 05/35] Turns out meg-ds has a weird way of merging qkv

---
 ...nvert_transformers_checkpoint_to_meg_ds.py | 30 +++++++++++++------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
index be8fde9b..0aa7e599 100644
--- a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
+++ b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
@@ -108,6 +108,8 @@ def find_transformers_weights_and_save_meg_ds_weights(
     opt_checkpoint_path: str,
     megatron_dump_folder_path:str,
     total_num_layers: int,
+    num_heads: int,
+    hidden_size: int,
     trfs_weight_map: Dict[str, str]
 ):
     layer_id = get_layer_id(meg_ds_filename, total_num_layers=total_num_layers)
@@ -132,8 +134,15 @@ def find_transformers_weights_and_save_meg_ds_weights(
     # possibly concatenate
     save_path = os.path.join(megatron_dump_folder_path, meg_ds_filename)
     with open(save_path, "wb") as fo:
+        # qkv are mixed s.t. [q1 k1 v1 q2 k2 v2 ...] with (1,2..) being head_id
         torch.save(
-            {key: torch.cat(value) for key, value in result.items()},
+            {
+                key: torch.cat(
+                    value.view(num_heads, 1, hidden_size//num_heads, hidden_size),
+                    dim=1
+                ).resize(3 * hidden_size, hidden_size)
+                for key, value in result.items()
+            },
             fo
         )
 
@@ -148,12 +157,11 @@ def convert_opt_checkpoint_to_megatron(
     with open(opt_index_path, "r") as fi:
         index_file = json.load(fi)["weight_map"]
     # Compute total amount of layers
-    total_amount_of_layers = 0
-    for weight_name in index_file.keys():
-        match = re.match(r"decoder.layers.(\d*).*", weight_name)
-        if match is not None:
-            total_amount_of_layers = max(int(match[1]), total_amount_of_layers)
-    total_amount_of_layers += 1
+    with open(os.path.join(opt_checkpoint_path, "config.json"), "r") as fi:
+        config = json.load(fi)
+    total_amount_of_layers = config["num_hidden_layers"]
+    num_heads = config["num_attention_heads"]
+    hidden_size = config["hidden_size"]
 
     # Given the total number of layers we can compute exactly each meg_ds params we need to find.
     meg_ds_filename_to_meg_ds_weights = compute_meg_ds_weight_names(total_amount_of_layers)
@@ -168,7 +176,9 @@ def convert_opt_checkpoint_to_megatron(
                 opt_checkpoint_path=opt_checkpoint_path,
                 megatron_dump_folder_path=megatron_dump_folder_path,
                 total_num_layers=total_amount_of_layers,
-                trfs_weight_map=index_file
+                trfs_weight_map=index_file,
+                num_heads=num_heads,
+                hidden_size=hidden_size
             )
     else:
         with Pool(num_proc) as pool:
@@ -178,7 +188,9 @@ def convert_opt_checkpoint_to_megatron(
                     opt_checkpoint_path=opt_checkpoint_path,
                     megatron_dump_folder_path=megatron_dump_folder_path,
                     total_num_layers=total_amount_of_layers,
-                    trfs_weight_map=index_file
+                    trfs_weight_map=index_file,
+                    num_heads=num_heads,
+                    hidden_size=hidden_size
                 ),
                 tqdm(meg_ds_filename_to_meg_ds_weights.items())
             )

From bbd3b5d5ee516e9d9fc4e5b6fb5fa6693584e0ef Mon Sep 17 00:00:00 2001
From: thomasw21 <24695242+thomasw21@users.noreply.github.com>
Date: Tue, 5 Jul 2022 00:54:42 +0200
Subject: [PATCH 06/35] Woops

---
 .../opt/convert_transformers_checkpoint_to_meg_ds.py       | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
index 0aa7e599..6f02dcf1 100644
--- a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
+++ b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
@@ -138,10 +138,13 @@ def find_transformers_weights_and_save_meg_ds_weights(
         torch.save(
             {
                 key: torch.cat(
-                    value.view(num_heads, 1, hidden_size//num_heads, hidden_size),
+                    [
+                        value.view(num_heads, 1, hidden_size//num_heads, hidden_size)
+                        for value in values
+                    ],
                     dim=1
                 ).resize(3 * hidden_size, hidden_size)
-                for key, value in result.items()
+                for key, values in result.items()
             },
             fo
         )

From f2d77a3ec940fe8d8d637307572ca96ccf1098c7 Mon Sep 17 00:00:00 2001
From: thomasw21 <24695242+thomasw21@users.noreply.github.com>
Date: Tue, 5 Jul 2022 00:57:42 +0200
Subject: [PATCH 07/35] Woops

---
 ...nvert_transformers_checkpoint_to_meg_ds.py | 21 ++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
index 6f02dcf1..14f4810d 100644
--- a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
+++ b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
@@ -102,6 +102,19 @@ def get_layer_id(meg_ds_filename: str, total_num_layers: int) -> Optional[int]:
 
     return layer_id
 
+def merge_layers(layers, num_heads: int, hidden_size: int):
+    if len(layers):
+        return layers[0]
+    else:
+        # We merge QKV
+        return torch.cat(
+            [
+                layer.view(num_heads, 1, hidden_size // num_heads, hidden_size)
+                for layer in layers
+            ],
+            dim=1
+        ).resize(3 * hidden_size, hidden_size)
+
 def find_transformers_weights_and_save_meg_ds_weights(
     meg_ds_filename: str,
     meg_ds_weight_names: List[str],
@@ -137,13 +150,7 @@ def find_transformers_weights_and_save_meg_ds_weights(
         # qkv are mixed s.t. [q1 k1 v1 q2 k2 v2 ...] with (1,2..) being head_id
         torch.save(
             {
-                key: torch.cat(
-                    [
-                        value.view(num_heads, 1, hidden_size//num_heads, hidden_size)
-                        for value in values
-                    ],
-                    dim=1
-                ).resize(3 * hidden_size, hidden_size)
+                key: merge_layers(values, num_heads=num_heads, hidden_size=hidden_size)
                 for key, values in result.items()
             },
             fo

From a852d048108d09a256d0f8e4201c1c1414f36def Mon Sep 17 00:00:00 2001
From: thomasw21 <24695242+thomasw21@users.noreply.github.com>
Date: Tue, 5 Jul 2022 01:02:11 +0200
Subject: [PATCH 08/35] Woops

---
 ...convert_transformers_checkpoint_to_meg_ds.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
index 14f4810d..2a8fb9b7 100644
--- a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
+++ b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
@@ -107,13 +107,16 @@ def merge_layers(layers, num_heads: int, hidden_size: int):
         return layers[0]
     else:
         # We merge QKV
-        return torch.cat(
-            [
-                layer.view(num_heads, 1, hidden_size // num_heads, hidden_size)
-                for layer in layers
-            ],
-            dim=1
-        ).resize(3 * hidden_size, hidden_size)
+        return torch.reshape(
+            torch.cat(
+                [
+                    layer.view(num_heads, 1, hidden_size // num_heads, hidden_size)
+                    for layer in layers
+                ],
+                dim=1
+            ),
+            (3 * hidden_size, hidden_size)
+        )
 
 def find_transformers_weights_and_save_meg_ds_weights(
     meg_ds_filename: str,

From e1d131ed8d8060e68b208d92ac0042382af665b4 Mon Sep 17 00:00:00 2001
From: thomasw21 <24695242+thomasw21@users.noreply.github.com>
Date: Tue, 5 Jul 2022 01:04:32 +0200
Subject: [PATCH 09/35] Woops

---
 .../results/opt/convert_transformers_checkpoint_to_meg_ds.py    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
index 2a8fb9b7..fe88418b 100644
--- a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
+++ b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
@@ -103,7 +103,7 @@ def get_layer_id(meg_ds_filename: str, total_num_layers: int) -> Optional[int]:
     return layer_id
 
 def merge_layers(layers, num_heads: int, hidden_size: int):
-    if len(layers):
+    if len(layers) == 0:
         return layers[0]
     else:
         # We merge QKV

From fa9c4f5ba618d0b741b7e4588bbd94fec31f5d00 Mon Sep 17 00:00:00 2001
From: thomasw21 <24695242+thomasw21@users.noreply.github.com>
Date: Tue, 5 Jul 2022 01:05:07 +0200
Subject: [PATCH 10/35] Woops

---
 .../results/opt/convert_transformers_checkpoint_to_meg_ds.py    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
index fe88418b..29d647b0 100644
--- a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
+++ b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
@@ -103,7 +103,7 @@ def get_layer_id(meg_ds_filename: str, total_num_layers: int) -> Optional[int]:
     return layer_id
 
 def merge_layers(layers, num_heads: int, hidden_size: int):
-    if len(layers) == 0:
+    if len(layers) == 1:
         return layers[0]
     else:
         # We merge QKV

From 65cef670601f79d8d6f8fb187061b67f77dc0298 Mon Sep 17 00:00:00 2001
From: thomasw21 <24695242+thomasw21@users.noreply.github.com>
Date: Tue, 5 Jul 2022 01:07:42 +0200
Subject: [PATCH 11/35] Woops

---
 ...nvert_transformers_checkpoint_to_meg_ds.py | 34 +++++++++++++------
 1 file changed, 24 insertions(+), 10 deletions(-)

diff --git a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
index 29d647b0..2fcc3972 100644
--- a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
+++ b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
@@ -107,16 +107,30 @@ def merge_layers(layers, num_heads: int, hidden_size: int):
         return layers[0]
     else:
         # We merge QKV
-        return torch.reshape(
-            torch.cat(
-                [
-                    layer.view(num_heads, 1, hidden_size // num_heads, hidden_size)
-                    for layer in layers
-                ],
-                dim=1
-            ),
-            (3 * hidden_size, hidden_size)
-        )
+        if len(layers[0].shape) == 1:
+            # bias
+            return torch.reshape(
+                torch.cat(
+                    [
+                        layer.view(num_heads, 1, hidden_size // num_heads)
+                        for layer in layers
+                    ],
+                    dim=1
+                ),
+                (3 * hidden_size, )
+            )
+        else:
+            #weight
+            return torch.reshape(
+                torch.cat(
+                    [
+                        layer.view(num_heads, 1, hidden_size // num_heads, hidden_size)
+                        for layer in layers
+                    ],
+                    dim=1
+                ),
+                (3 * hidden_size, hidden_size)
+            )
 
 def find_transformers_weights_and_save_meg_ds_weights(
     meg_ds_filename: str,

From 8ddfba990f28ffe2ac20af6bc02379ab6927aca3 Mon Sep 17 00:00:00 2001
From: thomasw21 <24695242+thomasw21@users.noreply.github.com>
Date: Tue, 5 Jul 2022 01:24:49 +0200
Subject: [PATCH 12/35] Turns out this is the last combination

---
 .../results/opt/convert_transformers_checkpoint_to_meg_ds.py  | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
index 2fcc3972..dab032d6 100644
--- a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
+++ b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
@@ -112,7 +112,7 @@ def merge_layers(layers, num_heads: int, hidden_size: int):
             return torch.reshape(
                 torch.cat(
                     [
-                        layer.view(num_heads, 1, hidden_size // num_heads)
+                        layer.view(num_heads, hidden_size // num_heads, 1)
                         for layer in layers
                     ],
                     dim=1
@@ -124,7 +124,7 @@ def merge_layers(layers, num_heads: int, hidden_size: int):
             return torch.reshape(
                 torch.cat(
                     [
-                        layer.view(num_heads, 1, hidden_size // num_heads, hidden_size)
+                        layer.view(num_heads, hidden_size // num_heads, 1, hidden_size)
                         for layer in layers
                     ],
                     dim=1

From cc465ca334f0cd12c09658433dda561748d51a63 Mon Sep 17 00:00:00 2001
From: thomasw21 <24695242+thomasw21@users.noreply.github.com>
Date: Tue, 5 Jul 2022 01:45:19 +0200
Subject: [PATCH 13/35] Hope this is better

---
 .../results/opt/convert_transformers_checkpoint_to_meg_ds.py  | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
index dab032d6..2fcc3972 100644
--- a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
+++ b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
@@ -112,7 +112,7 @@ def merge_layers(layers, num_heads: int, hidden_size: int):
             return torch.reshape(
                 torch.cat(
                     [
-                        layer.view(num_heads, hidden_size // num_heads, 1)
+                        layer.view(num_heads, 1, hidden_size // num_heads)
                         for layer in layers
                     ],
                     dim=1
@@ -124,7 +124,7 @@ def merge_layers(layers, num_heads: int, hidden_size: int):
             return torch.reshape(
                 torch.cat(
                     [
-                        layer.view(num_heads, hidden_size // num_heads, 1, hidden_size)
+                        layer.view(num_heads, 1, hidden_size // num_heads, hidden_size)
                         for layer in layers
                     ],
                     dim=1

From 2c9bea5542f348baa20c6efb8c9c851eaab5ae20 Mon Sep 17 00:00:00 2001
From: thomasw21 <24695242+thomasw21@users.noreply.github.com>
Date: Tue, 5 Jul 2022 02:52:59 +0200
Subject: [PATCH 14/35] Update conversion script

---
 .../opt/convert_transformers_checkpoint_to_meg_ds.py        | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
index 2fcc3972..62d083fa 100644
--- a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
+++ b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
@@ -62,6 +62,9 @@ def compute_meg_ds_weight_names(num_layers: int):
         f"layer_{str(num_layers + 4).zfill(2)}-model_00-model_states.pt": [
             "weight",
             "bias"
+        ],
+        f"layer_{str(num_layers + 5).zfill(2)}-model_00-model_states.pt": [
+            "lm_head.weight",
         ]
     }
 
@@ -69,7 +72,8 @@ def compute_meg_ds_weight_names(num_layers: int):
     "word_embeddings.weight": "decoder.embed_tokens.weight",
     "position_embeddings.weight": "decoder.embed_positions.weight",
     "weight": "decoder.final_layer_norm.weight",
-    "bias": "decoder.final_layer_norm.bias"
+    "bias": "decoder.final_layer_norm.bias",
+    "lm_head.weight": "lm_head.weight"
 }
 TRANSFORMERS_BLOCK_WEIGHTS = {
     "input_layernorm.weight": ["self_attn_layer_norm.weight"],

From 357f22ffa179ca5f4f0014d8d87bb3297f5793e2 Mon Sep 17 00:00:00 2001
From: thomasw21 <24695242+thomasw21@users.noreply.github.com>
Date: Tue, 5 Jul 2022 03:05:57 +0200
Subject: [PATCH 15/35] Revert "Update conversion script"

This reverts commit 2c9bea5542f348baa20c6efb8c9c851eaab5ae20.
---
 .../opt/convert_transformers_checkpoint_to_meg_ds.py        | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
index 62d083fa..2fcc3972 100644
--- a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
+++ b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
@@ -62,9 +62,6 @@ def compute_meg_ds_weight_names(num_layers: int):
         f"layer_{str(num_layers + 4).zfill(2)}-model_00-model_states.pt": [
             "weight",
             "bias"
-        ],
-        f"layer_{str(num_layers + 5).zfill(2)}-model_00-model_states.pt": [
-            "lm_head.weight",
         ]
     }
 
@@ -72,8 +69,7 @@ def compute_meg_ds_weight_names(num_layers: int):
     "word_embeddings.weight": "decoder.embed_tokens.weight",
     "position_embeddings.weight": "decoder.embed_positions.weight",
     "weight": "decoder.final_layer_norm.weight",
-    "bias": "decoder.final_layer_norm.bias",
-    "lm_head.weight": "lm_head.weight"
+    "bias": "decoder.final_layer_norm.bias"
 }
 TRANSFORMERS_BLOCK_WEIGHTS = {
     "input_layernorm.weight": ["self_attn_layer_norm.weight"],

From 80016c2b14593600f2236c8320f8e20dbe96f45f Mon Sep 17 00:00:00 2001
From: thomasw21 <24695242+thomasw21@users.noreply.github.com>
Date: Tue, 5 Jul 2022 11:27:27 +0200
Subject: [PATCH 16/35] Add checkpoint version

---
 .../opt/convert_transformers_checkpoint_to_meg_ds.py   | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
index 2fcc3972..aa497086 100644
--- a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
+++ b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
@@ -223,7 +223,15 @@ def convert_opt_checkpoint_to_megatron(
             )
 
     # Create dummy mp_rank_00_model_states.pt
-    torch.save({"mp_world_size": 1, "module": None, "dp_world_size": 1}, os.path.join(megatron_dump_folder_path, "mp_rank_00_model_states.pt"))
+    torch.save(
+        {
+            "mp_world_size": 1,
+            "module": None,
+            "dp_world_size": 1,
+            "checkpoint_version": 3
+        },
+        os.path.join(megatron_dump_folder_path, "mp_rank_00_model_states.pt")
+    )
 
 def main():
     args = get_args()

From ebb84d8fcf8b9d1e721e9b3482423e9e9394d216 Mon Sep 17 00:00:00 2001
From: thomasw21 <24695242+thomasw21@users.noreply.github.com>
Date: Tue, 5 Jul 2022 11:55:44 +0200
Subject: [PATCH 17/35] Add iteration

---
 .../results/opt/convert_transformers_checkpoint_to_meg_ds.py   | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
index aa497086..b49dae9e 100644
--- a/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
+++ b/evaluation/results/opt/convert_transformers_checkpoint_to_meg_ds.py
@@ -228,7 +228,8 @@ def convert_opt_checkpoint_to_megatron(
             "mp_world_size": 1,
             "module": None,
             "dp_world_size": 1,
-            "checkpoint_version": 3
+            "checkpoint_version": 3,
+            "iteration": 0
         },
         os.path.join(megatron_dump_folder_path, "mp_rank_00_model_states.pt")
     )

From b213b1af8a390483524276b71599e41f1be61f2e Mon Sep 17 00:00:00 2001
From: thomasw21 <24695242+thomasw21@users.noreply.github.com>
Date: Tue, 5 Jul 2022 14:36:39 +0200
Subject: [PATCH 18/35] I have Meg-DS

---
 evaluation/results/opt/run_opt_evaluation_125m.slurm | 1 +
 evaluation/results/opt/run_opt_evaluation_175b.slurm | 1 +
 2 files changed, 2 insertions(+)

diff --git a/evaluation/results/opt/run_opt_evaluation_125m.slurm b/evaluation/results/opt/run_opt_evaluation_125m.slurm
index 0ab36bb6..d3964792 100644
--- a/evaluation/results/opt/run_opt_evaluation_125m.slurm
+++ b/evaluation/results/opt/run_opt_evaluation_125m.slurm
@@ -59,6 +59,7 @@ MEGATRON_REQUIRED_ARGS="
     --seed 42 \
     --pad-vocab-size-to 50272 \
     --make-vocab-size-divisible-by 1\
+    --no-bias-gelu-fusion\
 "
 
 
diff --git a/evaluation/results/opt/run_opt_evaluation_175b.slurm b/evaluation/results/opt/run_opt_evaluation_175b.slurm
index a2d40139..bc7bbab7 100644
--- a/evaluation/results/opt/run_opt_evaluation_175b.slurm
+++ b/evaluation/results/opt/run_opt_evaluation_175b.slurm
@@ -62,6 +62,7 @@ MEGATRON_REQUIRED_ARGS="
     --seed 42 \
     --pad-vocab-size-to 50272 \
     --make-vocab-size-divisible-by 1\
+    --no-bias-gelu-fusion\
 "
 
 

From 87418444c86aa77de582e7dd9dd0e5fb431794d5 Mon Sep 17 00:00:00 2001
From: thomasw21 <24695242+thomasw21@users.noreply.github.com>
Date: Tue, 5 Jul 2022 15:37:49 +0200
Subject: [PATCH 19/35] Launch huge array

---
 .../opt/run_opt_bs_evaluation_125m.slurm      | 172 ++++++++++++++
 .../opt/run_opt_bs_evaluation_175b.slurm      | 218 ++++++++++++++++++
 .../results/opt/run_opt_evaluation_125m.slurm |  47 +++-
 .../results/opt/run_opt_evaluation_175b.slurm |  47 +++-
 4 files changed, 478 insertions(+), 6 deletions(-)
 create mode 100644 evaluation/results/opt/run_opt_bs_evaluation_125m.slurm
 create mode 100644 evaluation/results/opt/run_opt_bs_evaluation_175b.slurm

diff --git a/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm b/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm
new file mode 100644
index 00000000..b5ca83f3
--- /dev/null
+++ b/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm
@@ -0,0 +1,172 @@
+#!/bin/bash
+#SBATCH --job-name=bs-eval-opt-125m
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --cpus-per-task=10
+#SBATCH --hint=nomultithread
+#SBATCH --gres=gpu:1
+#SBATCH --time 20:00:00
+#SBATCH --output=/gpfswork/rech/six/uty16tp/logs/%x-%j.out
+#SBATCH --account=six@v100
+#SBATCH --array=0-70
+
+set -x -e
+
+source $six_ALL_CCFRWORK/start-py38-pt111
+conda activate muennighofflmeval
+
+echo "START TIME: $(date)"
+
+
+CHECKPOINT_PATH=$six_ALL_CCFRSCRATCH/opt/opt-125m-meg-ds
+MEGATRON_DEEPSPEED_REPO=/gpfswork/rech/six/uty16tp/code/big_science/Megatron-DeepSpeed
+export HF_DATASETS_OFFLINE=1
+export TRANSFORMERS_OFFLINE=1
+
+export TRANSFORMERS_CACHE=$six_ALL_CCFRWORK/models
+export HF_DATASETS_CACHE=$six_ALL_CCFRWORK/datasetseval
+export HF_MODULES_CACHE=$six_ALL_CCFRWORK/modules
+export HF_METRICS_CACHE=$six_ALL_CCFRWORK/metrics
+
+cd $MEGATRON_DEEPSPEED_REPO
+
+# Make sure you use the slow version of the tokenizer.
+# Same tokenizer for 125m and 175b
+TOKENIZER_NAME_OR_PATH=/gpfsscratch/rech/six/commun/opt/opt-125m
+
+PP_SIZE=1
+TP_SIZE=1
+
+NHIDDEN=768
+NLAYERS=12
+NHEADS=12
+SEQ_LEN=2048
+MAX_POSITION_EMBEDDINGS=2050
+
+# different from the training MICRO_BATCH_SIZE - no optim memory, so can do bigger BS
+# make as big as it can fit into gpu w/o OOM, but not too close to 100%
+EVAL_MICRO_BATCH_SIZE=1
+
+MEGATRON_REQUIRED_ARGS="
+    --num-layers $NLAYERS \
+    --hidden-size $NHIDDEN \
+    --num-attention-heads $NHEADS \
+    --seq-length $SEQ_LEN \
+    --max-position-embeddings $MAX_POSITION_EMBEDDINGS \
+    --tokenizer-type PretrainedFromHF \
+    --tokenizer-name-or-path $TOKENIZER_NAME_OR_PATH \
+    --fp16 \
+    --relu \
+    --seed 42 \
+    --pad-vocab-size-to 50272 \
+    --make-vocab-size-divisible-by 1\
+    --no-bias-gelu-fusion\
+"
+
+
+ZERO_STAGE=0
+
+config_json="./ds_config.json"
+
+# Deepspeed figures out GAS dynamically from dynamic GBS via set_train_batch_size()
+cat <<EOT > $config_json
+{
+  "train_micro_batch_size_per_gpu": 1,
+  "train_batch_size": 1,
+  "gradient_clipping": 1.0,
+  "zero_optimization": {
+    "stage": $ZERO_STAGE
+  },
+  "fp16": {
+    "enabled": true,
+    "loss_scale": 0,
+    "loss_scale_window": 500,
+    "hysteresis": 2,
+    "min_loss_scale": 1,
+    "initial_scale_power": 12
+  },
+  "steps_per_print": 2000,
+  "wall_clock_breakdown": false
+}
+EOT
+
+# --task_list GEM/web_nlg_en,GEM/web_nlg_en_challenge_test_numbers,GEM/web_nlg_en_challenge_test_scramble,GEM/web_nlg_en_challenge_validation_sample,GEM/web_nlg_ru,GEM/web_nlg_ru_challenge_test_scramble,GEM/web_nlg_ru_challenge_validation_sample,GEM/wiki_auto_asset_turk_challenge_test_asset_backtranslation,GEM/wiki_auto_asset_turk_challenge_test_asset_bfp02,GEM/wiki_auto_asset_turk_challenge_test_asset_bfp05,GEM/wiki_auto_asset_turk_challenge_test_asset_nopunc,GEM/wiki_auto_asset_turk_challenge_test_turk_backtranslation,GEM/wiki_auto_asset_turk_challenge_test_turk_bfp02,GEM/wiki_auto_asset_turk_challenge_test_turk_bfp05,GEM/wiki_auto_asset_turk_challenge_test_turk_nopunc,GEM/wiki_auto_asset_turk_test_asset,GEM/wiki_auto_asset_turk_test_turk,GEM/wiki_lingua_ar,GEM/wiki_lingua_cs,GEM/wiki_lingua_de,GEM/wiki_lingua_en,GEM/wiki_lingua_es,GEM/wiki_lingua_fr,GEM/wiki_lingua_hi,GEM/wiki_lingua_id,GEM/wiki_lingua_it,GEM/wiki_lingua_ja,GEM/wiki_lingua_ko,GEM/wiki_lingua_nl,GEM/wiki_lingua_pt,GEM/wiki_lingua_ru,GEM/wiki_lingua_th,GEM/wiki_lingua_tr,GEM/wiki_lingua_vi,GEM/wiki_lingua_zh,gem_xsum,gem_xsum_challenge_sample,gem_xsum_challenge_test_backtranslation,gem_xsum_challenge_test_bfp_02,gem_xsum_challenge_test_bfp_05,gem_xsum_challenge_test_covid,gem_xsum_challenge_test_nopunc,axb,axg,boolq,cb,cola,copa,crows_pairs_english,crows_pairs_french,diabla,e2e_nlg_cleaned,mnli,mnli_mismatched,multirc,piaf,qqp,rte,sst,tydiqa_primary,tydiqa_secondary,wic,wsc,wnli,wino_bias_type1_anti,wino_bias_type1_pro,wino_bias_type2_anti,wino_bias_type2_pro,xquad_ar,xquad_en\
+TASKS=(
+arc_challenge
+arc_easy
+boolq
+copa
+headqa
+hellaswag
+lambada
+logiqa
+mathqa
+mc_taco
+mrpc
+multirc
+openbookqa
+piqa
+prost
+pubmedqa
+qnli
+qqp
+race
+rte
+sciq
+sst
+triviaqa
+webqs
+wic
+winogrande
+wnli
+wsc
+)
+
+# Abort array jobs whose index lies outside TASKS; TASK would otherwise expand to an empty string.
+if [ "$SLURM_ARRAY_TASK_ID" -ge "${#TASKS[@]}" ]; then
+    echo "Please update the array size as the it doesn't correspond to the number of models we want to evaluate. Array size: $SLURM_ARRAY_TASK_COUNT, number of models: ${#TASKS[@]}"
+    exit 1
+fi
+TASK=${TASKS[$SLURM_ARRAY_TASK_ID]}
+
+OPT_FOLDER=$WORK/opt/opt-125m/$TASK
+mkdir -p $OPT_FOLDER
+
+CMD="./tasks/eval_harness/evaluate.py  \
+    --load $CHECKPOINT_PATH \
+    --results_path $OPT_FOLDER/bs_results.json \
+    --tensor-model-parallel-size $TP_SIZE  \
+    --pipeline-model-parallel-size $PP_SIZE \
+    --micro-batch-size $EVAL_MICRO_BATCH_SIZE \
+    --no-load-optim \
+    --no-load-rng \
+    --inference \
+    --task_list $TASK\
+    --deepspeed \
+    --deepspeed_config ds_config.json \
+    --intermed_results \
+    --adaptive_seq_len \
+    --micro_bs_multiplier 4 \
+    $MEGATRON_REQUIRED_ARGS \
+    "
+
+GPUS_PER_NODE=1
+NNODES=$SLURM_NNODES
+MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1)
+MASTER_PORT=6000
+export LAUNCHER="python -u -m torch.distributed.run \
+    --nproc_per_node $GPUS_PER_NODE \
+    --nnodes $NNODES \
+    --rdzv_endpoint $MASTER_ADDR:$MASTER_PORT \
+    --rdzv_backend c10d \
+    --max_restarts 0 \
+    --tee 3 \
+    "
+
+export CUDA_LAUNCH_BLOCKING=1
+
+echo $LAUNCHER $CMD
+
+export PYTHONPATH=$MEGATRON_DEEPSPEED_REPO
+
+$LAUNCHER $CMD 2>&1 | tee $OPT_FOLDER/bs-eval-harness.log
diff --git a/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm b/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm
new file mode 100644
index 00000000..85ed952d
--- /dev/null
+++ b/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm
@@ -0,0 +1,218 @@
+#!/bin/bash
+#SBATCH --job-name=bs-eval-opt-175b
+#SBATCH --partition=gpu_p5
+#SBATCH --constraint=a100
+#SBATCH --reservation=hug
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1          # crucial - only 1 task per dist per node!
+#SBATCH --cpus-per-task=64           # number of cores per tasks
+#SBATCH --hint=nomultithread         # we get physical cores not logical
+#SBATCH --gres=gpu:8                 # number of gpus
+#SBATCH --time 20:00:00              # maximum execution time (HH:MM:SS)
+#SBATCH --output=/gpfswork/rech/six/uty16tp/logs/%x-%j.out           # output file name
+#SBATCH --account=six@a100
+#SBATCH --array=0-70
+
+set -x -e
+
+source $six_ALL_CCFRWORK/start-py38-pt111
+conda activate muennighofflmeval
+
+echo "START TIME: $(date)"
+
+
+CHECKPOINT_PATH=$six_ALL_CCFRSCRATCH/opt/opt-175b-meg-ds
+MEGATRON_DEEPSPEED_REPO=/gpfswork/rech/six/uty16tp/code/big_science/Megatron-DeepSpeed
+export HF_DATASETS_OFFLINE=1
+export TRANSFORMERS_OFFLINE=1
+
+export TRANSFORMERS_CACHE=$six_ALL_CCFRWORK/models
+export HF_DATASETS_CACHE=$six_ALL_CCFRWORK/datasetseval
+export HF_MODULES_CACHE=$six_ALL_CCFRWORK/modules
+export HF_METRICS_CACHE=$six_ALL_CCFRWORK/metrics
+
+cd $MEGATRON_DEEPSPEED_REPO
+
+# Make sure you use the slow version of the tokenizer.
+# Same tokenizer for 125m and 175b
+TOKENIZER_NAME_OR_PATH=/gpfsscratch/rech/six/commun/opt/opt-125m
+
+PP_SIZE=8
+TP_SIZE=1
+
+NHIDDEN=12288
+NLAYERS=96
+NHEADS=96
+SEQ_LEN=2048
+MAX_POSITION_EMBEDDINGS=2050
+
+# different from the training MICRO_BATCH_SIZE - no optim memory, so can do bigger BS
+# make as big as it can fit into gpu w/o OOM, but not too close to 100%
+EVAL_MICRO_BATCH_SIZE=1
+
+MEGATRON_REQUIRED_ARGS="
+    --num-layers $NLAYERS \
+    --hidden-size $NHIDDEN \
+    --num-attention-heads $NHEADS \
+    --seq-length $SEQ_LEN \
+    --max-position-embeddings $MAX_POSITION_EMBEDDINGS \
+    --tokenizer-type PretrainedFromHF \
+    --tokenizer-name-or-path $TOKENIZER_NAME_OR_PATH \
+    --fp16 \
+    --relu \
+    --seed 42 \
+    --pad-vocab-size-to 50272 \
+    --make-vocab-size-divisible-by 1\
+    --no-bias-gelu-fusion\
+"
+
+
+ZERO_STAGE=0
+
+config_json="./ds_config.json"
+
+# Deepspeed figures out GAS dynamically from dynamic GBS via set_train_batch_size()
+cat <<EOT > $config_json
+{
+  "train_micro_batch_size_per_gpu": 1,
+  "train_batch_size": 1,
+  "gradient_clipping": 1.0,
+  "zero_optimization": {
+    "stage": $ZERO_STAGE
+  },
+  "fp16": {
+    "enabled": true,
+    "loss_scale": 0,
+    "loss_scale_window": 500,
+    "hysteresis": 2,
+    "min_loss_scale": 1,
+    "initial_scale_power": 12
+  },
+  "steps_per_print": 2000,
+  "wall_clock_breakdown": false
+}
+EOT
+
+# --task_list GEM/web_nlg_en,GEM/web_nlg_en_challenge_test_numbers,GEM/web_nlg_en_challenge_test_scramble,GEM/web_nlg_en_challenge_validation_sample,GEM/web_nlg_ru,GEM/web_nlg_ru_challenge_test_scramble,GEM/web_nlg_ru_challenge_validation_sample,GEM/wiki_auto_asset_turk_challenge_test_asset_backtranslation,GEM/wiki_auto_asset_turk_challenge_test_asset_bfp02,GEM/wiki_auto_asset_turk_challenge_test_asset_bfp05,GEM/wiki_auto_asset_turk_challenge_test_asset_nopunc,GEM/wiki_auto_asset_turk_challenge_test_turk_backtranslation,GEM/wiki_auto_asset_turk_challenge_test_turk_bfp02,GEM/wiki_auto_asset_turk_challenge_test_turk_bfp05,GEM/wiki_auto_asset_turk_challenge_test_turk_nopunc,GEM/wiki_auto_asset_turk_test_asset,GEM/wiki_auto_asset_turk_test_turk,GEM/wiki_lingua_ar,GEM/wiki_lingua_cs,GEM/wiki_lingua_de,GEM/wiki_lingua_en,GEM/wiki_lingua_es,GEM/wiki_lingua_fr,GEM/wiki_lingua_hi,GEM/wiki_lingua_id,GEM/wiki_lingua_it,GEM/wiki_lingua_ja,GEM/wiki_lingua_ko,GEM/wiki_lingua_nl,GEM/wiki_lingua_pt,GEM/wiki_lingua_ru,GEM/wiki_lingua_th,GEM/wiki_lingua_tr,GEM/wiki_lingua_vi,GEM/wiki_lingua_zh,gem_xsum,gem_xsum_challenge_sample,gem_xsum_challenge_test_backtranslation,gem_xsum_challenge_test_bfp_02,gem_xsum_challenge_test_bfp_05,gem_xsum_challenge_test_covid,gem_xsum_challenge_test_nopunc,axb,axg,boolq,cb,cola,copa,crows_pairs_english,crows_pairs_french,diabla,e2e_nlg_cleaned,mnli,mnli_mismatched,multirc,piaf,qqp,rte,sst,tydiqa_primary,tydiqa_secondary,wic,wsc,wnli,wino_bias_type1_anti,wino_bias_type1_pro,wino_bias_type2_anti,wino_bias_type2_pro,xquad_ar,xquad_en\
+
+TASKS=(
+GEM/web_nlg_en
+GEM/web_nlg_en_challenge_test_numbers
+GEM/web_nlg_en_challenge_test_scramble
+GEM/web_nlg_en_challenge_validation_sample
+GEM/web_nlg_ru
+GEM/web_nlg_ru_challenge_test_scramble
+GEM/web_nlg_ru_challenge_validation_sample
+GEM/wiki_auto_asset_turk_challenge_test_asset_backtranslation
+GEM/wiki_auto_asset_turk_challenge_test_asset_bfp02
+GEM/wiki_auto_asset_turk_challenge_test_asset_bfp05
+GEM/wiki_auto_asset_turk_challenge_test_asset_nopunc
+GEM/wiki_auto_asset_turk_challenge_test_turk_backtranslation
+GEM/wiki_auto_asset_turk_challenge_test_turk_bfp02
+GEM/wiki_auto_asset_turk_challenge_test_turk_bfp05
+GEM/wiki_auto_asset_turk_challenge_test_turk_nopunc
+GEM/wiki_auto_asset_turk_test_asset
+GEM/wiki_auto_asset_turk_test_turk
+GEM/wiki_lingua_ar
+GEM/wiki_lingua_cs
+GEM/wiki_lingua_de
+GEM/wiki_lingua_en
+GEM/wiki_lingua_es
+GEM/wiki_lingua_fr
+GEM/wiki_lingua_hi
+GEM/wiki_lingua_id
+GEM/wiki_lingua_it
+GEM/wiki_lingua_ja
+GEM/wiki_lingua_ko
+GEM/wiki_lingua_nl
+GEM/wiki_lingua_pt
+GEM/wiki_lingua_ru
+GEM/wiki_lingua_th
+GEM/wiki_lingua_tr
+GEM/wiki_lingua_vi
+GEM/wiki_lingua_zh
+gem_xsum
+gem_xsum_challenge_sample
+gem_xsum_challenge_test_backtranslation
+gem_xsum_challenge_test_bfp_02
+gem_xsum_challenge_test_bfp_05
+gem_xsum_challenge_test_covid
+gem_xsum_challenge_test_nopunc
+axb
+axg
+boolq
+cb
+cola
+copa
+crows_pairs_english
+crows_pairs_french
+diabla
+e2e_nlg_cleaned
+mnli
+mnli_mismatched
+multirc
+piaf
+qqp
+rte
+sst
+tydiqa_primary
+tydiqa_secondary
+wic
+wsc
+wnli
+wino_bias_type1_anti
+wino_bias_type1_pro
+wino_bias_type2_anti
+wino_bias_type2_pro
+xquad_ar
+xquad_en
+)
+
+# Abort array jobs whose index lies outside TASKS; TASK would otherwise expand to an empty string.
+if [ "$SLURM_ARRAY_TASK_ID" -ge "${#TASKS[@]}" ]; then
+    echo "Please update the array size as the it doesn't correspond to the number of models we want to evaluate. Array size: $SLURM_ARRAY_TASK_COUNT, number of models: ${#TASKS[@]}"
+    exit 1
+fi
+TASK=${TASKS[$SLURM_ARRAY_TASK_ID]}
+
+OPT_FOLDER=$WORK/opt/opt-175b/$TASK
+mkdir -p $OPT_FOLDER
+# Evaluation command; results of this BigScience-harness run go to bs_results.json in the per-task folder.
+CMD="./tasks/eval_harness/evaluate.py  \
+    --load $CHECKPOINT_PATH \
+    --results_path $OPT_FOLDER/bs_results.json \
+    --tensor-model-parallel-size $TP_SIZE  \
+    --pipeline-model-parallel-size $PP_SIZE \
+    --micro-batch-size $EVAL_MICRO_BATCH_SIZE \
+    --no-load-optim \
+    --no-load-rng \
+    --inference \
+    --task_list $TASK\
+    --deepspeed \
+    --deepspeed_config ds_config.json \
+    --intermed_results \
+    --adaptive_seq_len \
+    --micro_bs_multiplier 4 \
+    $MEGATRON_REQUIRED_ARGS \
+    "
+
+GPUS_PER_NODE=8
+NNODES=$SLURM_NNODES
+MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1)
+MASTER_PORT=6000
+export LAUNCHER="python -u -m torch.distributed.run \
+    --nproc_per_node $GPUS_PER_NODE \
+    --nnodes $NNODES \
+    --rdzv_endpoint $MASTER_ADDR:$MASTER_PORT \
+    --rdzv_backend c10d \
+    --max_restarts 0 \
+    --tee 3 \
+    "
+
+export CUDA_LAUNCH_BLOCKING=1
+# Print the fully expanded command so it is recorded in the job output.
+echo $LAUNCHER $CMD
+# Put the Megatron-DeepSpeed checkout on the Python module search path.
+export PYTHONPATH=$MEGATRON_DEEPSPEED_REPO
+# Run the evaluation; stdout and stderr are mirrored into a bs-prefixed log in the per-task folder.
+$LAUNCHER $CMD 2>&1 | tee $OPT_FOLDER/bs-eval-harness.log
diff --git a/evaluation/results/opt/run_opt_evaluation_125m.slurm b/evaluation/results/opt/run_opt_evaluation_125m.slurm
index d3964792..a8d9129e 100644
--- a/evaluation/results/opt/run_opt_evaluation_125m.slurm
+++ b/evaluation/results/opt/run_opt_evaluation_125m.slurm
@@ -8,11 +8,12 @@
 #SBATCH --time 20:00:00
 #SBATCH --output=/gpfswork/rech/six/uty16tp/logs/%x-%j.out
 #SBATCH --account=six@v100
+#SBATCH --array=0-28
 
 set -x -e
 
 source $six_ALL_CCFRWORK/start-py38-pt111
-conda activate thomas_lm_eval
+# conda activate thomas_lm_eval
 
 echo "START TIME: $(date)"
 
@@ -89,7 +90,47 @@ cat <<EOT > $config_json
 }
 EOT
 
-OPT_FOLDER=$WORK/opt/opt-125m
+# --task_list arc_challenge,arc_easy,boolq,copa,headqa,hellaswag,lambada,logiqa,mathqa,mc_taco,mrpc,multirc,openbookqa,piqa,prost,pubmedqa,qnli,qqp,race,rte,sciq,sst,triviaqa,webqs,wic,winogrande,wnli,wsc \
+TASKS=(
+arc_challenge
+arc_easy
+boolq
+copa
+headqa
+hellaswag
+lambada
+logiqa
+mathqa
+mc_taco
+mrpc
+multirc
+openbookqa
+piqa
+prost
+pubmedqa
+qnli
+qqp
+race
+rte
+sciq
+sst
+triviaqa
+webqs
+wic
+winogrande
+wnli
+wsc
+)
+
+# Abort array jobs whose index lies outside TASKS; TASK would otherwise expand to an empty string.
+if [ "$SLURM_ARRAY_TASK_ID" -ge "${#TASKS[@]}" ]; then
+    echo "Please update the array size as the it doesn't correspond to the number of models we want to evaluate. Array size: $SLURM_ARRAY_TASK_COUNT, number of models: ${#TASKS[@]}"
+    exit 1
+fi
+TASK=${TASKS[$SLURM_ARRAY_TASK_ID]}
+
+OPT_FOLDER=$WORK/opt/opt-125m/$TASK
+mkdir -p $OPT_FOLDER
 
 CMD="./tasks/eval_harness/evaluate.py  \
     --load $CHECKPOINT_PATH \
@@ -100,7 +141,7 @@ CMD="./tasks/eval_harness/evaluate.py  \
     --no-load-optim \
     --no-load-rng \
     --inference \
-    --task_list arc_challenge,arc_easy,boolq,copa,headqa,hellaswag,lambada,logiqa,mathqa,mc_taco,mrpc,multirc,openbookqa,piqa,prost,pubmedqa,qnli,qqp,race,rte,sciq,sst,triviaqa,webqs,wic,winogrande,wnli,wsc \
+    --task_list $TASK\
     --deepspeed \
     --deepspeed_config ds_config.json \
     --intermed_results \
diff --git a/evaluation/results/opt/run_opt_evaluation_175b.slurm b/evaluation/results/opt/run_opt_evaluation_175b.slurm
index bc7bbab7..bd1eb5cc 100644
--- a/evaluation/results/opt/run_opt_evaluation_175b.slurm
+++ b/evaluation/results/opt/run_opt_evaluation_175b.slurm
@@ -11,11 +11,12 @@
 #SBATCH --time 20:00:00              # maximum execution time (HH:MM:SS)
 #SBATCH --output=/gpfswork/rech/six/uty16tp/logs/%x-%j.out           # output file name
 #SBATCH --account=six@a100
+#SBATCH --array=0-28
 
 set -x -e
 
 source $six_ALL_CCFRWORK/start-py38-pt111
-conda activate thomas_lm_eval
+# conda activate thomas_lm_eval
 
 echo "START TIME: $(date)"
 
@@ -92,7 +93,47 @@ cat <<EOT > $config_json
 }
 EOT
 
-OPT_FOLDER=$WORK/opt/opt-175m
+# --task_list arc_challenge,arc_easy,boolq,copa,headqa,hellaswag,lambada,logiqa,mathqa,mc_taco,mrpc,multirc,openbookqa,piqa,prost,pubmedqa,qnli,qqp,race,rte,sciq,sst,triviaqa,webqs,wic,winogrande,wnli,wsc \
+TASKS=(
+arc_challenge
+arc_easy
+boolq
+copa
+headqa
+hellaswag
+lambada
+logiqa
+mathqa
+mc_taco
+mrpc
+multirc
+openbookqa
+piqa
+prost
+pubmedqa
+qnli
+qqp
+race
+rte
+sciq
+sst
+triviaqa
+webqs
+wic
+winogrande
+wnli
+wsc
+)
+
+# One array job runs per entry of TASKS, so the array size has to equal the number of tasks.
+if [ "${#TASKS[@]}" -ne "$SLURM_ARRAY_TASK_COUNT" ]; then
+    echo "Please update the array size as the it doesn't correspond to the number of models we want to evaluate. Array size: $SLURM_ARRAY_TASK_COUNT, number of models: ${#TASKS[@]}"
+    exit 1
+fi
+TASK=${TASKS[$SLURM_ARRAY_TASK_ID]}
+
+OPT_FOLDER=$WORK/opt/opt-175b/$TASK
+mkdir -p $OPT_FOLDER
 
 CMD="./tasks/eval_harness/evaluate.py  \
     --load $CHECKPOINT_PATH \
@@ -103,7 +144,7 @@ CMD="./tasks/eval_harness/evaluate.py  \
     --no-load-optim \
     --no-load-rng \
     --inference \
-    --task_list arc_challenge,arc_easy,boolq,copa,headqa,hellaswag,lambada,logiqa,mathqa,mc_taco,mrpc,multirc,openbookqa,piqa,prost,pubmedqa,qnli,qqp,race,rte,sciq,sst,triviaqa,webqs,wic,winogrande,wnli,wsc \
+    --task_list $TASK\
     --deepspeed \
     --deepspeed_config ds_config.json \
     --intermed_results \

From 87cf98012c0988c9a93a662044a7921857ddec89 Mon Sep 17 00:00:00 2001
From: thomasw21 <24695242+thomasw21@users.noreply.github.com>
Date: Tue, 5 Jul 2022 16:05:07 +0200
Subject: [PATCH 20/35] Actually I need this env to load the opt tokenizer

---
 evaluation/results/opt/run_opt_evaluation_125m.slurm | 3 ++-
 evaluation/results/opt/run_opt_evaluation_175b.slurm | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/evaluation/results/opt/run_opt_evaluation_125m.slurm b/evaluation/results/opt/run_opt_evaluation_125m.slurm
index a8d9129e..8baadcf1 100644
--- a/evaluation/results/opt/run_opt_evaluation_125m.slurm
+++ b/evaluation/results/opt/run_opt_evaluation_125m.slurm
@@ -13,7 +13,8 @@
 set -x -e
 
 source $six_ALL_CCFRWORK/start-py38-pt111
-# conda activate thomas_lm_eval
+# Required in order to load the opt tokenizer
+conda activate thomas_lm_eval
 
 echo "START TIME: $(date)"
 
diff --git a/evaluation/results/opt/run_opt_evaluation_175b.slurm b/evaluation/results/opt/run_opt_evaluation_175b.slurm
index bd1eb5cc..1b7bf14e 100644
--- a/evaluation/results/opt/run_opt_evaluation_175b.slurm
+++ b/evaluation/results/opt/run_opt_evaluation_175b.slurm
@@ -16,7 +16,8 @@
 set -x -e
 
 source $six_ALL_CCFRWORK/start-py38-pt111
-# conda activate thomas_lm_eval
+# Required in order to load the opt tokenizer
+conda activate thomas_lm_eval
 
 echo "START TIME: $(date)"
 

From b6a652edc733593825fb4b6f0daa346b390539c0 Mon Sep 17 00:00:00 2001
From: thomasw21 <24695242+thomasw21@users.noreply.github.com>
Date: Tue, 5 Jul 2022 16:48:55 +0200
Subject: [PATCH 21/35] Update config

---
 evaluation/results/opt/run_opt_bs_evaluation_175b.slurm | 3 ++-
 evaluation/results/opt/run_opt_evaluation_125m.slurm    | 2 +-
 evaluation/results/opt/run_opt_evaluation_175b.slurm    | 3 ++-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm b/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm
index 85ed952d..1d2072b6 100644
--- a/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm
+++ b/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm
@@ -192,7 +192,8 @@ CMD="./tasks/eval_harness/evaluate.py  \
     --deepspeed_config ds_config.json \
     --intermed_results \
     --adaptive_seq_len \
-    --micro_bs_multiplier 4 \
+    --micro_bs_multiplier 16 \
+    --offloadearly \
     $MEGATRON_REQUIRED_ARGS \
     "
 
diff --git a/evaluation/results/opt/run_opt_evaluation_125m.slurm b/evaluation/results/opt/run_opt_evaluation_125m.slurm
index 8baadcf1..0189ff31 100644
--- a/evaluation/results/opt/run_opt_evaluation_125m.slurm
+++ b/evaluation/results/opt/run_opt_evaluation_125m.slurm
@@ -147,7 +147,7 @@ CMD="./tasks/eval_harness/evaluate.py  \
     --deepspeed_config ds_config.json \
     --intermed_results \
     --adaptive_seq_len \
-    --micro_bs_multiplier 4 \
+    --micro_bs_multiplier 8 \
     $MEGATRON_REQUIRED_ARGS \
     "
 
diff --git a/evaluation/results/opt/run_opt_evaluation_175b.slurm b/evaluation/results/opt/run_opt_evaluation_175b.slurm
index 1b7bf14e..662779c6 100644
--- a/evaluation/results/opt/run_opt_evaluation_175b.slurm
+++ b/evaluation/results/opt/run_opt_evaluation_175b.slurm
@@ -150,7 +150,8 @@ CMD="./tasks/eval_harness/evaluate.py  \
     --deepspeed_config ds_config.json \
     --intermed_results \
     --adaptive_seq_len \
-    --micro_bs_multiplier 4 \
+    --micro_bs_multiplier 16 \
+    --offloadearly \
     $MEGATRON_REQUIRED_ARGS \
     "
 

From 9ce1ed1105d00506eecba1ab13cbb31dc8fea5b6 Mon Sep 17 00:00:00 2001
From: thomasw21 <24695242+thomasw21@users.noreply.github.com>
Date: Tue, 5 Jul 2022 18:23:33 +0200
Subject: [PATCH 22/35] Woops

---
 .../results/opt/concatenate_all_results.py    | 58 +++++++++++++++++++
 .../opt/run_opt_bs_evaluation_125m.slurm      |  2 +-
 .../opt/run_opt_bs_evaluation_175b.slurm      |  2 +-
 3 files changed, 60 insertions(+), 2 deletions(-)
 create mode 100644 evaluation/results/opt/concatenate_all_results.py

diff --git a/evaluation/results/opt/concatenate_all_results.py b/evaluation/results/opt/concatenate_all_results.py
new file mode 100644
index 00000000..62316c10
--- /dev/null
+++ b/evaluation/results/opt/concatenate_all_results.py
@@ -0,0 +1,58 @@
+import argparse
+import json
+import re
+from pathlib import Path
+
+# Result files are named "<eai|bs>_results_lm-eval_opt-175b-meg-ds_<YYYY-MM-DD-HH-MM-SS>.json".
+RESULT_FILE_PATTERN = re.compile(
+    r"(?:eai|bs)_results_lm-eval_opt-175b-meg-ds_(\d{4}-\d{2}-\d{2}-\d{2}-\d{2}-\d{2})\.json"
+)
+
+
+def get_args():
+    """Parse the command-line arguments of the script."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--results-dir", required=True, type=Path, help="Path to the list of results")
+    parser.add_argument("--concatenate-output-file", required=True, type=Path, help="Path to store the final output file")
+    return parser.parse_args()
+
+
+def find_latest_result(folder):
+    """Return the most recent result file directly inside `folder`, or None if there is none."""
+    candidates = []
+    for file in folder.iterdir():
+        match = RESULT_FILE_PATTERN.fullmatch(file.name)
+        if match is not None and not file.is_dir():
+            # The zero-padded timestamp sorts chronologically as a plain string.
+            candidates.append((match.group(1), file.name, file))
+    return max(candidates)[2] if candidates else None
+
+
+def main():
+    """Merge the latest result file of every sub-folder of --results-dir into one JSON file."""
+    args = get_args()
+
+    # Sub-folders are visited in sorted order so the merged output is reproducible.
+    json_files = []
+    for folder in sorted(args.results_dir.iterdir()):
+        if folder.is_file():
+            continue
+        latest = find_latest_result(folder)
+        if latest is not None:
+            json_files.append(latest)
+
+    # Merge all json files; a key present in several files keeps the value read last.
+    final_result = {"results": {}, "versions": {}}
+    for file in json_files:
+        with open(file, "r") as fi:
+            task_result = json.load(fi)
+        final_result["results"].update(task_result["results"])
+        final_result["versions"].update(task_result["versions"])
+
+    # Save result
+    with open(args.concatenate_output_file, "w") as fo:
+        json.dump(final_result, fo)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm b/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm
index b5ca83f3..55903511 100644
--- a/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm
+++ b/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm
@@ -132,7 +132,7 @@ TASK=${TASKS[$SLURM_ARRAY_TASK_ID]}
 OPT_FOLDER=$WORK/opt/opt-125m/$TASK
 mkdir -p $OPT_FOLDER
 
-CMD="./tasks/eval_harness/evaluate.py  \
+CMD="./tasks/eval_harness/evaluate_bsevalharness.py  \
     --load $CHECKPOINT_PATH \
     --results_path $OPT_FOLDER/bs_results.json \
     --tensor-model-parallel-size $TP_SIZE  \
diff --git a/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm b/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm
index 1d2072b6..b2466f0a 100644
--- a/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm
+++ b/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm
@@ -178,7 +178,7 @@ TASK=${TASKS[$SLURM_ARRAY_TASK_ID]}
 OPT_FOLDER=$WORK/opt/opt-175b/$TASK
 mkdir -p $OPT_FOLDER
 
-CMD="./tasks/eval_harness/evaluate.py  \
+CMD="./tasks/eval_harness/evaluate_bsevalharness.py  \
     --load $CHECKPOINT_PATH \
     --results_path $OPT_FOLDER/eai_results.json \
     --tensor-model-parallel-size $TP_SIZE  \

From 42303dcee77ff712a2e681f031bcfe9f12bf32de Mon Sep 17 00:00:00 2001
From: thomasw21 <24695242+thomasw21@users.noreply.github.com>
Date: Tue, 5 Jul 2022 18:33:47 +0200
Subject: [PATCH 23/35] Array take end

---
 evaluation/results/opt/run_opt_bs_evaluation_125m.slurm | 2 +-
 evaluation/results/opt/run_opt_bs_evaluation_175b.slurm | 2 +-
 evaluation/results/opt/run_opt_evaluation_125m.slurm    | 2 +-
 evaluation/results/opt/run_opt_evaluation_175b.slurm    | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm b/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm
index 55903511..d6a70416 100644
--- a/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm
+++ b/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm
@@ -8,7 +8,7 @@
 #SBATCH --time 20:00:00
 #SBATCH --output=/gpfswork/rech/six/uty16tp/logs/%x-%j.out
 #SBATCH --account=six@v100
-#SBATCH --array=0-70
+#SBATCH --array=0-69
 
 set -x -e
 
diff --git a/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm b/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm
index b2466f0a..729b5484 100644
--- a/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm
+++ b/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm
@@ -11,7 +11,7 @@
 #SBATCH --time 20:00:00              # maximum execution time (HH:MM:SS)
 #SBATCH --output=/gpfswork/rech/six/uty16tp/logs/%x-%j.out           # output file name
 #SBATCH --account=six@a100
-#SBATCH --array=0-70
+#SBATCH --array=0-69
 
 set -x -e
 
diff --git a/evaluation/results/opt/run_opt_evaluation_125m.slurm b/evaluation/results/opt/run_opt_evaluation_125m.slurm
index 0189ff31..2ef24b40 100644
--- a/evaluation/results/opt/run_opt_evaluation_125m.slurm
+++ b/evaluation/results/opt/run_opt_evaluation_125m.slurm
@@ -8,7 +8,7 @@
 #SBATCH --time 20:00:00
 #SBATCH --output=/gpfswork/rech/six/uty16tp/logs/%x-%j.out
 #SBATCH --account=six@v100
-#SBATCH --array=0-28
+#SBATCH --array=0-27
 
 set -x -e
 
diff --git a/evaluation/results/opt/run_opt_evaluation_175b.slurm b/evaluation/results/opt/run_opt_evaluation_175b.slurm
index 662779c6..7726799a 100644
--- a/evaluation/results/opt/run_opt_evaluation_175b.slurm
+++ b/evaluation/results/opt/run_opt_evaluation_175b.slurm
@@ -11,7 +11,7 @@
 #SBATCH --time 20:00:00              # maximum execution time (HH:MM:SS)
 #SBATCH --output=/gpfswork/rech/six/uty16tp/logs/%x-%j.out           # output file name
 #SBATCH --account=six@a100
-#SBATCH --array=0-28
+#SBATCH --array=0-27
 
 set -x -e
 

From 2ac4d41a77935acacce011422be3c15009ab54ee Mon Sep 17 00:00:00 2001
From: thomasw21 <24695242+thomasw21@users.noreply.github.com>
Date: Wed, 6 Jul 2022 15:08:44 +0200
Subject: [PATCH 24/35] Add bloom evaluation scripts

---
 .../bloom/run_bloom_bs_evaluation_176b.slurm  | 202 ++++++++++++++++++
 .../bloom/run_opt_evaluation_175b.slurm       | 160 ++++++++++++++
 2 files changed, 362 insertions(+)
 create mode 100644 evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm
 create mode 100644 evaluation/results/bloom/run_opt_evaluation_175b.slurm

diff --git a/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm b/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm
new file mode 100644
index 00000000..63cd1d4f
--- /dev/null
+++ b/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm
@@ -0,0 +1,202 @@
+#!/bin/bash
+#SBATCH --job-name=bs-eval-bloom-176b
+#SBATCH --partition=gpu_p5
+#SBATCH --constraint=a100
+#SBATCH --reservation=hug
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1          # crucial - only 1 task per dist per node!
+#SBATCH --cpus-per-task=64           # number of cores per tasks
+#SBATCH --hint=nomultithread         # we get physical cores not logical
+#SBATCH --gres=gpu:8                 # number of gpus
+#SBATCH --time 20:00:00              # maximum execution time (HH:MM:SS)
+#SBATCH --output=/gpfswork/rech/six/uty16tp/logs/%x-%j.out           # output file name
+#SBATCH --account=six@a100
+#SBATCH --array=0-69
+
+set -x -e
+
+source $six_ALL_CCFRWORK/start-py38-pt111
+conda activate muennighofflmeval
+
+echo "START TIME: $(date)"
+
+
+CHECKPOINT_PATH=$six_ALL_CCFRSCRATCH/checkpoints/tr11-176B-ml/checkpoints/main/global_step95000
+MEGATRON_DEEPSPEED_REPO=/gpfswork/rech/six/uty16tp/code/big_science/Megatron-DeepSpeed-bloom
+export HF_DATASETS_OFFLINE=1
+export TRANSFORMERS_OFFLINE=1
+
+export TRANSFORMERS_CACHE=$six_ALL_CCFRWORK/models
+export HF_DATASETS_CACHE=$six_ALL_CCFRWORK/datasetseval
+export HF_MODULES_CACHE=$six_ALL_CCFRWORK/modules
+export HF_METRICS_CACHE=$six_ALL_CCFRWORK/metrics
+
+cd $MEGATRON_DEEPSPEED_REPO
+
+# Make sure you use the slow version of the tokenizer.
+# Same tokenizer for 125m and 175b
+TOKENIZER_NAME_OR_PATH=bigscience/tokenizer
+
+PP_SIZE=8
+TP_SIZE=1
+SEQ_LEN=2048
+
+# different from the training MICRO_BATCH_SIZE - no optim memory, so can do bigger BS
+# make as big as it can fit into gpu w/o OOM, but not too close to 100%
+EVAL_MICRO_BATCH_SIZE=1
+
+#dummy arguments to make megatron happy.
+MEGATRON_REQUIRED_ARGS=" \
+    --num-layers -1 \
+    --hidden-size -1 \
+    --num-attention-heads -1 \
+    --seq-length -1  \
+    --max-position-embeddings -1 \
+"
+
+
+ZERO_STAGE=0
+
+config_json="./ds_config.json"
+
+# Deepspeed figures out GAS dynamically from dynamic GBS via set_train_batch_size()
+cat <<EOT > $config_json
+{
+  "train_micro_batch_size_per_gpu": 1,
+  "train_batch_size": 1,
+  "gradient_clipping": 1.0,
+  "zero_optimization": {
+    "stage": $ZERO_STAGE
+  },
+  "bf16": {
+    "enabled": true
+  },
+  "steps_per_print": 2000,
+  "wall_clock_breakdown": false
+}
+EOT
+
+# --task_list GEM/web_nlg_en,GEM/web_nlg_en_challenge_test_numbers,GEM/web_nlg_en_challenge_test_scramble,GEM/web_nlg_en_challenge_validation_sample,GEM/web_nlg_ru,GEM/web_nlg_ru_challenge_test_scramble,GEM/web_nlg_ru_challenge_validation_sample,GEM/wiki_auto_asset_turk_challenge_test_asset_backtranslation,GEM/wiki_auto_asset_turk_challenge_test_asset_bfp02,GEM/wiki_auto_asset_turk_challenge_test_asset_bfp05,GEM/wiki_auto_asset_turk_challenge_test_asset_nopunc,GEM/wiki_auto_asset_turk_challenge_test_turk_backtranslation,GEM/wiki_auto_asset_turk_challenge_test_turk_bfp02,GEM/wiki_auto_asset_turk_challenge_test_turk_bfp05,GEM/wiki_auto_asset_turk_challenge_test_turk_nopunc,GEM/wiki_auto_asset_turk_test_asset,GEM/wiki_auto_asset_turk_test_turk,GEM/wiki_lingua_ar,GEM/wiki_lingua_cs,GEM/wiki_lingua_de,GEM/wiki_lingua_en,GEM/wiki_lingua_es,GEM/wiki_lingua_fr,GEM/wiki_lingua_hi,GEM/wiki_lingua_id,GEM/wiki_lingua_it,GEM/wiki_lingua_ja,GEM/wiki_lingua_ko,GEM/wiki_lingua_nl,GEM/wiki_lingua_pt,GEM/wiki_lingua_ru,GEM/wiki_lingua_th,GEM/wiki_lingua_tr,GEM/wiki_lingua_vi,GEM/wiki_lingua_zh,gem_xsum,gem_xsum_challenge_sample,gem_xsum_challenge_test_backtranslation,gem_xsum_challenge_test_bfp_02,gem_xsum_challenge_test_bfp_05,gem_xsum_challenge_test_covid,gem_xsum_challenge_test_nopunc,axb,axg,boolq,cb,cola,copa,crows_pairs_english,crows_pairs_french,diabla,e2e_nlg_cleaned,mnli,mnli_mismatched,multirc,piaf,qqp,rte,sst,tydiqa_primary,tydiqa_secondary,wic,wsc,wnli,wino_bias_type1_anti,wino_bias_type1_pro,wino_bias_type2_anti,wino_bias_type2_pro,xquad_ar,xquad_en\
+
+TASKS=(
+GEM/web_nlg_en
+GEM/web_nlg_en_challenge_test_numbers
+GEM/web_nlg_en_challenge_test_scramble
+GEM/web_nlg_en_challenge_validation_sample
+GEM/web_nlg_ru
+GEM/web_nlg_ru_challenge_test_scramble
+GEM/web_nlg_ru_challenge_validation_sample
+GEM/wiki_auto_asset_turk_challenge_test_asset_backtranslation
+GEM/wiki_auto_asset_turk_challenge_test_asset_bfp02
+GEM/wiki_auto_asset_turk_challenge_test_asset_bfp05
+GEM/wiki_auto_asset_turk_challenge_test_asset_nopunc
+GEM/wiki_auto_asset_turk_challenge_test_turk_backtranslation
+GEM/wiki_auto_asset_turk_challenge_test_turk_bfp02
+GEM/wiki_auto_asset_turk_challenge_test_turk_bfp05
+GEM/wiki_auto_asset_turk_challenge_test_turk_nopunc
+GEM/wiki_auto_asset_turk_test_asset
+GEM/wiki_auto_asset_turk_test_turk
+GEM/wiki_lingua_ar
+GEM/wiki_lingua_cs
+GEM/wiki_lingua_de
+GEM/wiki_lingua_en
+GEM/wiki_lingua_es
+GEM/wiki_lingua_fr
+GEM/wiki_lingua_hi
+GEM/wiki_lingua_id
+GEM/wiki_lingua_it
+GEM/wiki_lingua_ja
+GEM/wiki_lingua_ko
+GEM/wiki_lingua_nl
+GEM/wiki_lingua_pt
+GEM/wiki_lingua_ru
+GEM/wiki_lingua_th
+GEM/wiki_lingua_tr
+GEM/wiki_lingua_vi
+GEM/wiki_lingua_zh
+gem_xsum
+gem_xsum_challenge_sample
+gem_xsum_challenge_test_backtranslation
+gem_xsum_challenge_test_bfp_02
+gem_xsum_challenge_test_bfp_05
+gem_xsum_challenge_test_covid
+gem_xsum_challenge_test_nopunc
+axb
+axg
+boolq
+cb
+cola
+copa
+crows_pairs_english
+crows_pairs_french
+diabla
+e2e_nlg_cleaned
+mnli
+mnli_mismatched
+multirc
+piaf
+qqp
+rte
+sst
+tydiqa_primary
+tydiqa_secondary
+wic
+wsc
+wnli
+wino_bias_type1_anti
+wino_bias_type1_pro
+wino_bias_type2_anti
+wino_bias_type2_pro
+xquad_ar
+xquad_en
+)
+
+# One array job runs per entry of TASKS, so the array size has to equal the number of tasks.
+if [ "${#TASKS[@]}" -ne "$SLURM_ARRAY_TASK_COUNT" ]; then
+    echo "Please update the array size as the it doesn't correspond to the number of models we want to evaluate. Array size: $SLURM_ARRAY_TASK_COUNT, number of models: ${#TASKS[@]}"
+    exit 1
+fi
+TASK=${TASKS[$SLURM_ARRAY_TASK_ID]}
+
+BLOOM_FOLDER=$WORK/bloom/bloom-176b/$TASK
+mkdir -p $OPT_FOLDER
+
+CMD="./tasks/eval_harness/evaluate_bsevalharness.py  \
+    --load $CHECKPOINT_PATH \
+    --results_path $BLOOM_FOLDER/eai_results.json \
+    --tensor-model-parallel-size $TP_SIZE  \
+    --pipeline-model-parallel-size $PP_SIZE \
+    --micro-batch-size $EVAL_MICRO_BATCH_SIZE \
+    --no-load-optim \
+    --no-load-rng \
+    --inference \
+    --task_list $TASK\
+    --deepspeed \
+    --deepspeed_config ds_config.json \
+    --intermed_results \
+    --adaptive_seq_len \
+    --micro_bs_multiplier 16 \
+    --offloadearly \
+    $MEGATRON_REQUIRED_ARGS \
+    "
+
+GPUS_PER_NODE=8
+NNODES=$SLURM_NNODES
+MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1)
+MASTER_PORT=6000
+export LAUNCHER="python -u -m torch.distributed.run \
+    --nproc_per_node $GPUS_PER_NODE \
+    --nnodes $NNODES \
+    --rdzv_endpoint $MASTER_ADDR:$MASTER_PORT \
+    --rdzv_backend c10d \
+    --max_restarts 0 \
+    --tee 3 \
+    "
+
+export CUDA_LAUNCH_BLOCKING=1
+
+echo $LAUNCHER $CMD
+
+export PYTHONPATH=$MEGATRON_DEEPSPEED_REPO
+
+$LAUNCHER $CMD 2>&1 | tee $BLOOM_FOLDER/eval-harness.log
diff --git a/evaluation/results/bloom/run_opt_evaluation_175b.slurm b/evaluation/results/bloom/run_opt_evaluation_175b.slurm
new file mode 100644
index 00000000..9084d9bd
--- /dev/null
+++ b/evaluation/results/bloom/run_opt_evaluation_175b.slurm
@@ -0,0 +1,160 @@
+#!/bin/bash
+#SBATCH --job-name=eai-eval-bloom-176b
+#SBATCH --partition=gpu_p5
+#SBATCH --constraint=a100
+#SBATCH --reservation=hug
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1          # crucial - only 1 task per dist per node!
+#SBATCH --cpus-per-task=64           # number of cores per tasks
+#SBATCH --hint=nomultithread         # we get physical cores not logical
+#SBATCH --gres=gpu:8                 # number of gpus
+#SBATCH --time 20:00:00              # maximum execution time (HH:MM:SS)
+#SBATCH --output=/gpfswork/rech/six/uty16tp/logs/%x-%j.out           # output file name
+#SBATCH --account=six@a100
+#SBATCH --array=0-27
+
+set -x -e
+
+source $six_ALL_CCFRWORK/start-py38-pt111
+# Required in order to load the opt tokenizer
+conda activate thomas_lm_eval
+
+echo "START TIME: $(date)"
+
+
+CHECKPOINT_PATH=$six_ALL_CCFRSCRATCH/checkpoints/tr11-176B-ml/checkpoints/main/global_step95000  # evaluate the BLOOM checkpoint, not the OPT one
+MEGATRON_DEEPSPEED_REPO=/gpfswork/rech/six/uty16tp/code/big_science/Megatron-DeepSpeed-bloom
+export HF_DATASETS_OFFLINE=1
+export TRANSFORMERS_OFFLINE=1
+
+export TRANSFORMERS_CACHE=$six_ALL_CCFRWORK/models
+export HF_DATASETS_CACHE=$six_ALL_CCFRWORK/datasets
+export HF_MODULES_CACHE=$six_ALL_CCFRWORK/modules
+export HF_METRICS_CACHE=$six_ALL_CCFRWORK/metrics
+
+cd $MEGATRON_DEEPSPEED_REPO
+
+# Make sure you use the slow version of the tokenizer.
+# Same tokenizer for 125m and 175b
+TOKENIZER_NAME_OR_PATH=bigscience/tokenizer
+
+PP_SIZE=8
+TP_SIZE=1
+SEQ_LEN=2048
+
+# different from the training MICRO_BATCH_SIZE - no optim memory, so can do bigger BS
+# make as big as it can fit into gpu w/o OOM, but not too close to 100%
+EVAL_MICRO_BATCH_SIZE=1
+
+#dummy arguments to make megatron happy.
+MEGATRON_REQUIRED_ARGS=" \
+    --num-layers -1 \
+    --hidden-size -1 \
+    --num-attention-heads -1 \
+    --seq-length -1  \
+    --max-position-embeddings -1 \
+"
+
+
+ZERO_STAGE=0
+
+config_json="./ds_config.json"
+
+# Deepspeed figures out GAS dynamically from dynamic GBS via set_train_batch_size()
+cat <<EOT > $config_json
+{
+  "train_micro_batch_size_per_gpu": 1,
+  "train_batch_size": 1,
+  "gradient_clipping": 1.0,
+  "zero_optimization": {
+    "stage": $ZERO_STAGE
+  },
+  "bf16": {
+    "enabled": true
+  },
+  "steps_per_print": 2000,
+  "wall_clock_breakdown": false
+}
+EOT
+
+# --task_list arc_challenge,arc_easy,boolq,copa,headqa,hellaswag,lambada,logiqa,mathqa,mc_taco,mrpc,multirc,openbookqa,piqa,prost,pubmedqa,qnli,qqp,race,rte,sciq,sst,triviaqa,webqs,wic,winogrande,wnli,wsc \
+TASKS=(
+arc_challenge
+arc_easy
+boolq
+copa
+headqa
+hellaswag
+lambada
+logiqa
+mathqa
+mc_taco
+mrpc
+multirc
+openbookqa
+piqa
+prost
+pubmedqa
+qnli
+qqp
+race
+rte
+sciq
+sst
+triviaqa
+webqs
+wic
+winogrande
+wnli
+wsc
+)
+
+# One array job runs per entry of TASKS, so the array size has to equal the number of tasks.
+if [ "${#TASKS[@]}" -ne "$SLURM_ARRAY_TASK_COUNT" ]; then
+    echo "Please update the array size as the it doesn't correspond to the number of models we want to evaluate. Array size: $SLURM_ARRAY_TASK_COUNT, number of models: ${#TASKS[@]}"
+    exit 1
+fi
+TASK=${TASKS[$SLURM_ARRAY_TASK_ID]}
+
+BLOOM_FOLDER=$WORK/bloom/bloom-176b/$TASK
+mkdir -p $OPT_FOLDER
+
+CMD="./tasks/eval_harness/evaluate.py  \
+    --load $CHECKPOINT_PATH \
+    --results_path $BLOOM_FOLDER/eai_results.json \
+    --tensor-model-parallel-size $TP_SIZE  \
+    --pipeline-model-parallel-size $PP_SIZE \
+    --micro-batch-size $EVAL_MICRO_BATCH_SIZE \
+    --no-load-optim \
+    --no-load-rng \
+    --inference \
+    --task_list $TASK\
+    --deepspeed \
+    --deepspeed_config ds_config.json \
+    --intermed_results \
+    --adaptive_seq_len \
+    --micro_bs_multiplier 16 \
+    --offloadearly \
+    $MEGATRON_REQUIRED_ARGS \
+    "
+
+GPUS_PER_NODE=8
+NNODES=$SLURM_NNODES
+MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1)
+MASTER_PORT=6000
+export LAUNCHER="python -u -m torch.distributed.run \
+    --nproc_per_node $GPUS_PER_NODE \
+    --nnodes $NNODES \
+    --rdzv_endpoint $MASTER_ADDR:$MASTER_PORT \
+    --rdzv_backend c10d \
+    --max_restarts 0 \
+    --tee 3 \
+    "
+
+export CUDA_LAUNCH_BLOCKING=1
+
+echo $LAUNCHER $CMD
+
+export PYTHONPATH=$MEGATRON_DEEPSPEED_REPO
+
+$LAUNCHER $CMD 2>&1 | tee $BLOOM_FOLDER/eval-harness.log

From 9cc3e7b31669b6167e3b951d5a43e4fec8dd27bb Mon Sep 17 00:00:00 2001
From: thomasw21 <24695242+thomasw21@users.noreply.github.com>
Date: Wed, 6 Jul 2022 15:11:37 +0200
Subject: [PATCH 25/35] Rename

---
 ..._opt_evaluation_175b.slurm => run_bloom_evaluation_175b.slurm} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename evaluation/results/bloom/{run_opt_evaluation_175b.slurm => run_bloom_evaluation_175b.slurm} (100%)

diff --git a/evaluation/results/bloom/run_opt_evaluation_175b.slurm b/evaluation/results/bloom/run_bloom_evaluation_175b.slurm
similarity index 100%
rename from evaluation/results/bloom/run_opt_evaluation_175b.slurm
rename to evaluation/results/bloom/run_bloom_evaluation_175b.slurm

From dbaa0db17ea4e5ea29514811db9ee66ebeca5a1a Mon Sep 17 00:00:00 2001
From: thomasw21 <24695242+thomasw21@users.noreply.github.com>
Date: Wed, 6 Jul 2022 15:16:02 +0200
Subject: [PATCH 26/35] Woops

---
 evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm | 2 +-
 evaluation/results/bloom/run_bloom_evaluation_175b.slurm    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm b/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm
index 63cd1d4f..061d6982 100644
--- a/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm
+++ b/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm
@@ -159,7 +159,7 @@ fi
 TASK=${TASKS[$SLURM_ARRAY_TASK_ID]}
 
 BLOOM_FOLDER=$WORK/bloom/bloom-176b/$TASK
-mkdir -p $OPT_FOLDER
+mkdir -p $BLOOM_FOLDER
 
 CMD="./tasks/eval_harness/evaluate_bsevalharness.py  \
     --load $CHECKPOINT_PATH \
diff --git a/evaluation/results/bloom/run_bloom_evaluation_175b.slurm b/evaluation/results/bloom/run_bloom_evaluation_175b.slurm
index 9084d9bd..d71dc57c 100644
--- a/evaluation/results/bloom/run_bloom_evaluation_175b.slurm
+++ b/evaluation/results/bloom/run_bloom_evaluation_175b.slurm
@@ -117,7 +117,7 @@ fi
 TASK=${TASKS[$SLURM_ARRAY_TASK_ID]}
 
 BLOOM_FOLDER=$WORK/bloom/bloom-176b/$TASK
-mkdir -p $OPT_FOLDER
+mkdir -p $BLOOM_FOLDER
 
 CMD="./tasks/eval_harness/evaluate.py  \
     --load $CHECKPOINT_PATH \

From 1f9ccfcda07e2bcdac5a2420b39d9e7c2d64d6fb Mon Sep 17 00:00:00 2001
From: thomasw21 <24695242+thomasw21@users.noreply.github.com>
Date: Wed, 6 Jul 2022 15:32:03 +0200
Subject: [PATCH 27/35] prevent tokenizer parallelism

---
 evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm | 1 +
 evaluation/results/bloom/run_bloom_evaluation_175b.slurm    | 1 +
 2 files changed, 2 insertions(+)

diff --git a/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm b/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm
index 061d6982..3790d81d 100644
--- a/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm
+++ b/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm
@@ -30,6 +30,7 @@ export TRANSFORMERS_CACHE=$six_ALL_CCFRWORK/models
 export HF_DATASETS_CACHE=$six_ALL_CCFRWORK/datasetseval
 export HF_MODULES_CACHE=$six_ALL_CCFRWORK/modules
 export HF_METRICS_CACHE=$six_ALL_CCFRWORK/metrics
+export TOKENIZERS_PARALLELISM=false
 
 cd $MEGATRON_DEEPSPEED_REPO
 
diff --git a/evaluation/results/bloom/run_bloom_evaluation_175b.slurm b/evaluation/results/bloom/run_bloom_evaluation_175b.slurm
index d71dc57c..99ea0a16 100644
--- a/evaluation/results/bloom/run_bloom_evaluation_175b.slurm
+++ b/evaluation/results/bloom/run_bloom_evaluation_175b.slurm
@@ -31,6 +31,7 @@ export TRANSFORMERS_CACHE=$six_ALL_CCFRWORK/models
 export HF_DATASETS_CACHE=$six_ALL_CCFRWORK/datasets
 export HF_MODULES_CACHE=$six_ALL_CCFRWORK/modules
 export HF_METRICS_CACHE=$six_ALL_CCFRWORK/metrics
+export TOKENIZERS_PARALLELISM=false
 
 cd $MEGATRON_DEEPSPEED_REPO
 

From f94c925f4f477f921ef9e00c9c231bc92ad8a3e5 Mon Sep 17 00:00:00 2001
From: thomasw21 <24695242+thomasw21@users.noreply.github.com>
Date: Wed, 6 Jul 2022 15:45:05 +0200
Subject: [PATCH 28/35] Woops overrode value

---
 evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm | 4 ++--
 evaluation/results/opt/run_opt_bs_evaluation_175b.slurm     | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm b/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm
index 3790d81d..ad81f77a 100644
--- a/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm
+++ b/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm
@@ -164,7 +164,7 @@ mkdir -p $BLOOM_FOLDER
 
 CMD="./tasks/eval_harness/evaluate_bsevalharness.py  \
     --load $CHECKPOINT_PATH \
-    --results_path $BLOOM_FOLDER/eai_results.json \
+    --results_path $BLOOM_FOLDER/bs_results.json \
     --tensor-model-parallel-size $TP_SIZE  \
     --pipeline-model-parallel-size $PP_SIZE \
     --micro-batch-size $EVAL_MICRO_BATCH_SIZE \
@@ -200,4 +200,4 @@ echo $LAUNCHER $CMD
 
 export PYTHONPATH=$MEGATRON_DEEPSPEED_REPO
 
-$LAUNCHER $CMD 2>&1 | tee $BLOOM_FOLDER/eval-harness.log
+$LAUNCHER $CMD 2>&1 | tee $BLOOM_FOLDER/bs-eval-harness.log
diff --git a/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm b/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm
index 729b5484..c9c8c808 100644
--- a/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm
+++ b/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm
@@ -180,7 +180,7 @@ mkdir -p $OPT_FOLDER
 
 CMD="./tasks/eval_harness/evaluate_bsevalharness.py  \
     --load $CHECKPOINT_PATH \
-    --results_path $OPT_FOLDER/eai_results.json \
+    --results_path $OPT_FOLDER/bs_results.json \
     --tensor-model-parallel-size $TP_SIZE  \
     --pipeline-model-parallel-size $PP_SIZE \
     --micro-batch-size $EVAL_MICRO_BATCH_SIZE \
@@ -216,4 +216,4 @@ echo $LAUNCHER $CMD
 
 export PYTHONPATH=$MEGATRON_DEEPSPEED_REPO
 
-$LAUNCHER $CMD 2>&1 | tee $OPT_FOLDER/eval-harness.log
+$LAUNCHER $CMD 2>&1 | tee $OPT_FOLDER/bs-eval-harness.log

From e282bf293e1195df6f48a122f4ab45a103ee2f79 Mon Sep 17 00:00:00 2001
From: thomasw21 <24695242+thomasw21@users.noreply.github.com>
Date: Wed, 6 Jul 2022 17:31:26 +0200
Subject: [PATCH 29/35] Fix 125m opt evaluation script

---
 .../opt/run_opt_bs_evaluation_125m.slurm      | 83 ++++++++++++++-----
 1 file changed, 63 insertions(+), 20 deletions(-)

diff --git a/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm b/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm
index d6a70416..815dc083 100644
--- a/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm
+++ b/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm
@@ -91,35 +91,78 @@ cat <<EOT > $config_json
 EOT
 
 # --task_list GEM/web_nlg_en,GEM/web_nlg_en_challenge_test_numbers,GEM/web_nlg_en_challenge_test_scramble,GEM/web_nlg_en_challenge_validation_sample,GEM/web_nlg_ru,GEM/web_nlg_ru_challenge_test_scramble,GEM/web_nlg_ru_challenge_validation_sample,GEM/wiki_auto_asset_turk_challenge_test_asset_backtranslation,GEM/wiki_auto_asset_turk_challenge_test_asset_bfp02,GEM/wiki_auto_asset_turk_challenge_test_asset_bfp05,GEM/wiki_auto_asset_turk_challenge_test_asset_nopunc,GEM/wiki_auto_asset_turk_challenge_test_turk_backtranslation,GEM/wiki_auto_asset_turk_challenge_test_turk_bfp02,GEM/wiki_auto_asset_turk_challenge_test_turk_bfp05,GEM/wiki_auto_asset_turk_challenge_test_turk_nopunc,GEM/wiki_auto_asset_turk_test_asset,GEM/wiki_auto_asset_turk_test_turk,GEM/wiki_lingua_ar,GEM/wiki_lingua_cs,GEM/wiki_lingua_de,GEM/wiki_lingua_en,GEM/wiki_lingua_es,GEM/wiki_lingua_fr,GEM/wiki_lingua_hi,GEM/wiki_lingua_id,GEM/wiki_lingua_it,GEM/wiki_lingua_ja,GEM/wiki_lingua_ko,GEM/wiki_lingua_nl,GEM/wiki_lingua_pt,GEM/wiki_lingua_ru,GEM/wiki_lingua_th,GEM/wiki_lingua_tr,GEM/wiki_lingua_vi,GEM/wiki_lingua_zh,gem_xsum,gem_xsum_challenge_sample,gem_xsum_challenge_test_backtranslation,gem_xsum_challenge_test_bfp_02,gem_xsum_challenge_test_bfp_05,gem_xsum_challenge_test_covid,gem_xsum_challenge_test_nopunc,axb,axg,boolq,cb,cola,copa,crows_pairs_english,crows_pairs_french,diabla,e2e_nlg_cleaned,mnli,mnli_mismatched,multirc,piaf,qqp,rte,sst,tydiqa_primary,tydiqa_secondary,wic,wsc,wnli,wino_bias_type1_anti,wino_bias_type1_pro,wino_bias_type2_anti,wino_bias_type2_pro,xquad_ar,xquad_en\
+
 TASKS=(
-arc_challenge
-arc_easy
+GEM/web_nlg_en
+GEM/web_nlg_en_challenge_test_numbers
+GEM/web_nlg_en_challenge_test_scramble
+GEM/web_nlg_en_challenge_validation_sample
+GEM/web_nlg_ru
+GEM/web_nlg_ru_challenge_test_scramble
+GEM/web_nlg_ru_challenge_validation_sample
+GEM/wiki_auto_asset_turk_challenge_test_asset_backtranslation
+GEM/wiki_auto_asset_turk_challenge_test_asset_bfp02
+GEM/wiki_auto_asset_turk_challenge_test_asset_bfp05
+GEM/wiki_auto_asset_turk_challenge_test_asset_nopunc
+GEM/wiki_auto_asset_turk_challenge_test_turk_backtranslation
+GEM/wiki_auto_asset_turk_challenge_test_turk_bfp02
+GEM/wiki_auto_asset_turk_challenge_test_turk_bfp05
+GEM/wiki_auto_asset_turk_challenge_test_turk_nopunc
+GEM/wiki_auto_asset_turk_test_asset
+GEM/wiki_auto_asset_turk_test_turk
+GEM/wiki_lingua_ar
+GEM/wiki_lingua_cs
+GEM/wiki_lingua_de
+GEM/wiki_lingua_en
+GEM/wiki_lingua_es
+GEM/wiki_lingua_fr
+GEM/wiki_lingua_hi
+GEM/wiki_lingua_id
+GEM/wiki_lingua_it
+GEM/wiki_lingua_ja
+GEM/wiki_lingua_ko
+GEM/wiki_lingua_nl
+GEM/wiki_lingua_pt
+GEM/wiki_lingua_ru
+GEM/wiki_lingua_th
+GEM/wiki_lingua_tr
+GEM/wiki_lingua_vi
+GEM/wiki_lingua_zh
+gem_xsum
+gem_xsum_challenge_sample
+gem_xsum_challenge_test_backtranslation
+gem_xsum_challenge_test_bfp_02
+gem_xsum_challenge_test_bfp_05
+gem_xsum_challenge_test_covid
+gem_xsum_challenge_test_nopunc
+axb
+axg
 boolq
+cb
+cola
 copa
-headqa
-hellaswag
-lambada
-logiqa
-mathqa
-mc_taco
-mrpc
+crows_pairs_english
+crows_pairs_french
+diabla
+e2e_nlg_cleaned
+mnli
+mnli_mismatched
 multirc
-openbookqa
-piqa
-prost
-pubmedqa
-qnli
+piaf
 qqp
-race
 rte
-sciq
 sst
-triviaqa
-webqs
+tydiqa_primary
+tydiqa_secondary
 wic
-winogrande
-wnli
 wsc
+wnli
+wino_bias_type1_anti
+wino_bias_type1_pro
+wino_bias_type2_anti
+wino_bias_type2_pro
+xquad_ar
+xquad_en
 )
 
 if [ "${#TASKS[@]}" -eq "$SLURM_ARRAY_TASK_COUNT" ];

From 21a5865c29377c143f0c99abc05f5c9c12ad87c9 Mon Sep 17 00:00:00 2001
From: thomasw21 <24695242+thomasw21@users.noreply.github.com>
Date: Wed, 6 Jul 2022 22:56:58 +0200
Subject: [PATCH 30/35] Woops

---
 evaluation/results/bloom/run_bloom_evaluation_175b.slurm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/evaluation/results/bloom/run_bloom_evaluation_175b.slurm b/evaluation/results/bloom/run_bloom_evaluation_175b.slurm
index 99ea0a16..3ba5622e 100644
--- a/evaluation/results/bloom/run_bloom_evaluation_175b.slurm
+++ b/evaluation/results/bloom/run_bloom_evaluation_175b.slurm
@@ -22,7 +22,7 @@ conda activate thomas_lm_eval
 echo "START TIME: $(date)"
 
 
-CHECKPOINT_PATH=$six_ALL_CCFRSCRATCH/opt/opt-175b-meg-ds
+CHECKPOINT_PATH=$six_ALL_CCFRSCRATCH/checkpoints/tr11-176B-ml/checkpoints/main/global_step95000
 MEGATRON_DEEPSPEED_REPO=/gpfswork/rech/six/uty16tp/code/big_science/Megatron-DeepSpeed-bloom
 export HF_DATASETS_OFFLINE=1
 export TRANSFORMERS_OFFLINE=1

From 04fbe66b07462041068b4b2a41d1d0cb8d7a1f6a Mon Sep 17 00:00:00 2001
From: thomasw21 <24695242+thomasw21@users.noreply.github.com>
Date: Thu, 7 Jul 2022 09:09:37 +0200
Subject: [PATCH 31/35] Add all flores ppl

---
 .../bloom/run_bloom_bs_evaluation_176b.slurm  | 104 +++++++++++++++++-
 .../bloom/run_bloom_evaluation_175b.slurm     |   7 +-
 .../opt/run_opt_bs_evaluation_175b.slurm      | 104 +++++++++++++++++-
 .../results/opt/run_opt_evaluation_175b.slurm |   7 +-
 4 files changed, 218 insertions(+), 4 deletions(-)

diff --git a/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm b/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm
index ad81f77a..21a1ee37 100644
--- a/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm
+++ b/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm
@@ -11,7 +11,7 @@
 #SBATCH --time 20:00:00              # maximum execution time (HH:MM:SS)
 #SBATCH --output=/gpfswork/rech/six/uty16tp/logs/%x-%j.out           # output file name
 #SBATCH --account=six@a100
-#SBATCH --array=0-69
+#SBATCH --array=0-171
 
 set -x -e
 
@@ -150,6 +150,108 @@ wino_bias_type2_anti
 wino_bias_type2_pro
 xquad_ar
 xquad_en
+gsarti/flores_101_afr
+gsarti/flores_101_amh
+gsarti/flores_101_ara
+gsarti/flores_101_hye
+gsarti/flores_101_asm
+gsarti/flores_101_ast
+gsarti/flores_101_azj
+gsarti/flores_101_bel
+gsarti/flores_101_ben
+gsarti/flores_101_bos
+gsarti/flores_101_bul
+gsarti/flores_101_mya
+gsarti/flores_101_cat
+gsarti/flores_101_ceb
+gsarti/flores_101_zho_simpl
+gsarti/flores_101_zho_trad
+gsarti/flores_101_hrv
+gsarti/flores_101_ces
+gsarti/flores_101_dan
+gsarti/flores_101_nld
+gsarti/flores_101_eng
+gsarti/flores_101_est
+gsarti/flores_101_tgl
+gsarti/flores_101_fin
+gsarti/flores_101_fra
+gsarti/flores_101_ful
+gsarti/flores_101_glg
+gsarti/flores_101_lug
+gsarti/flores_101_kat
+gsarti/flores_101_deu
+gsarti/flores_101_ell
+gsarti/flores_101_guj
+gsarti/flores_101_hau
+gsarti/flores_101_heb
+gsarti/flores_101_hin
+gsarti/flores_101_hun
+gsarti/flores_101_isl
+gsarti/flores_101_ibo
+gsarti/flores_101_ind
+gsarti/flores_101_gle
+gsarti/flores_101_ita
+gsarti/flores_101_jpn
+gsarti/flores_101_jav
+gsarti/flores_101_kea
+gsarti/flores_101_kam
+gsarti/flores_101_kan
+gsarti/flores_101_kaz
+gsarti/flores_101_khm
+gsarti/flores_101_kor
+gsarti/flores_101_kir
+gsarti/flores_101_lao
+gsarti/flores_101_lav
+gsarti/flores_101_lin
+gsarti/flores_101_lit
+gsarti/flores_101_luo
+gsarti/flores_101_ltz
+gsarti/flores_101_mkd
+gsarti/flores_101_msa
+gsarti/flores_101_mal
+gsarti/flores_101_mlt
+gsarti/flores_101_mri
+gsarti/flores_101_mar
+gsarti/flores_101_mon
+gsarti/flores_101_npi
+gsarti/flores_101_nso
+gsarti/flores_101_nob
+gsarti/flores_101_nya
+gsarti/flores_101_oci
+gsarti/flores_101_ory
+gsarti/flores_101_orm
+gsarti/flores_101_pus
+gsarti/flores_101_fas
+gsarti/flores_101_pol
+gsarti/flores_101_por
+gsarti/flores_101_pan
+gsarti/flores_101_ron
+gsarti/flores_101_rus
+gsarti/flores_101_srp
+gsarti/flores_101_sna
+gsarti/flores_101_snd
+gsarti/flores_101_slk
+gsarti/flores_101_slv
+gsarti/flores_101_som
+gsarti/flores_101_ckb
+gsarti/flores_101_spa
+gsarti/flores_101_swh
+gsarti/flores_101_swe
+gsarti/flores_101_tgk
+gsarti/flores_101_tam
+gsarti/flores_101_tel
+gsarti/flores_101_tha
+gsarti/flores_101_tur
+gsarti/flores_101_ukr
+gsarti/flores_101_umb
+gsarti/flores_101_urd
+gsarti/flores_101_uzb
+gsarti/flores_101_vie
+gsarti/flores_101_cym
+gsarti/flores_101_wol
+gsarti/flores_101_xho
+gsarti/flores_101_yor
+gsarti/flores_101_zul
 )
 
 if [ "${#TASKS[@]}" -eq "$SLURM_ARRAY_TASK_COUNT" ];
diff --git a/evaluation/results/bloom/run_bloom_evaluation_175b.slurm b/evaluation/results/bloom/run_bloom_evaluation_175b.slurm
index 3ba5622e..bca13d0b 100644
--- a/evaluation/results/bloom/run_bloom_evaluation_175b.slurm
+++ b/evaluation/results/bloom/run_bloom_evaluation_175b.slurm
@@ -11,7 +11,7 @@
 #SBATCH --time 20:00:00              # maximum execution time (HH:MM:SS)
 #SBATCH --output=/gpfswork/rech/six/uty16tp/logs/%x-%j.out           # output file name
 #SBATCH --account=six@a100
-#SBATCH --array=0-27
+#SBATCH --array=0-32
 
 set -x -e
 
@@ -108,6 +108,11 @@ wic
 winogrande
 wnli
 wsc
+lambada_mt_en
+lambada_mt_fr
+lambada_mt_de
+lambada_mt_it
+lambada_mt_es
 )
 
 if [ "${#TASKS[@]}" -eq "$SLURM_ARRAY_TASK_COUNT" ];
diff --git a/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm b/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm
index c9c8c808..e4520864 100644
--- a/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm
+++ b/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm
@@ -11,7 +11,7 @@
 #SBATCH --time 20:00:00              # maximum execution time (HH:MM:SS)
 #SBATCH --output=/gpfswork/rech/six/uty16tp/logs/%x-%j.out           # output file name
 #SBATCH --account=six@a100
-#SBATCH --array=0-69
+#SBATCH --array=0-171
 
 set -x -e
 
@@ -166,6 +166,108 @@ wino_bias_type2_anti
 wino_bias_type2_pro
 xquad_ar
 xquad_en
+gsarti/flores_101_afr
+gsarti/flores_101_amh
+gsarti/flores_101_ara
+gsarti/flores_101_hye
+gsarti/flores_101_asm
+gsarti/flores_101_ast
+gsarti/flores_101_azj
+gsarti/flores_101_bel
+gsarti/flores_101_ben
+gsarti/flores_101_bos
+gsarti/flores_101_bul
+gsarti/flores_101_mya
+gsarti/flores_101_cat
+gsarti/flores_101_ceb
+gsarti/flores_101_zho_simpl
+gsarti/flores_101_zho_trad
+gsarti/flores_101_hrv
+gsarti/flores_101_ces
+gsarti/flores_101_dan
+gsarti/flores_101_nld
+gsarti/flores_101_eng
+gsarti/flores_101_est
+gsarti/flores_101_tgl
+gsarti/flores_101_fin
+gsarti/flores_101_fra
+gsarti/flores_101_ful
+gsarti/flores_101_glg
+gsarti/flores_101_lug
+gsarti/flores_101_kat
+gsarti/flores_101_deu
+gsarti/flores_101_ell
+gsarti/flores_101_guj
+gsarti/flores_101_hau
+gsarti/flores_101_heb
+gsarti/flores_101_hin
+gsarti/flores_101_hun
+gsarti/flores_101_isl
+gsarti/flores_101_ibo
+gsarti/flores_101_ind
+gsarti/flores_101_gle
+gsarti/flores_101_ita
+gsarti/flores_101_jpn
+gsarti/flores_101_jav
+gsarti/flores_101_kea
+gsarti/flores_101_kam
+gsarti/flores_101_kan
+gsarti/flores_101_kaz
+gsarti/flores_101_khm
+gsarti/flores_101_kor
+gsarti/flores_101_kir
+gsarti/flores_101_lao
+gsarti/flores_101_lav
+gsarti/flores_101_lin
+gsarti/flores_101_lit
+gsarti/flores_101_luo
+gsarti/flores_101_ltz
+gsarti/flores_101_mkd
+gsarti/flores_101_msa
+gsarti/flores_101_mal
+gsarti/flores_101_mlt
+gsarti/flores_101_mri
+gsarti/flores_101_mar
+gsarti/flores_101_mon
+gsarti/flores_101_npi
+gsarti/flores_101_nso
+gsarti/flores_101_nob
+gsarti/flores_101_nya
+gsarti/flores_101_oci
+gsarti/flores_101_ory
+gsarti/flores_101_orm
+gsarti/flores_101_pus
+gsarti/flores_101_fas
+gsarti/flores_101_pol
+gsarti/flores_101_por
+gsarti/flores_101_pan
+gsarti/flores_101_ron
+gsarti/flores_101_rus
+gsarti/flores_101_srp
+gsarti/flores_101_sna
+gsarti/flores_101_snd
+gsarti/flores_101_slk
+gsarti/flores_101_slv
+gsarti/flores_101_som
+gsarti/flores_101_ckb
+gsarti/flores_101_spa
+gsarti/flores_101_swh
+gsarti/flores_101_swe
+gsarti/flores_101_tgk
+gsarti/flores_101_tam
+gsarti/flores_101_tel
+gsarti/flores_101_tha
+gsarti/flores_101_tur
+gsarti/flores_101_ukr
+gsarti/flores_101_umb
+gsarti/flores_101_urd
+gsarti/flores_101_uzb
+gsarti/flores_101_vie
+gsarti/flores_101_cym
+gsarti/flores_101_wol
+gsarti/flores_101_xho
+gsarti/flores_101_yor
+gsarti/flores_101_zul
 )
 
 if [ "${#TASKS[@]}" -eq "$SLURM_ARRAY_TASK_COUNT" ];
diff --git a/evaluation/results/opt/run_opt_evaluation_175b.slurm b/evaluation/results/opt/run_opt_evaluation_175b.slurm
index 7726799a..10a7ea2d 100644
--- a/evaluation/results/opt/run_opt_evaluation_175b.slurm
+++ b/evaluation/results/opt/run_opt_evaluation_175b.slurm
@@ -11,7 +11,7 @@
 #SBATCH --time 20:00:00              # maximum execution time (HH:MM:SS)
 #SBATCH --output=/gpfswork/rech/six/uty16tp/logs/%x-%j.out           # output file name
 #SBATCH --account=six@a100
-#SBATCH --array=0-27
+#SBATCH --array=0-32
 
 set -x -e
 
@@ -124,6 +124,11 @@ wic
 winogrande
 wnli
 wsc
+lambada_mt_en
+lambada_mt_fr
+lambada_mt_de
+lambada_mt_it
+lambada_mt_es
 )
 
 if [ "${#TASKS[@]}" -eq "$SLURM_ARRAY_TASK_COUNT" ];

From 2eba492605813a36a31959bc6e31e8087cfe6b11 Mon Sep 17 00:00:00 2001
From: thomasw21 <24695242+thomasw21@users.noreply.github.com>
Date: Thu, 7 Jul 2022 09:11:32 +0200
Subject: [PATCH 32/35] Comment out quite annoying check

---
 .../results/bloom/run_bloom_bs_evaluation_176b.slurm   | 10 +++++-----
 .../results/bloom/run_bloom_evaluation_175b.slurm      | 10 +++++-----
 .../results/opt/run_opt_bs_evaluation_125m.slurm       | 10 +++++-----
 .../results/opt/run_opt_bs_evaluation_175b.slurm       | 10 +++++-----
 evaluation/results/opt/run_opt_evaluation_125m.slurm   | 10 +++++-----
 evaluation/results/opt/run_opt_evaluation_175b.slurm   | 10 +++++-----
 6 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm b/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm
index 21a1ee37..5e24f21d 100644
--- a/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm
+++ b/evaluation/results/bloom/run_bloom_bs_evaluation_176b.slurm
@@ -254,11 +254,11 @@ gsarti/flores_101_yor
 gsarti/flores_101_zul
 )
 
-if [ "${#TASKS[@]}" -eq "$SLURM_ARRAY_TASK_COUNT" ];
-    then
-    echo "Please update the array size as the it doesn't correspond to the number of models we want to evaluate. Array size: $SLURM_ARRAY_TASK_COUNT, number of models: ${#TASKS[@]}"
-    exit 1
-fi
+#if [ "${#TASKS[@]}" -ne "$SLURM_ARRAY_TASK_COUNT" ];
+#    then
+#    echo "Please update the array size as it doesn't correspond to the number of models we want to evaluate. Array size: $SLURM_ARRAY_TASK_COUNT, number of models: ${#TASKS[@]}"
+#    exit 1
+#fi
 TASK=${TASKS[$SLURM_ARRAY_TASK_ID]}
 
 BLOOM_FOLDER=$WORK/bloom/bloom-176b/$TASK
diff --git a/evaluation/results/bloom/run_bloom_evaluation_175b.slurm b/evaluation/results/bloom/run_bloom_evaluation_175b.slurm
index bca13d0b..679ae7d2 100644
--- a/evaluation/results/bloom/run_bloom_evaluation_175b.slurm
+++ b/evaluation/results/bloom/run_bloom_evaluation_175b.slurm
@@ -115,11 +115,11 @@ lambada_mt_it
 lambada_mt_es
 )
 
-if [ "${#TASKS[@]}" -eq "$SLURM_ARRAY_TASK_COUNT" ];
-    then
-    echo "Please update the array size as the it doesn't correspond to the number of models we want to evaluate. Array size: $SLURM_ARRAY_TASK_COUNT, number of models: ${#TASKS[@]}"
-    exit 1
-fi
+#if [ "${#TASKS[@]}" -ne "$SLURM_ARRAY_TASK_COUNT" ];
+#    then
+#    echo "Please update the array size as it doesn't correspond to the number of models we want to evaluate. Array size: $SLURM_ARRAY_TASK_COUNT, number of models: ${#TASKS[@]}"
+#    exit 1
+#fi
 TASK=${TASKS[$SLURM_ARRAY_TASK_ID]}
 
 BLOOM_FOLDER=$WORK/bloom/bloom-176b/$TASK
diff --git a/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm b/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm
index 815dc083..bf4afde9 100644
--- a/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm
+++ b/evaluation/results/opt/run_opt_bs_evaluation_125m.slurm
@@ -165,11 +165,11 @@ xquad_ar
 xquad_en
 )
 
-if [ "${#TASKS[@]}" -eq "$SLURM_ARRAY_TASK_COUNT" ];
-    then
-    echo "Please update the array size as the it doesn't correspond to the number of models we want to evaluate. Array size: $SLURM_ARRAY_TASK_COUNT, number of models: ${#TASKS[@]}"
-    exit 1
-fi
+#if [ "${#TASKS[@]}" -ne "$SLURM_ARRAY_TASK_COUNT" ];
+#    then
+#    echo "Please update the array size as it doesn't correspond to the number of models we want to evaluate. Array size: $SLURM_ARRAY_TASK_COUNT, number of models: ${#TASKS[@]}"
+#    exit 1
+#fi
 TASK=${TASKS[$SLURM_ARRAY_TASK_ID]}
 
 OPT_FOLDER=$WORK/opt/opt-125m/$TASK
diff --git a/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm b/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm
index e4520864..097581be 100644
--- a/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm
+++ b/evaluation/results/opt/run_opt_bs_evaluation_175b.slurm
@@ -270,11 +270,11 @@ gsarti/flores_101_yor
 gsarti/flores_101_zul
 )
 
-if [ "${#TASKS[@]}" -eq "$SLURM_ARRAY_TASK_COUNT" ];
-    then
-    echo "Please update the array size as the it doesn't correspond to the number of models we want to evaluate. Array size: $SLURM_ARRAY_TASK_COUNT, number of models: ${#TASKS[@]}"
-    exit 1
-fi
+#if [ "${#TASKS[@]}" -ne "$SLURM_ARRAY_TASK_COUNT" ];
+#    then
+#    echo "Please update the array size as it doesn't correspond to the number of models we want to evaluate. Array size: $SLURM_ARRAY_TASK_COUNT, number of models: ${#TASKS[@]}"
+#    exit 1
+#fi
 TASK=${TASKS[$SLURM_ARRAY_TASK_ID]}
 
 OPT_FOLDER=$WORK/opt/opt-175b/$TASK
diff --git a/evaluation/results/opt/run_opt_evaluation_125m.slurm b/evaluation/results/opt/run_opt_evaluation_125m.slurm
index 2ef24b40..cc657325 100644
--- a/evaluation/results/opt/run_opt_evaluation_125m.slurm
+++ b/evaluation/results/opt/run_opt_evaluation_125m.slurm
@@ -123,11 +123,11 @@ wnli
 wsc
 )
 
-if [ "${#TASKS[@]}" -eq "$SLURM_ARRAY_TASK_COUNT" ];
-    then
-    echo "Please update the array size as the it doesn't correspond to the number of models we want to evaluate. Array size: $SLURM_ARRAY_TASK_COUNT, number of models: ${#TASKS[@]}"
-    exit 1
-fi
+#if [ "${#TASKS[@]}" -ne "$SLURM_ARRAY_TASK_COUNT" ];
+#    then
+#    echo "Please update the array size as it doesn't correspond to the number of models we want to evaluate. Array size: $SLURM_ARRAY_TASK_COUNT, number of models: ${#TASKS[@]}"
+#    exit 1
+#fi
 TASK=${TASKS[$SLURM_ARRAY_TASK_ID]}
 
 OPT_FOLDER=$WORK/opt/opt-125m/$TASK
diff --git a/evaluation/results/opt/run_opt_evaluation_175b.slurm b/evaluation/results/opt/run_opt_evaluation_175b.slurm
index 10a7ea2d..515564cd 100644
--- a/evaluation/results/opt/run_opt_evaluation_175b.slurm
+++ b/evaluation/results/opt/run_opt_evaluation_175b.slurm
@@ -131,11 +131,11 @@ lambada_mt_it
 lambada_mt_es
 )
 
-if [ "${#TASKS[@]}" -eq "$SLURM_ARRAY_TASK_COUNT" ];
-    then
-    echo "Please update the array size as the it doesn't correspond to the number of models we want to evaluate. Array size: $SLURM_ARRAY_TASK_COUNT, number of models: ${#TASKS[@]}"
-    exit 1
-fi
+#if [ "${#TASKS[@]}" -ne "$SLURM_ARRAY_TASK_COUNT" ];
+#    then
+#    echo "Please update the array size as it doesn't correspond to the number of models we want to evaluate. Array size: $SLURM_ARRAY_TASK_COUNT, number of models: ${#TASKS[@]}"
+#    exit 1
+#fi
 TASK=${TASKS[$SLURM_ARRAY_TASK_ID]}
 
 OPT_FOLDER=$WORK/opt/opt-175b/$TASK

From ceb785b020195888cc42cbb3c3b2b217b22487e6 Mon Sep 17 00:00:00 2001
From: thomasw21 <24695242+thomasw21@users.noreply.github.com>
Date: Thu, 7 Jul 2022 15:36:49 +0200
Subject: [PATCH 33/35] Fix the script to work to query both EAI and BS results

---
 .../results/opt/concatenate_all_results.py    | 79 ++++++++++++++-----
 1 file changed, 60 insertions(+), 19 deletions(-)

diff --git a/evaluation/results/opt/concatenate_all_results.py b/evaluation/results/opt/concatenate_all_results.py
index 62316c10..21f67424 100644
--- a/evaluation/results/opt/concatenate_all_results.py
+++ b/evaluation/results/opt/concatenate_all_results.py
@@ -2,6 +2,8 @@
 import json
 import re
 from pathlib import Path
+from re import Pattern
+from typing import List, Dict
 
 
 def get_args():
@@ -10,28 +12,46 @@ def get_args():
     parser.add_argument("--concatenate-output-file", required=True, type=Path, help="Path to store the final output file")
     return parser.parse_args()
 
-def main():
-    args = get_args()
-
-    # Get all json files
+MODEL = "opt-175b-meg-ds"
+# MODEL = "global_step95000"
+RESULTS_REGEX = re.compile(rf"(eai|bs)_results_lm-eval_{MODEL}_(\d{4}-\d{2}-\d{2}-\d{2}-\d{2}-\d{2})_backup\.json")
+def get_all_files_that_match_results_in_folder(root_folder: Path, regex: Pattern) -> List[Path]:
     json_files = []
-    for folder in args.results_dir.iterdir():
-        if folder.is_file():
-            continue
-        for file in folder.iterdir():
-            if file.is_dir():
-                continue
-            match = re.match(
-                r"(?:eai|bs)_results_lm-eval_opt-175b-meg-ds_(?:\d{4})-(?:\d{2})-(?:\d{2})-(?:\d{2})-(?:\d{2})-(?:\d{2})\.json",
-                file.name,
-            )
+    for folder in root_folder.iterdir():
+        if folder.is_dir():
+            json_files += get_all_files_that_match_results_in_folder(folder)
+        else:
+            # it's actually a file
+            file = folder
+
+            match = RESULTS_REGEX.match(file.name)
 
             if match is None:
                 continue
             else:
-                # TODO @thomasw21 some folder can have multiple results we should take the latest
                 json_files.append(file)
-                break
+    return json_files
+
+def sort_dict(dictionary: Dict) -> Dict:
+    results = {}
+
+    for key, value in sorted(dictionary.items(), key=lambda item: item[1]):
+        new_value = value
+
+        if isinstance(value, dict):
+            new_value = sort_dict(new_value)
+        elif isinstance(value, list):
+            new_value = sorted(value)
+
+        results[key] = new_value
+
+    return results
+
+def main():
+    args = get_args()
+
+    # Get all json files
+    json_files = get_all_files_that_match_results_in_folder(args.results_dir)
 
     # Merge all json files
     final_result = {
@@ -42,15 +62,36 @@ def main():
         with open(file, "r") as fi:
             task_result = json.load(fi)
 
-        for key, value in task_result["results"].items():
-            final_result["results"][key] = value
+        match = RESULTS_REGEX.match(file.name)
+        assert match is not None
+        prefix = match.group(1)
+        datetime_string = match.group(2)
+
+        if prefix == "eai":
+            results_key = "results"
+        elif prefix == "bs":
+            results_key = "table_results"
+        else:
+            raise ValueError(f"Unsupported key: {prefix}")
+
+        for key, value in task_result[results_key].items():
+            if key not in final_result["results"]:
+                final_result["results"][key] = {
+                    datetime_string: value
+                }
+            else:
+                assert datetime_string not in final_result["results"][key]
+                final_result["results"][key][datetime_string] = value
 
         for key, value in task_result["versions"].items():
             final_result["versions"][key] = value
 
+    # We sort dict, better for serialization
+    final_result = sort_dict(final_result)
+
     # Save result
     with open(args.concatenate_output_file, "w") as fo:
-        json.dump(final_result, fo)
+        json.dump(final_result, fo, indent=2)
 
     pass
 

From c99efd8a3933601c3b214163411476eeaf50324c Mon Sep 17 00:00:00 2001
From: Niklas Muennighoff <n.muennighoff@gmail.com>
Date: Fri, 15 Jul 2022 12:21:33 +0200
Subject: [PATCH 34/35] Update
 evaluation/results/opt/concatenate_all_results.py

Co-authored-by: Thomas Wang <24695242+thomasw21@users.noreply.github.com>
---
 evaluation/results/opt/concatenate_all_results.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/evaluation/results/opt/concatenate_all_results.py b/evaluation/results/opt/concatenate_all_results.py
index 21f67424..dc2ff023 100644
--- a/evaluation/results/opt/concatenate_all_results.py
+++ b/evaluation/results/opt/concatenate_all_results.py
@@ -35,7 +35,7 @@ def get_all_files_that_match_results_in_folder(root_folder: Path, regex: Pattern
 def sort_dict(dictionary: Dict) -> Dict:
     results = {}
 
-    for key, value in sorted(dictionary.items(), key=lambda item: item[1]):
+    for key, value in sorted(dictionary.items(), key=lambda item: item[0]):
         new_value = value
 
         if isinstance(value, dict):

From 3b701d0bce66a471c2a40a0ce069edbf195b5559 Mon Sep 17 00:00:00 2001
From: Thomas Wang <24695242+thomasw21@users.noreply.github.com>
Date: Fri, 15 Jul 2022 22:26:09 +0200
Subject: [PATCH 35/35] Update
 evaluation/results/opt/concatenate_all_results.py

Co-authored-by: Niklas Muennighoff <n.muennighoff@gmail.com>
---
 evaluation/results/opt/concatenate_all_results.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/evaluation/results/opt/concatenate_all_results.py b/evaluation/results/opt/concatenate_all_results.py
index dc2ff023..4e8fe616 100644
--- a/evaluation/results/opt/concatenate_all_results.py
+++ b/evaluation/results/opt/concatenate_all_results.py
@@ -15,7 +15,8 @@ def get_args():
 MODEL = "opt-175b-meg-ds"
 # MODEL = "global_step95000"
-RESULTS_REGEX = re.compile(rf"(eai|bs)_results_lm-eval_{MODEL}_(\d{4}-\d{2}-\d{2}-\d{2}-\d{2}-\d{2})_backup\.json")
-def get_all_files_that_match_results_in_folder(root_folder: Path, regex: Pattern) -> List[Path]:
+# Braces are doubled: inside an f-string a bare {4} is a replacement field, not a regex quantifier.
+RESULTS_REGEX = re.compile(rf"(eai|bs)_results_lm-eval_{MODEL}_(\d{{4}}-\d{{2}}-\d{{2}}-\d{{2}}-\d{{2}}-\d{{2}})_backup\.json")
+def get_all_files_that_match_results_in_folder(root_folder: Path) -> List[Path]:
     json_files = []
     for folder in root_folder.iterdir():
         if folder.is_dir():