[Inference] Add t5 support for export and inference #267

Merged (38 commits) on Dec 4, 2023.
The diff below shows changes from 19 of the 38 commits.

Commits (38):
64f12e3
init
JingyaHuang Oct 19, 2023
aa5a379
update wrappers
JingyaHuang Oct 20, 2023
6580875
encoder support
JingyaHuang Oct 23, 2023
e997f5f
decoder export
JingyaHuang Oct 24, 2023
7621e39
CLI support
JingyaHuang Oct 25, 2023
1eaa54a
validation
JingyaHuang Oct 25, 2023
2231afb
add seq2seq base model
JingyaHuang Oct 26, 2023
a7ce906
Merge branch 'main' into add-t5-export
JingyaHuang Oct 30, 2023
72ed695
modeling export and loading
JingyaHuang Oct 30, 2023
16ddeeb
fix style
JingyaHuang Nov 5, 2023
8c5d796
Merge branch 'main' into add-t5-export
JingyaHuang Nov 5, 2023
3efdbc8
finish base modeling funcs
JingyaHuang Nov 5, 2023
cdc885e
quick test inference
JingyaHuang Nov 6, 2023
2384e52
fix config loading
JingyaHuang Nov 7, 2023
ae9df1a
finish modeling, works
JingyaHuang Nov 9, 2023
a3784cf
add part of tests
JingyaHuang Nov 9, 2023
348b3b0
Merge branch 'main' into add-t5-export
JingyaHuang Nov 9, 2023
308c08e
tests done
JingyaHuang Nov 9, 2023
9d93f15
Merge branch 'main' into add-t5-export
JingyaHuang Nov 14, 2023
634a908
Merge branch 'main' into add-t5-export
JingyaHuang Nov 17, 2023
12e9311
apply some suggestions
JingyaHuang Nov 17, 2023
b8fb359
Merge branch 'main' into add-t5-export
JingyaHuang Nov 22, 2023
13445c9
fix style
JingyaHuang Nov 22, 2023
065ebaf
Merge branch 'main' into add-t5-export
JingyaHuang Nov 23, 2023
925f8f6
Merge branch 'main' into add-t5-export
JingyaHuang Nov 23, 2023
ded43a4
address part of comments
JingyaHuang Nov 23, 2023
994374b
apply some suggestions
JingyaHuang Nov 23, 2023
5c55ec1
add pad left support and log
JingyaHuang Nov 24, 2023
9396c7a
fix enable custom max length instead of real max length limit
JingyaHuang Nov 27, 2023
9676a65
reuse neuron gen mix
JingyaHuang Nov 28, 2023
e8d72c2
fix beam
JingyaHuang Nov 28, 2023
dd4b1c7
fix tests
JingyaHuang Nov 29, 2023
df7cde7
support optional outputs for decoder
JingyaHuang Nov 29, 2023
92cd6e5
enhance tests
JingyaHuang Dec 1, 2023
6f69d6d
fix style
JingyaHuang Dec 1, 2023
9f461f8
fix style
JingyaHuang Dec 1, 2023
d6a24b6
apply suggestions
JingyaHuang Dec 2, 2023
3b07ba1
fix tests
JingyaHuang Dec 2, 2023
6 changes: 6 additions & 0 deletions optimum/commands/export/neuronx.py
@@ -102,6 +102,12 @@ def parse_args_neuronx(parser: "ArgumentParser"):
type=int,
help=f"Sequence length {doc_input}",
)
input_group.add_argument(
"--num_beams",
type=int,
default=1,
help=f"Number of beams for beam search {doc_input}",
)
input_group.add_argument(
"--num_choices",
type=int,
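Because Neuron compilation traces static graphs, the beam width must be fixed at export time; this new flag is what later lets the decoder emit beam-search outputs (see config.py below). As a standalone sketch of the pattern, only the --num_beams definition is taken from the diff; the parser and group names here are illustrative stand-ins:

from argparse import ArgumentParser

# Minimal reproduction of the argument added above; the parser and group
# are hypothetical, only --num_beams mirrors the diff.
parser = ArgumentParser()
input_group = parser.add_argument_group("input shapes")
input_group.add_argument(
    "--num_beams",
    type=int,
    default=1,
    help="Number of beams for beam search",
)

args = parser.parse_args(["--num_beams", "4"])
print(args.num_beams)  # 4

On the CLI this would surface as something like "optimum-cli export neuron ... --num_beams 4"; the exact invocation depends on the installed optimum-neuron version.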
182 changes: 129 additions & 53 deletions optimum/exporters/neuron/__main__.py
@@ -22,14 +22,16 @@
from typing import TYPE_CHECKING, Any, Dict, Optional, Union

from requests.exceptions import ConnectionError as RequestsConnectionError
from transformers import AutoConfig
from transformers import AutoConfig, PretrainedConfig

from ...neuron.utils import (
DECODER_NAME,
DIFFUSION_MODEL_TEXT_ENCODER_2_NAME,
DIFFUSION_MODEL_TEXT_ENCODER_NAME,
DIFFUSION_MODEL_UNET_NAME,
DIFFUSION_MODEL_VAE_DECODER_NAME,
DIFFUSION_MODEL_VAE_ENCODER_NAME,
ENCODER_NAME,
NEURON_FILE_NAME,
is_neuron_available,
is_neuronx_available,
@@ -43,6 +45,7 @@
from .model_configs import * # noqa: F403
from .utils import (
build_stable_diffusion_components_mandatory_shapes,
get_encoder_decoder_models_for_export,
get_stable_diffusion_models_for_export,
)

@@ -63,8 +66,10 @@


if TYPE_CHECKING:
from transformers import PreTrainedModel

if is_diffusers_available():
from diffusers import StableDiffusionPipeline
from diffusers import DiffusionPipeline, StableDiffusionPipeline


logger = logging.get_logger()
@@ -102,7 +107,11 @@ def infer_task(task: str, model_name_or_path: str) -> str:

def normalize_input_shapes(task: str, args: argparse.Namespace) -> Dict[str, int]:
config = AutoConfig.from_pretrained(args.model)

model_type = config.model_type.replace("_", "-")
if config.is_encoder_decoder:
model_type = model_type + "-encoder"

neuron_config_constructor = TasksManager.get_exporter_config_constructor(
model_type=model_type, exporter="neuron", task=task
)
@@ -172,6 +181,115 @@ def infer_stable_diffusion_shapes_from_diffusers(
return input_shapes


def _get_submodels_and_neuron_configs(
model: Union["PreTrainedModel", "DiffusionPipeline"],
input_shapes: Dict[str, int],
task: str,
output: Path,
dynamic_batch_size: bool = False,
model_name_or_path: Optional[Union[str, Path]] = None,
):
is_stable_diffusion = "stable-diffusion" in task
is_encoder_decoder = (
getattr(model.config, "is_encoder_decoder", False) if isinstance(model.config, PretrainedConfig) else False
)

if is_stable_diffusion:
models_and_neuron_configs, output_model_names = _get_submodels_and_neuron_configs_for_stable_diffusion(
model, input_shapes, task, output, dynamic_batch_size
)
elif is_encoder_decoder:
models_and_neuron_configs, output_model_names = _get_submodels_and_neuron_configs_for_encoder_decoder(
model, input_shapes, task, output, dynamic_batch_size, model_name_or_path
)
else:
neuron_config_constructor = TasksManager.get_exporter_config_constructor(
model=model, exporter="neuron", task=task
)
neuron_config = neuron_config_constructor(model.config, dynamic_batch_size=dynamic_batch_size, **input_shapes)
model_name = model.name_or_path.split("/")[-1]
output_model_names = {model_name: "model.neuron"}
models_and_neuron_configs = {model_name: (model, neuron_config)}
maybe_save_preprocessors(model_name_or_path, output)
return models_and_neuron_configs, output_model_names


def _get_submodels_and_neuron_configs_for_stable_diffusion(
model: Union["PreTrainedModel", "DiffusionPipeline"],
input_shapes: Dict[str, int],
task: str,
output: Path,
dynamic_batch_size: bool = False,
):
check_compiler_compatibility_for_stable_diffusion()
if is_neuron_available():
raise RuntimeError(
"Stable diffusion export is not supported by neuron-cc on inf1, please use neuronx-cc on either inf2/trn1 instead."
)
input_shapes = infer_stable_diffusion_shapes_from_diffusers(input_shapes, model)

# Saving the model config and preprocessor as this is needed sometimes.
model.scheduler.save_pretrained(output.joinpath("scheduler"))
if hasattr(model, "tokenizer") and model.tokenizer is not None:
model.tokenizer.save_pretrained(output.joinpath("tokenizer"))
if hasattr(model, "tokenizer_2") and model.tokenizer_2 is not None:
model.tokenizer_2.save_pretrained(output.joinpath("tokenizer_2"))
if hasattr(model, "feature_extractor"):
model.feature_extractor.save_pretrained(output.joinpath("feature_extractor"))
model.save_config(output)

models_and_neuron_configs = get_stable_diffusion_models_for_export(
pipeline=model,
task=task,
dynamic_batch_size=dynamic_batch_size,
**input_shapes,
)
output_model_names = {
DIFFUSION_MODEL_UNET_NAME: os.path.join(DIFFUSION_MODEL_UNET_NAME, NEURON_FILE_NAME),
DIFFUSION_MODEL_VAE_ENCODER_NAME: os.path.join(DIFFUSION_MODEL_VAE_ENCODER_NAME, NEURON_FILE_NAME),
DIFFUSION_MODEL_VAE_DECODER_NAME: os.path.join(DIFFUSION_MODEL_VAE_DECODER_NAME, NEURON_FILE_NAME),
}
if hasattr(model, "text_encoder") and model.text_encoder is not None:
output_model_names[DIFFUSION_MODEL_TEXT_ENCODER_NAME] = os.path.join(
DIFFUSION_MODEL_TEXT_ENCODER_NAME, NEURON_FILE_NAME
)
if hasattr(model, "text_encoder_2") and model.text_encoder_2 is not None:
output_model_names[DIFFUSION_MODEL_TEXT_ENCODER_2_NAME] = os.path.join(
DIFFUSION_MODEL_TEXT_ENCODER_2_NAME, NEURON_FILE_NAME
)
del model

return models_and_neuron_configs, output_model_names


def _get_submodels_and_neuron_configs_for_encoder_decoder(
model: "PreTrainedModel",
input_shapes: Dict[str, int],
task: str,
output: Path,
dynamic_batch_size: bool = False,
model_name_or_path: Optional[Union[str, Path]] = None,
):
if is_neuron_available():
raise RuntimeError(
"Encoder-decoder models export is not supported by neuron-cc on inf1, please use neuronx-cc on either inf2/trn1 instead."
)

models_and_neuron_configs = get_encoder_decoder_models_for_export(
model=model,
task=task,
dynamic_batch_size=dynamic_batch_size,
input_shapes=input_shapes,
)
output_model_names = {
ENCODER_NAME: os.path.join(ENCODER_NAME, NEURON_FILE_NAME),
DECODER_NAME: os.path.join(DECODER_NAME, NEURON_FILE_NAME),
}
maybe_save_preprocessors(model_name_or_path, output)

return models_and_neuron_configs, output_model_names


def main_export(
model_name_or_path: str,
output: Union[str, Path],
@@ -194,6 +312,7 @@ def main_export(
output.parent.mkdir(parents=True)

task = TasksManager.map_from_synonym(task)
is_stable_diffusion = "stable-diffusion" in task

model_kwargs = {
"task": task,
@@ -209,57 +328,14 @@
}
model = TasksManager.get_model_from_task(**model_kwargs)

is_stable_diffusion = "stable-diffusion" in task
if not is_stable_diffusion:
neuron_config_constructor = TasksManager.get_exporter_config_constructor(
model=model, exporter="neuron", task=task
)
neuron_config = neuron_config_constructor(model.config, dynamic_batch_size=dynamic_batch_size, **input_shapes)
if atol is None:
atol = neuron_config.ATOL_FOR_VALIDATION
model_name = model.name_or_path.split("/")[-1]
output_model_names = {model_name: "model.neuron"}
models_and_neuron_configs = {model_name: (model, neuron_config)}
maybe_save_preprocessors(model, output.parent)

if is_stable_diffusion:
check_compiler_compatibility_for_stable_diffusion()
if is_neuron_available():
raise RuntimeError(
"Stable diffusion export is not supported by neuron-cc on inf1, please use neuronx-cc on either inf2/trn1 instead."
)
input_shapes = infer_stable_diffusion_shapes_from_diffusers(input_shapes, model)

# Saving the model config and preprocessor as this is needed sometimes.
model.scheduler.save_pretrained(output.joinpath("scheduler"))
if hasattr(model, "tokenizer") and model.tokenizer is not None:
model.tokenizer.save_pretrained(output.joinpath("tokenizer"))
if hasattr(model, "tokenizer_2") and model.tokenizer_2 is not None:
model.tokenizer_2.save_pretrained(output.joinpath("tokenizer_2"))
if hasattr(model, "feature_extractor"):
model.feature_extractor.save_pretrained(output.joinpath("feature_extractor"))
model.save_config(output)

models_and_neuron_configs = get_stable_diffusion_models_for_export(
pipeline=model,
task=task,
dynamic_batch_size=dynamic_batch_size,
**input_shapes,
)
output_model_names = {
DIFFUSION_MODEL_UNET_NAME: os.path.join(DIFFUSION_MODEL_UNET_NAME, NEURON_FILE_NAME),
DIFFUSION_MODEL_VAE_ENCODER_NAME: os.path.join(DIFFUSION_MODEL_VAE_ENCODER_NAME, NEURON_FILE_NAME),
DIFFUSION_MODEL_VAE_DECODER_NAME: os.path.join(DIFFUSION_MODEL_VAE_DECODER_NAME, NEURON_FILE_NAME),
}
if hasattr(model, "text_encoder") and model.text_encoder is not None:
output_model_names[DIFFUSION_MODEL_TEXT_ENCODER_NAME] = os.path.join(
DIFFUSION_MODEL_TEXT_ENCODER_NAME, NEURON_FILE_NAME
)
if hasattr(model, "text_encoder_2") and model.text_encoder_2 is not None:
output_model_names[DIFFUSION_MODEL_TEXT_ENCODER_2_NAME] = os.path.join(
DIFFUSION_MODEL_TEXT_ENCODER_2_NAME, NEURON_FILE_NAME
)
del model
models_and_neuron_configs, output_model_names = _get_submodels_and_neuron_configs(
model=model,
input_shapes=input_shapes,
task=task,
output=output,
dynamic_batch_size=dynamic_batch_size,
model_name_or_path=model_name_or_path,
)

_, neuron_outputs = export_models(
models_and_neuron_configs=models_and_neuron_configs,
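With this refactor, a T5 checkpoint is routed through _get_submodels_and_neuron_configs_for_encoder_decoder and exported as two artifacts, an encoder and a decoder, each in its own subfolder (ENCODER_NAME and DECODER_NAME). A hedged sketch of driving the export from Python; the import path follows this module, and passing static shapes as keyword arguments is an assumption based on the CLI options:

from optimum.exporters.neuron import main_export  # import path assumed

# Hypothetical invocation of the encoder-decoder branch added above.
# The shape kwargs (batch_size, sequence_length, num_beams) are assumed
# to be forwarded to the neuron config, mirroring the CLI flags.
main_export(
    model_name_or_path="t5-small",
    output="t5_small_neuron/",
    task="text2text-generation",
    batch_size=1,
    sequence_length=64,
    num_beams=4,
)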
4 changes: 3 additions & 1 deletion optimum/exporters/neuron/base.py
@@ -119,6 +119,7 @@ def __init__(
audio_sequence_length: Optional[int] = None,
point_batch_size: Optional[int] = None,
nb_points_per_image: Optional[int] = None,
num_beams: Optional[int] = None,
# TODO: add custom dtype after optimum 1.13 release
# int_dtype: str = "int64",
# float_dtype: str = "fp32",
@@ -147,6 +148,7 @@ def __init__(
"audio_sequence_length": audio_sequence_length,
"point_batch_size": point_batch_size,
"nb_points_per_image": nb_points_per_image,
"num_beams": num_beams,
}
input_shapes = {}
for name, value in axes_values.items():
@@ -290,7 +292,7 @@ def flatten_inputs(cls, inputs: Dict[str, Any]) -> Dict[str, Any]:
flatten[name] = value
return flatten

def check_model_inputs_order(
def patch_model_for_export(
self,
model: "PreTrainedModel",
dummy_inputs: Optional[Dict[str, torch.Tensor]] = None,
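num_beams now joins the other static axes collected in axes_values. The loop that turns axes_values into input_shapes is truncated in this diff; presumably it drops axes left as None, as in this standalone illustration:

# Illustration only: assumes the truncated loop above filters out
# None-valued axes before storing input_shapes.
axes_values = {
    "batch_size": 1,
    "sequence_length": 64,
    "num_beams": 4,
    "point_batch_size": None,  # axis unused by a text model
}
input_shapes = {name: value for name, value in axes_values.items() if value is not None}
print(input_shapes)  # {'batch_size': 1, 'sequence_length': 64, 'num_beams': 4}

Note also the rename of check_model_inputs_order to patch_model_for_export, a name suggesting the hook may rewrite the model for tracing rather than merely reorder its inputs.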
86 changes: 86 additions & 0 deletions optimum/exporters/neuron/config.py
@@ -16,9 +16,13 @@
Common Neuron configuration classes that handle most of the features for building model specific
configurations.
"""
from typing import Dict, List

from ...utils import (
DummyBboxInputGenerator,
DummyInputGenerator,
DummySeq2SeqDecoderTextInputGenerator,
DummySeq2SeqPastKeyValuesGenerator,
DummyTextInputGenerator,
DummyVisionInputGenerator,
logging,
@@ -61,3 +65,85 @@ class TextNeuronDecoderConfig(NeuronDecoderConfig):
"""

pass


class TextSeq2SeqNeuronConfig(NeuronConfig):
"""
Handles encoder-decoder-based text architectures.
"""

DUMMY_INPUT_GENERATOR_CLASSES = (
DummyTextInputGenerator,
DummySeq2SeqDecoderTextInputGenerator,
DummySeq2SeqPastKeyValuesGenerator,
)

@property
def is_decoder(self) -> bool:
raise NotImplementedError()

@property
def inputs(self) -> List[str]:
common_inputs = []
# encoder + decoder without past
if "encoder" in self.MODEL_TYPE:
common_inputs = ["input_ids", "attention_mask"]
# decoder with past
if "decoder" in self.MODEL_TYPE:
common_inputs = [
"decoder_input_ids",
"decoder_attention_mask",
"encoder_hidden_states",
"attention_mask", # TODO: replace with `encoder_attention_mask` after optimum 1.14 release
]

return common_inputs

@property
def outputs(self) -> List[str]:
common_outputs = []
# encoder + decoder without past
if "encoder" in self.MODEL_TYPE:
common_outputs = (
[f"present.{idx}.self.key" for idx in range(self._config.num_decoder_layers)]
+ [f"present.{idx}.self.value" for idx in range(self._config.num_decoder_layers)]
+ [f"present.{idx}.cross.key" for idx in range(self._config.num_decoder_layers)]
+ [f"present.{idx}.cross.value" for idx in range(self._config.num_decoder_layers)]
)
# decoder with past
if "decoder" in self.MODEL_TYPE:
beam_outputs = (
["next_token_scores", "next_tokens", "next_indices"] if self.num_beams > 1 else ["next_tokens"]
)
# for i in range(self._config.num_decoder_layers):
common_outputs = (
beam_outputs
+ [f"past.{idx}.self.key" for idx in range(self._config.num_decoder_layers)]
+ [f"past.{idx}.self.value" for idx in range(self._config.num_decoder_layers)]
+ [f"past.{idx}.cross.key" for idx in range(self._config.num_decoder_layers)]
+ [f"past.{idx}.cross.value" for idx in range(self._config.num_decoder_layers)]
)
return common_outputs

def _create_dummy_input_generator_classes(self, **kwargs) -> List["DummyInputGenerator"]:
dummy_text_input_generator = self.DUMMY_INPUT_GENERATOR_CLASSES[0](
self.task, self._normalized_config, **kwargs
)
dummy_decoder_text_input_generator = self.DUMMY_INPUT_GENERATOR_CLASSES[1](
self.task,
self._normalized_config,
**kwargs,
)
dummy_seq2seq_past_key_values_generator = self.DUMMY_INPUT_GENERATOR_CLASSES[2](
self.task,
self._normalized_config,
encoder_sequence_length=dummy_text_input_generator.sequence_length,
**kwargs,
)
dummy_inputs_generators = [
dummy_text_input_generator,
dummy_decoder_text_input_generator,
dummy_seq2seq_past_key_values_generator,
]

return dummy_inputs_generators
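To make the key/value cache naming scheme concrete, here is the decoder output list reproduced outside the class, for a hypothetical config with two decoder layers and greedy search (num_beams = 1):

# Standalone reproduction of the decoder branch of the outputs property
# above, with num_decoder_layers = 2 and num_beams = 1 (hypothetical values).
num_decoder_layers = 2
num_beams = 1

beam_outputs = (
    ["next_token_scores", "next_tokens", "next_indices"] if num_beams > 1 else ["next_tokens"]
)
outputs = (
    beam_outputs
    + [f"past.{idx}.self.key" for idx in range(num_decoder_layers)]
    + [f"past.{idx}.self.value" for idx in range(num_decoder_layers)]
    + [f"past.{idx}.cross.key" for idx in range(num_decoder_layers)]
    + [f"past.{idx}.cross.value" for idx in range(num_decoder_layers)]
)
print(outputs)
# ['next_tokens',
#  'past.0.self.key', 'past.1.self.key',
#  'past.0.self.value', 'past.1.self.value',
#  'past.0.cross.key', 'past.1.cross.key',
#  'past.0.cross.value', 'past.1.cross.value']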