adapter-hub · calpt · Jan 18, 2025 · Jan 7, 2025 · Jan 9, 2025 · Jan 9, 2025
diff --git a/src/adapters/methods/reft.py b/src/adapters/methods/reft.py
@@ -1,3 +1,4 @@
+import logging
 from typing import List, Optional
 
 import torch
@@ -9,6 +10,9 @@
 from .modeling import Activation_Function_Class
 
 
+logger = logging.getLogger(__name__)
+
+
 class ReftUnit(nn.Module):
     def __init__(
         self,
@@ -26,6 +30,13 @@ def __init__(
 
         projection = nn.Linear(in_dim, r_dim, bias=False, dtype=dtype)
         if orthogonal:
+            # orthogonal parametrization is not implemented for half precision; fall back to float32
+            if dtype in (torch.float16, torch.bfloat16):
+                logger.warning(
+                    "Orthogonal parametrization is not supported for half precision dtypes. "
+                    "Converting REFT projection layer to float32."
+                )
+                projection = projection.to(dtype=torch.float32)
             self.projection = nn.utils.parametrizations.orthogonal(projection)
         else:
             self.projection = projection
@@ -93,19 +104,18 @@ def _gather_adapted_states(self, hidden_states: torch.Tensor):
                 )
             # create indexing matrices for prefixes & suffixes
             if self.prefix_positions > 0:
+                real_pref_len = min(self.prefix_positions, hidden_states.size(1))
                 pref_idx = first_non_padding.view(-1, 1, 1) + (
-                    torch.arange(self.prefix_positions)
-                    .unsqueeze(-1)
-                    .expand(bsz, self.prefix_positions, ddim)
-                    .to(hidden_states.device)
+                    torch.arange(real_pref_len).unsqueeze(-1).expand(bsz, real_pref_len, ddim).to(hidden_states.device)
                 )
                 # Cache for next layer
                 context.pref_idx = pref_idx
             if self.suffix_positions > 0:
+                real_suff_len = min(self.suffix_positions, hidden_states.size(1))
                 suff_idx = last_non_padding.view(-1, 1, 1) + (
-                    torch.arange(-self.suffix_positions, 0)
+                    torch.arange(-real_suff_len, 0)
                     .unsqueeze(-1)
-                    .expand(bsz, self.suffix_positions, ddim)
+                    .expand(bsz, real_suff_len, ddim)
                     .to(hidden_states.device)
                 )
                 context.suff_idx = suff_idx
@@ -131,7 +141,7 @@ def _scatter_adapted_states(self, hidden_states: torch.Tensor, adapted_states: L
         context = ForwardContext.get_context()
 
         # merge prefix, suffix and adapted states
-        adapted_output = torch.cat(adapted_states, dim=1)
+        adapted_output = torch.cat(adapted_states, dim=1).to(hidden_states.dtype)
 
         if self.prefix_positions > 0:
             hidden_states = torch.scatter(

diff --git a/tests/methods/base.py b/tests/methods/base.py
@@ -370,3 +370,30 @@ def run_reset_test(self, adapter_config):
         # check forward pass
         self.assertEqual(len(output_1), len(output_2))
         self.assertTrue(torch.allclose(output_1[0], output_2[0], atol=1e-3))
+
+    def run_generate_test(self, adapter_config):
+        """Check that `generate()` works with an adapter built from `adapter_config` and honors `max_length`."""
+        if self.config_class not in ADAPTER_MODEL_MAPPING or (
+            "seq2seq_lm" not in ADAPTER_MODEL_MAPPING[self.config_class].head_types
+            and "causal_lm" not in ADAPTER_MODEL_MAPPING[self.config_class].head_types
+        ):
+            self.skipTest("No seq2seq or causal language model head")
+
+        model1 = AutoAdapterModel.from_config(self.config())
+        model1.add_adapter("dummy", config=adapter_config)
+        if "seq2seq_lm" in ADAPTER_MODEL_MAPPING[self.config_class].head_types:
+            model1.add_seq2seq_lm_head("dummy")
+        else:
+            model1.add_causal_lm_head("dummy")
+        model1.set_active_adapters("dummy")
+        model1.to(torch_device)
+
+        seq_output_length = 32
+        if self.is_speech_model:
+            input_ids = self.get_input_samples((1, 80, 3000), config=model1.config)["input_features"]
+        else:
+            input_ids = self.get_input_samples((1, 4), config=model1.config)["input_ids"]
+        generated = model1.generate(input_ids.to(torch_device), max_length=seq_output_length)
+        # tuple `<=` is lexicographic, so check the batch size and the length bound separately
+        self.assertEqual(generated.shape[0], 1)
+        self.assertLessEqual(generated.shape[1], seq_output_length)
diff --git a/tests/methods/test_compacter.py b/tests/methods/test_compacter.py
@@ -1,5 +1,5 @@
-from adapters import ADAPTER_MODEL_MAPPING, AutoAdapterModel, CompacterPlusPlusConfig
-from transformers.testing_utils import require_torch, torch_device
+from adapters import CompacterPlusPlusConfig
+from transformers.testing_utils import require_torch
 
 from .base import AdapterMethodBaseTestMixin
 
@@ -53,28 +53,4 @@ def test_train_shared_phm_compacter(self):
         self.run_train_test(adapter_config, ["adapters.{name}."])
 
     def test_compacter_generate(self):
-        if self.config_class not in ADAPTER_MODEL_MAPPING or (
-            "seq2seq_lm" not in ADAPTER_MODEL_MAPPING[self.config_class].head_types
-            and "causal_lm" not in ADAPTER_MODEL_MAPPING[self.config_class].head_types
-        ):
-            self.skipTest("No seq2seq or causal language model head")
-
-        model1 = AutoAdapterModel.from_config(self.config())
-        model1.add_adapter("dummy", config=CompacterPlusPlusConfig(phm_dim=2, reduction_factor=8))
-        if "seq2seq_lm" in ADAPTER_MODEL_MAPPING[self.config_class].head_types:
-            model1.add_seq2seq_lm_head("dummy")
-        else:
-            model1.add_causal_lm_head("dummy")
-        model1.set_active_adapters("dummy")
-        model1.to(torch_device)
-
-        seq_output_length = 32
-
-        # Finally, also check if generation works properly
-        if self.is_speech_model:
-            input_ids = self.get_input_samples((1, 80, 3000), config=model1.config)["input_features"]
-        else:
-            input_ids = self.get_input_samples((1, 4), config=model1.config)["input_ids"]
-        input_ids = input_ids.to(torch_device)
-        generated = model1.generate(input_ids, max_length=seq_output_length)
-        self.assertLessEqual(generated.shape, (1, seq_output_length))
+        self.run_generate_test(CompacterPlusPlusConfig(phm_dim=2, reduction_factor=8))
diff --git a/tests/methods/test_prefix_tuning.py b/tests/methods/test_prefix_tuning.py
@@ -1,6 +1,6 @@
 import torch
 
-from adapters import ADAPTER_MODEL_MAPPING, AutoAdapterModel, PrefixTuningConfig
+from adapters import PrefixTuningConfig
 from transformers import CLIPConfig
 from transformers.testing_utils import require_torch, torch_device
 
@@ -76,28 +76,4 @@ def test_eject_prefix(self):
         self.assertTrue(torch.allclose(output_1[0], output_2[0], atol=1e-4))
 
     def test_prefix_tuning_generate(self):
-        if self.config_class not in ADAPTER_MODEL_MAPPING or (
-            "seq2seq_lm" not in ADAPTER_MODEL_MAPPING[self.config_class].head_types
-            and "causal_lm" not in ADAPTER_MODEL_MAPPING[self.config_class].head_types
-        ):
-            self.skipTest("No seq2seq or causal language model head")
-
-        model1 = AutoAdapterModel.from_config(self.config())
-        model1.add_adapter("dummy", config="prefix_tuning")
-        if "seq2seq_lm" in ADAPTER_MODEL_MAPPING[self.config_class].head_types:
-            model1.add_seq2seq_lm_head("dummy")
-        else:
-            model1.add_causal_lm_head("dummy")
-        model1.set_active_adapters("dummy")
-        model1.to(torch_device)
-
-        seq_output_length = 32
-
-        # Finally, also check if generation works properly
-        if self.is_speech_model:
-            input_ids = self.get_input_samples((1, 80, 3000), config=model1.config)["input_features"]
-        else:
-            input_ids = self.get_input_samples((1, 4), config=model1.config)["input_ids"]
-        input_ids = input_ids.to(torch_device)
-        generated = model1.generate(input_ids, max_length=seq_output_length)
-        self.assertLessEqual(generated.shape, (1, seq_output_length))
+        self.run_generate_test(PrefixTuningConfig())
diff --git a/tests/methods/test_reft.py b/tests/methods/test_reft.py
@@ -77,3 +77,6 @@ def test_load_full_model_reft(self):
 
     def test_train_loreft(self):
         self.run_train_test(LoReftConfig(), ["refts.{name}."])
+
+    def test_reft_generate(self):
+        self.run_generate_test(LoReftConfig())