Fix prediction head loading for T5 #640

Merged · 1 commit · Jan 27, 2024
src/adapters/heads/base.py — 15 changes: 11 additions & 4 deletions

@@ -955,14 +955,13 @@ def _load_pretrained_model(
         **kwargs,
     ):
         # Filter only weights not part of base model
+        loader = PredictionHeadLoader(model, error_on_missing=False, convert_to_flex_head=True)
+        filter_func = loader.filter_func(None)
         if state_dict is not None:
-            head_state_dict = {
-                key: value for key, value in state_dict.items() if not key.startswith(cls.base_model_prefix)
-            }
+            head_state_dict = {key: value for key, value in state_dict.items() if filter_func(key)}
         else:
             head_state_dict = None
         head_name = "default"
-        loader = PredictionHeadLoader(model, error_on_missing=False, convert_to_flex_head=True)
         head_config, new_head_state_dict = loader.convert_static_to_flex_head(head_state_dict, load_as=head_name)

         if head_config is not None:
@@ -973,6 +972,14 @@ def _load_pretrained_model(
             model.add_prediction_head_from_config(head_name, head_config, overwrite_ok=True)

         if new_head_state_dict is not None:
+            # Always ensure base_model_prefix is added, otherwise loading head weights does not work.
+            if len(model.base_model_prefix) > 0 and not any(
+                s.startswith(model.base_model_prefix) for s in loaded_keys
+            ):
+                rename_func = lambda x: model.base_model_prefix + "." + x if x not in head_state_dict else x
+                state_dict = {rename_func(k): v for k, v in state_dict.items()}
+                loaded_keys = [rename_func(k) for k in loaded_keys]
+
             for k in head_state_dict:
                 del state_dict[k]
                 loaded_keys.remove(k)
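For context, here is a minimal, self-contained sketch of the problem this hunk addresses. The toy state dict, prefix value, and filter below are illustrative only, not the library's actual objects: a static T5-style checkpoint stores its base-model weights without the base_model_prefix, so a plain prefix check misclassifies them as head weights, while a model-aware filter plus the prefix rename sorts them correctly.

```python
# Illustrative sketch (not the library code): why filtering head weights by
# "does not start with base_model_prefix" breaks for T5-style checkpoints,
# and how a filter function plus a prefix rename fixes it.

base_model_prefix = "transformer"  # assumed prefix of the flex-head model

# Hypothetical static T5 checkpoint: base-model weights are saved WITHOUT
# the "transformer." prefix.
toy_state_dict = {
    "shared.weight": "...",
    "encoder.block.0.layer.0.SelfAttention.q.weight": "...",
    "decoder.block.0.layer.0.SelfAttention.q.weight": "...",
    "lm_head.weight": "...",  # the only true head weight
}

# Old behaviour: nothing starts with the prefix, so everything is treated as a head weight.
old_head_keys = [k for k in toy_state_dict if not k.startswith(base_model_prefix)]
assert len(old_head_keys) == 4  # wrong: base-model weights included

# New behaviour: a model-type-aware filter (in the spirit of
# PredictionHeadLoader.filter_func for t5/mt5) excludes encoder, decoder and
# embedding weights explicitly.
def t5_head_filter(key):
    return not (
        key.startswith("encoder")
        or key.startswith("decoder")
        or key.startswith("shared")
        or key.startswith(base_model_prefix)
    )

head_keys = [k for k in toy_state_dict if t5_head_filter(k)]
assert head_keys == ["lm_head.weight"]  # only the prediction head remains

# Remaining base-model weights get the missing prefix so that the flex-head
# model (which nests the base model under base_model_prefix) can load them.
renamed = {
    (k if k in head_keys else f"{base_model_prefix}.{k}"): v
    for k, v in toy_state_dict.items()
}
assert "transformer.encoder.block.0.layer.0.SelfAttention.q.weight" in renamed
```

With the prefix in place, the base-model weights line up with the flex-head model's parameter names, and only the true head weights are removed from state_dict before the base model is loaded.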
src/adapters/loading.py — 5 changes: 4 additions & 1 deletion

@@ -706,7 +706,7 @@ def __init__(self, model, error_on_missing=True, convert_to_flex_head=False):

     def filter_func(self, head_name):
         # ToDo remove this workaround
-        if self.model.__class__.__name__ in ["T5ForConditionalGeneration", "T5ForQuestionAnswering"]:
+        if self.model.config.model_type in ["t5", "mt5"]:
             if head_name:
                 return (
                     lambda x: not x.startswith("encoder")
@@ -909,6 +909,9 @@ def convert_static_to_flex_head(self, state_dict, load_as="default"):
         assert self.convert_to_flex_head, "load_from_state_dict() can only be used with convert_to_flex_head=True."
         assert hasattr(self.model, "heads"), "load_from_state_dict() can only be used with flex heads model class."

+        if state_dict is None:
+            return None, None
+
         conversion_rename_func = None

         original_model_class = self.model.config.architectures[0] if self.model.config.architectures else None
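As a rough illustration of the first change — the toy classes below are hypothetical, not the adapters API — keying the workaround on config.model_type covers every architecture built on the T5/mT5 backbone, whereas a hard-coded class-name list silently skips any class it does not mention. The second change simply lets convert_static_to_flex_head return (None, None) early when no state dict was passed in.

```python
# Hypothetical sketch: class-name check vs. model_type check.
from dataclasses import dataclass

@dataclass
class ToyConfig:
    model_type: str

@dataclass
class ToyModel:
    config: ToyConfig

def needs_t5_workaround_old(class_name):
    # Old check: misses any T5-based class that is not in the hard-coded list.
    return class_name in ["T5ForConditionalGeneration", "T5ForQuestionAnswering"]

def needs_t5_workaround_new(model):
    # New check: any model whose config reports a T5-family model_type is covered.
    return model.config.model_type in ["t5", "mt5"]

mt5_model = ToyModel(ToyConfig(model_type="mt5"))
print(needs_t5_workaround_old("MT5ForConditionalGeneration"))  # False -> workaround skipped
print(needs_t5_workaround_new(mt5_model))                      # True  -> workaround applied
```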