Bigbird fusion #425

Open
wants to merge 10 commits into base: legacy
6 changes: 4 additions & 2 deletions src/transformers/__init__.py
@@ -146,7 +146,7 @@
"models.bert_generation": ["BertGenerationConfig"],
"models.bert_japanese": ["BertJapaneseTokenizer", "CharacterTokenizer", "MecabTokenizer"],
"models.bertweet": ["BertweetTokenizer"],
"models.big_bird": ["BIG_BIRD_PRETRAINED_CONFIG_ARCHIVE_MAP", "BigBirdConfig"],
"models.big_bird": ["BIG_BIRD_PRETRAINED_CONFIG_ARCHIVE_MAP", "BigBirdConfig","BigBirdTokenizer"],
Member review comment (suggested change):
-"models.big_bird": ["BIG_BIRD_PRETRAINED_CONFIG_ARCHIVE_MAP", "BigBirdConfig","BigBirdTokenizer"],
+"models.big_bird": ["BIG_BIRD_PRETRAINED_CONFIG_ARCHIVE_MAP", "BigBirdConfig"],

"models.bigbird_pegasus": [
"BIGBIRD_PEGASUS_PRETRAINED_CONFIG_ARCHIVE_MAP",
"BigBirdPegasusConfig",
@@ -2023,6 +2023,7 @@
"AdapterType",
"AutoAdapterModel",
"AutoModelWithHeads",
"BigBirdModelWithHeads",
"BartAdapterModel",
"BartModelWithHeads",
"BertAdapterModel",
@@ -2985,7 +2986,7 @@
from .models.bert_generation import BertGenerationConfig
from .models.bert_japanese import BertJapaneseTokenizer, CharacterTokenizer, MecabTokenizer
from .models.bertweet import BertweetTokenizer
-from .models.big_bird import BIG_BIRD_PRETRAINED_CONFIG_ARCHIVE_MAP, BigBirdConfig
+from .models.big_bird import BIG_BIRD_PRETRAINED_CONFIG_ARCHIVE_MAP, BigBirdConfig,BigBirdTokenizer
Member review comment (suggested change):
-from .models.big_bird import BIG_BIRD_PRETRAINED_CONFIG_ARCHIVE_MAP, BigBirdConfig,BigBirdTokenizer
+from .models.big_bird import BIG_BIRD_PRETRAINED_CONFIG_ARCHIVE_MAP, BigBirdConfig

from .models.bigbird_pegasus import BIGBIRD_PEGASUS_PRETRAINED_CONFIG_ARCHIVE_MAP, BigBirdPegasusConfig
from .models.blenderbot import BLENDERBOT_PRETRAINED_CONFIG_ARCHIVE_MAP, BlenderbotConfig, BlenderbotTokenizer
from .models.blenderbot_small import (
@@ -4562,6 +4563,7 @@
AdapterType,
AutoAdapterModel,
AutoModelWithHeads,
+BigBirdModelWithHeads,
BartAdapterModel,
BartModelWithHeads,
BertAdapterModel,
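For context, the export changes above are what make the new BigBird adapter classes importable from `transformers`. The end-to-end AdapterFusion workflow this PR targets would look roughly like the sketch below; this is a hedged illustration that assumes this branch is installed, and the adapter names `task_a`/`task_b` are placeholders, not real Hub adapters.

```python
# Sketch only: assumes this PR's BigBird adapter support is installed.
from transformers.adapters import BigBirdAdapterModel
from transformers.adapters.composition import Fuse

model = BigBirdAdapterModel.from_pretrained("google/bigbird-roberta-base")

# Add two placeholder task adapters, then a fusion layer on top of them.
for task in ("task_a", "task_b"):
    model.add_adapter(task)
model.add_adapter_fusion(Fuse("task_a", "task_b"))

# Activate the fused setup and train only the fusion weights.
model.set_active_adapters(Fuse("task_a", "task_b"))
model.train_adapter_fusion(Fuse("task_a", "task_b"))
```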
240 changes: 240 additions & 0 deletions src/transformers/adapters/__init__ copy.py
@@ -0,0 +1,240 @@
# flake8: noqa
Member review comment: Please remove this file.

# There's no way to ignore "F401 '...' imported but unused" warnings in this
# module, but to preserve other warnings. So, don't check this module at all.

# Copyright 2020 The Adapter-Hub Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

__version__ = "3.1.0a0"

from typing import TYPE_CHECKING

from ..utils import _LazyModule


_import_structure = {
"composition": [
"AdapterCompositionBlock",
"BatchSplit",
"Fuse",
"Parallel",
"Split",
"Stack",
"parse_composition",
"validate_composition",
],
"configuration": [
"ADAPTER_CONFIG_MAP",
"ADAPTERFUSION_CONFIG_MAP",
"DEFAULT_ADAPTER_CONFIG",
"DEFAULT_ADAPTERFUSION_CONFIG",
"AdapterConfig",
"AdapterConfigBase",
"AdapterFusionConfig",
"CompacterConfig",
"CompacterPlusPlusConfig",
"ConfigUnion",
"DynamicAdapterFusionConfig",
"HoulsbyConfig",
"HoulsbyInvConfig",
"LoRAConfig",
"MAMConfig",
"ModelAdaptersConfig",
"ParallelConfig",
"PfeifferConfig",
"PfeifferInvConfig",
"PrefixTuningConfig",
"StaticAdapterFusionConfig",
],
"context": [
"AdapterSetup",
"ForwardContext",
],
"heads": [
"BertStyleMaskedLMHead",
"BiaffineParsingHead",
"CausalLMHead",
"ClassificationHead",
"DependencyParsingOutput",
"ModelWithFlexibleHeadsAdaptersMixin",
"MultiHeadOutput",
"MultiLabelClassificationHead",
"MultipleChoiceHead",
"PredictionHead",
"QuestionAnsweringHead",
"Seq2SeqLMHead",
"TaggingHead",
],
"layer": ["AdapterLayer", "AdapterLayerBase"],
"model_mixin": [
"EmbeddingAdaptersMixin",
"InvertibleAdaptersMixin",
"ModelAdaptersMixin",
"ModelWithHeadsAdaptersMixin",
],
"models.auto": [
"ADAPTER_MODEL_MAPPING",
"MODEL_WITH_HEADS_MAPPING",
"AutoAdapterModel",
"AutoModelWithHeads",
],
"models.bigbird":[
"BigBirdAdapterModel",
"BigBirdModelWithHeads",
],
"models.bart": [
"BartAdapterModel",
"BartModelWithHeads",
],
"models.bert": [
"BertAdapterModel",
"BertModelWithHeads",
],
"models.deberta": ["DebertaAdapterModel"],
"models.debertaV2": ["DebertaV2AdapterModel"],
"models.distilbert": [
"DistilBertAdapterModel",
"DistilBertModelWithHeads",
],
"models.gpt2": [
"GPT2AdapterModel",
"GPT2ModelWithHeads",
],
"models.mbart": [
"MBartAdapterModel",
"MBartModelWithHeads",
],
"models.roberta": [
"RobertaAdapterModel",
"RobertaModelWithHeads",
],
"models.big_bird": [
"BigBirdAdapterModel",
"BigBirdModelWithHeads",
],
"models.t5": [
"T5AdapterModel",
"T5ModelWithHeads",
],
"models.vit": ["ViTAdapterModel"],
"models.xlm_roberta": [
"XLMRobertaAdapterModel",
"XLMRobertaModelWithHeads",
],
"trainer": ["AdapterTrainer", "Seq2SeqAdapterTrainer"],
"training": [
"AdapterArguments",
"MultiLingAdapterArguments",
],
"utils": [
"ADAPTER_CACHE",
"AdapterInfo",
"AdapterType",
"get_adapter_config_hash",
"get_adapter_info",
"list_adapters",
],
}


if TYPE_CHECKING:
from .composition import (
AdapterCompositionBlock,
BatchSplit,
Fuse,
Parallel,
Split,
Stack,
parse_composition,
validate_composition,
)
from .configuration import (
ADAPTER_CONFIG_MAP,
ADAPTERFUSION_CONFIG_MAP,
DEFAULT_ADAPTER_CONFIG,
DEFAULT_ADAPTERFUSION_CONFIG,
AdapterConfig,
AdapterConfigBase,
AdapterFusionConfig,
CompacterConfig,
CompacterPlusPlusConfig,
ConfigUnion,
DynamicAdapterFusionConfig,
HoulsbyConfig,
HoulsbyInvConfig,
LoRAConfig,
MAMConfig,
ModelAdaptersConfig,
ParallelConfig,
PfeifferConfig,
PfeifferInvConfig,
PrefixTuningConfig,
StaticAdapterFusionConfig,
)
from .context import AdapterSetup, ForwardContext
from .heads import (
BertStyleMaskedLMHead,
BiaffineParsingHead,
CausalLMHead,
ClassificationHead,
DependencyParsingOutput,
ModelWithFlexibleHeadsAdaptersMixin,
MultiHeadOutput,
MultiLabelClassificationHead,
MultipleChoiceHead,
PredictionHead,
QuestionAnsweringHead,
Seq2SeqLMHead,
TaggingHead,
)
from .layer import AdapterLayer, AdapterLayerBase
from .model_mixin import (
EmbeddingAdaptersMixin,
InvertibleAdaptersMixin,
ModelAdaptersMixin,
ModelWithHeadsAdaptersMixin,
)
from .models.auto import ADAPTER_MODEL_MAPPING, MODEL_WITH_HEADS_MAPPING, AutoAdapterModel, AutoModelWithHeads
from .models.bart import BartAdapterModel, BartModelWithHeads
from .models.bert import BertAdapterModel, BertModelWithHeads
from .models.deberta import DebertaAdapterModel
from .models.debertaV2 import DebertaV2AdapterModel
from .models.distilbert import DistilBertAdapterModel, DistilBertModelWithHeads
from .models.gpt2 import GPT2AdapterModel, GPT2ModelWithHeads
from .models.mbart import MBartAdapterModel, MBartModelWithHeads
from .models.roberta import RobertaAdapterModel, RobertaModelWithHeads
from .models.big_bird import BigBirdAdapterModel, BigBirdModelWithHeads
from .models.t5 import T5AdapterModel, T5ModelWithHeads
from .models.vit import ViTAdapterModel
from .models.xlm_roberta import XLMRobertaAdapterModel, XLMRobertaModelWithHeads
from .trainer import AdapterTrainer, Seq2SeqAdapterTrainer
from .training import AdapterArguments, MultiLingAdapterArguments
from .utils import (
ADAPTER_CACHE,
AdapterInfo,
AdapterType,
get_adapter_config_hash,
get_adapter_info,
list_adapters,
)

else:
import sys

sys.modules[__name__] = _LazyModule(
__name__,
globals()["__file__"],
_import_structure,
extra_objects={"__version__": __version__},
)
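As an aside, this file (like the real `adapters/__init__.py`) follows the `_import_structure` plus `_LazyModule` pattern: nothing is imported until an exported symbol is first accessed. Below is a rough, self-contained sketch of the idea; `LazyModule` here is a hypothetical stand-in, not the actual `transformers.utils._LazyModule`.

```python
import importlib
import types


class LazyModule(types.ModuleType):
    """Hypothetical sketch: defer submodule imports until attribute access."""

    def __init__(self, name, import_structure):
        super().__init__(name)
        # Map each exported symbol to the submodule that defines it.
        self._symbol_to_module = {
            symbol: submodule
            for submodule, symbols in import_structure.items()
            for symbol in symbols
        }

    def __getattr__(self, attr):
        # Only called when normal lookup fails, i.e. on first access.
        submodule = self._symbol_to_module.get(attr)
        if submodule is None:
            raise AttributeError(f"module {self.__name__!r} has no attribute {attr!r}")
        value = getattr(importlib.import_module(f"{self.__name__}.{submodule}"), attr)
        setattr(self, attr, value)  # cache so later lookups bypass __getattr__
        return value
```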
5 changes: 5 additions & 0 deletions src/transformers/adapters/__init__.py
@@ -112,6 +112,10 @@
"models.mbart": [
"MBartAdapterModel",
"MBartModelWithHeads",
],
"models.big_bird":[
"BigBirdAdapterModel",
"BigBirdModelWithHeads",
],
"models.roberta": [
"RobertaAdapterModel",
@@ -209,6 +213,7 @@
from .models.distilbert import DistilBertAdapterModel, DistilBertModelWithHeads
from .models.gpt2 import GPT2AdapterModel, GPT2ModelWithHeads
from .models.mbart import MBartAdapterModel, MBartModelWithHeads
+from .models.big_bird import BigBirdAdapterModel, BigBirdModelWithHeads
from .models.roberta import RobertaAdapterModel, RobertaModelWithHeads
from .models.t5 import T5AdapterModel, T5ModelWithHeads
from .models.vit import ViTAdapterModel
2 changes: 1 addition & 1 deletion src/transformers/adapters/heads/base.py
@@ -530,7 +530,7 @@ def tie_weights(self):
if hasattr(self, self.base_model_prefix):
self = getattr(self, self.base_model_prefix)
self._tie_encoder_decoder_weights(self.encoder, self.decoder, self.base_model_prefix)

+return self.get_input_embeddings()

def _resize_token_embeddings(self, new_num_tokens):
10 changes: 10 additions & 0 deletions src/transformers/adapters/model_mixin.py
@@ -986,9 +986,19 @@ def add_adapter(self, adapter_name: str, config=None, overwrite_ok: bool = False
If self.base_model is self, must inherit from a class that implements this method, to preclude infinite
recursion
"""

# print("#=================================================================================================")
# # print(self) #Robertamodelwithheads #Big BirdModel with heads
# print("#====================================================================================================")
# print(self.base_model) #Robertamodel #BigBirdmodel
# print("#======================================================================================================")

Member review comment on lines +989 to +995: Please remove.

if self.base_model is self:
super().add_adapter(adapter_name, config, overwrite_ok=overwrite_ok, set_active=set_active)
else:
+# print(config)
+# print(adapter_name)
+# print("#===========================================================================================")
self.base_model.add_adapter(adapter_name, config, overwrite_ok=overwrite_ok, set_active=set_active)

def train_adapter(self, adapter_setup: Union[list, AdapterCompositionBlock], train_embeddings=False):
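The `self.base_model is self` guard above is what terminates the delegation: a bare backbone is its own `base_model` and handles the call via `super()`, while a heads-bearing wrapper forwards to its backbone. From the user's side the call looks the same either way; here is a hedged sketch, with the checkpoint name assumed and `"my_task"` a placeholder adapter name.

```python
# Sketch only: assumes this branch's BigBird adapter support is installed.
from transformers.adapters import BigBirdAdapterModel

model = BigBirdAdapterModel.from_pretrained("google/bigbird-roberta-base")
model.add_adapter("my_task")          # forwarded to model.base_model.add_adapter(...)
model.train_adapter("my_task")        # freeze the backbone, train only adapter weights
model.set_active_adapters("my_task")  # route the forward pass through the adapter
```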
3 changes: 3 additions & 0 deletions src/transformers/adapters/models/auto/adapter_model.py
@@ -10,6 +10,7 @@
[
("xlm-roberta", "XLMRobertaAdapterModel"),
("roberta", "RobertaAdapterModel"),
("big_bird", "BigBirdAdapterModel"),
("bert", "BertAdapterModel"),
("distilbert", "DistilBertAdapterModel"),
("deberta-v2", "DebertaV2AdapterModel"),
@@ -21,10 +22,12 @@
("vit", "ViTAdapterModel"),
]
)

MODEL_WITH_HEADS_MAPPING_NAMES = OrderedDict(
[
("xlm-roberta", "XLMRobertaModelWithHeads"),
("roberta", "RobertaModelWithHeads"),
("big_bird", "BigBirdModelWithHeads"),
("bert", "BertModelWithHeads"),
("distilbert", "DistilBertModelWithHeads"),
("bart", "BartModelWithHeads"),
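These string mappings are how the auto classes dispatch on `config.model_type`: registering `("big_bird", "BigBirdAdapterModel")` is what lets `AutoAdapterModel.from_pretrained` resolve a BigBird checkpoint to the new class. A simplified, hypothetical version of that lookup:

```python
from collections import OrderedDict

# Hypothetical simplification of the dispatch performed by the auto classes.
ADAPTER_MODEL_MAPPING_NAMES = OrderedDict(
    [
        ("roberta", "RobertaAdapterModel"),
        ("big_bird", "BigBirdAdapterModel"),  # entry added by this PR
    ]
)


def resolve_adapter_model_class(model_type: str) -> str:
    try:
        return ADAPTER_MODEL_MAPPING_NAMES[model_type]
    except KeyError as err:
        raise ValueError(f"No adapter model registered for {model_type!r}") from err


print(resolve_adapter_model_class("big_bird"))  # -> BigBirdAdapterModel
```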
44 changes: 44 additions & 0 deletions src/transformers/adapters/models/auto_/__init__.py
@@ -0,0 +1,44 @@
# flake8: noqa
# There's no way to ignore "F401 '...' imported but unused" warnings in this
# module, but to preserve other warnings. So, don't check this module at all.

# Copyright 2020 The Adapter-Hub Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import TYPE_CHECKING

from ....utils import _LazyModule


_import_structure = {
"adapter_model": [
"ADAPTER_MODEL_MAPPING",
"MODEL_WITH_HEADS_MAPPING",
"AutoAdapterModel",
"AutoModelWithHeads",
],
}


if TYPE_CHECKING:
from .adapter_model import ADAPTER_MODEL_MAPPING, MODEL_WITH_HEADS_MAPPING, AutoAdapterModel, AutoModelWithHeads

else:
import sys

sys.modules[__name__] = _LazyModule(
__name__,
globals()["__file__"],
_import_structure,
)