- # coding=utf-8
- # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
- # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- """Configuration base class and utilities."""
- import copy
- import json
- import os
- import re
- import warnings
- from typing import Any, Dict, List, Optional, Tuple, Union
- from packaging import version
- from . import __version__
- from .dynamic_module_utils import custom_object_save
- from .modeling_gguf_pytorch_utils import load_gguf_checkpoint
- from .utils import (
- CONFIG_NAME,
- PushToHubMixin,
- add_model_info_to_auto_map,
- add_model_info_to_custom_pipelines,
- cached_file,
- copy_func,
- download_url,
- extract_commit_hash,
- is_remote_url,
- is_torch_available,
- logging,
- )
- logger = logging.get_logger(__name__)
- _re_configuration_file = re.compile(r"config\.(.*)\.json")
- class PretrainedConfig(PushToHubMixin):
- # no-format
- r"""
- Base class for all configuration classes. Handles a few parameters common to all models' configurations as well as
- methods for loading/downloading/saving configurations.
- <Tip>
- A configuration file can be loaded and saved to disk. Loading the configuration file and using this file to
- initialize a model does **not** load the model weights. It only affects the model's configuration.
- </Tip>
- Class attributes (overridden by derived classes):
- - **model_type** (`str`) -- An identifier for the model type, serialized into the JSON file, and used to recreate
- the correct object in [`~transformers.AutoConfig`].
- - **is_composition** (`bool`) -- Whether the config class is composed of multiple sub-configs. In this case the
- config has to be initialized from two or more configs of type [`~transformers.PretrainedConfig`] like:
- [`~transformers.EncoderDecoderConfig`] or [`~RagConfig`].
- - **keys_to_ignore_at_inference** (`List[str]`) -- A list of keys to ignore by default when looking at dictionary
- outputs of the model during inference.
- - **attribute_map** (`Dict[str, str]`) -- A dict that maps model specific attribute names to the standardized
- naming of attributes.
- Common attributes (present in all subclasses):
- - **vocab_size** (`int`) -- The number of tokens in the vocabulary, which is also the first dimension of the
- embeddings matrix (this attribute may be missing for models that don't have a text modality like ViT).
- - **hidden_size** (`int`) -- The hidden size of the model.
- - **num_attention_heads** (`int`) -- The number of attention heads used in the multi-head attention layers of the
- model.
- - **num_hidden_layers** (`int`) -- The number of blocks in the model.
- <Tip warning={true}>
- Setting parameters for sequence generation in the model config is deprecated. For backward compatibility, loading
- some of them will still be possible, but attempting to overwrite them will throw an exception -- you should set
- them in a [`~transformers.GenerationConfig`]. Check the documentation of [`~transformers.GenerationConfig`] for more
- information about the individual parameters.
- </Tip>
- Args:
- name_or_path (`str`, *optional*, defaults to `""`):
- Store the string that was passed to [`PreTrainedModel.from_pretrained`] or
- [`TFPreTrainedModel.from_pretrained`] as `pretrained_model_name_or_path` if the configuration was created
- with such a method.
- output_hidden_states (`bool`, *optional*, defaults to `False`):
- Whether or not the model should return all hidden-states.
- output_attentions (`bool`, *optional*, defaults to `False`):
- Whether or not the model should return all attentions.
- return_dict (`bool`, *optional*, defaults to `True`):
- Whether or not the model should return a [`~transformers.utils.ModelOutput`] instead of a plain tuple.
- is_encoder_decoder (`bool`, *optional*, defaults to `False`):
- Whether the model is used as an encoder/decoder or not.
- is_decoder (`bool`, *optional*, defaults to `False`):
- Whether the model is used as a decoder or not (if not, the model is used as an encoder).
- cross_attention_hidden_size (`int`, *optional*):
- The hidden size of the cross-attention layer in case the model is used as a decoder in an encoder-decoder
- setting and the cross-attention hidden dimension differs from `self.config.hidden_size`.
- add_cross_attention (`bool`, *optional*, defaults to `False`):
- Whether cross-attention layers should be added to the model. Note, this option is only relevant for models
- that can be used as decoder models within the [`EncoderDecoderModel`] class, which consists of all models
- in `AUTO_MODELS_FOR_CAUSAL_LM`.
- tie_encoder_decoder (`bool`, *optional*, defaults to `False`):
- Whether all encoder weights should be tied to their equivalent decoder weights. This requires the encoder
- and decoder model to have the exact same parameter names.
- pruned_heads (`Dict[int, List[int]]`, *optional*, defaults to `{}`):
- Pruned heads of the model. The keys are the selected layer indices and the associated values, the list of
- heads to prune in said layer.
- For instance `{1: [0, 2], 2: [2, 3]}` will prune heads 0 and 2 on layer 1 and heads 2 and 3 on layer 2.
- chunk_size_feed_forward (`int`, *optional*, defaults to `0`):
- The chunk size of all feed forward layers in the residual attention blocks. A chunk size of `0` means that
- the feed forward layer is not chunked. A chunk size of n means that the feed forward layer processes `n` <
- sequence_length embeddings at a time. For more information on feed forward chunking, see [How does Feed
- Forward Chunking work?](../glossary.html#feed-forward-chunking).
- > Parameters for fine-tuning tasks
- architectures (`List[str]`, *optional*):
- Model architectures that can be used with the model's pretrained weights.
- finetuning_task (`str`, *optional*):
- Name of the task used to fine-tune the model. This can be used when converting from an original (TensorFlow
- or PyTorch) checkpoint.
- id2label (`Dict[int, str]`, *optional*):
- A map from index (for instance prediction index, or target index) to label.
- label2id (`Dict[str, int]`, *optional*): A map from label to index for the model.
- num_labels (`int`, *optional*):
- Number of labels to use in the last layer added to the model, typically for a classification task.
- task_specific_params (`Dict[str, Any]`, *optional*):
- Additional keyword arguments to store for the current task.
- problem_type (`str`, *optional*):
- Problem type for `XxxForSequenceClassification` models. Can be one of `"regression"`,
- `"single_label_classification"` or `"multi_label_classification"`.
- > Parameters linked to the tokenizer
- tokenizer_class (`str`, *optional*):
- The name of the associated tokenizer class to use (if none is set, will use the tokenizer associated to the
- model by default).
- prefix (`str`, *optional*):
- A specific prompt that should be added at the beginning of each text before calling the model.
- bos_token_id (`int`, *optional*): The id of the _beginning-of-stream_ token.
- pad_token_id (`int`, *optional*): The id of the _padding_ token.
- eos_token_id (`int`, *optional*): The id of the _end-of-stream_ token.
- decoder_start_token_id (`int`, *optional*):
- If an encoder-decoder model starts decoding with a different token than _bos_, the id of that token.
- sep_token_id (`int`, *optional*): The id of the _separation_ token.
- > PyTorch specific parameters
- torchscript (`bool`, *optional*, defaults to `False`):
- Whether or not the model should be used with Torchscript.
- tie_word_embeddings (`bool`, *optional*, defaults to `True`):
- Whether the model's input and output word embeddings should be tied. Note that this is only relevant if the
- model has an output word embedding layer.
- torch_dtype (`str`, *optional*):
- The `dtype` of the weights. This attribute can be used to initialize the model to a non-default `dtype`
- (which is normally `float32`) and thus allow for optimal storage allocation. For example, if the saved
- model is `float16`, ideally we want to load it back using the minimal amount of memory needed to load
- `float16` weights. Since the config object is stored in plain text, this attribute contains just the
- floating type string without the `torch.` prefix. For example, for `torch.float16`, `torch_dtype` is the
- `"float16"` string.
- This attribute is currently not used during model loading, but this may change in future
- versions. We can already start preparing for that by saving the dtype with `save_pretrained`.
- > TensorFlow specific parameters
- use_bfloat16 (`bool`, *optional*, defaults to `False`):
- Whether or not the model should use BFloat16 scalars (only used by some TensorFlow models).
- tf_legacy_loss (`bool`, *optional*, defaults to `False`):
- Whether the model should use legacy TensorFlow losses. Legacy losses have variable output shapes and may
- not be XLA-compatible. This option is here for backward compatibility and will be removed in Transformers
- v5.
- loss_type (`str`, *optional*):
- The type of loss that the model should use. It should be in `LOSS_MAPPING`'s keys, otherwise the loss will
- be automatically inferred from the model architecture.
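- Example (an illustrative sketch; `BertConfig` stands in for any concrete subclass, since
- `PretrainedConfig` itself is not tied to a real model):
- ```python
- from transformers import BertConfig
- config = BertConfig(num_labels=3, output_attentions=True)
- config.id2label  # {0: "LABEL_0", 1: "LABEL_1", 2: "LABEL_2"}
- ```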
- """
- model_type: str = ""
- is_composition: bool = False
- attribute_map: Dict[str, str] = {}
- _auto_class: Optional[str] = None
- def __setattr__(self, key, value):
- if key in super().__getattribute__("attribute_map"):
- key = super().__getattribute__("attribute_map")[key]
- super().__setattr__(key, value)
- def __getattribute__(self, key):
- if key != "attribute_map" and key in super().__getattribute__("attribute_map"):
- key = super().__getattribute__("attribute_map")[key]
- return super().__getattribute__(key)
- def __init__(self, **kwargs):
- # Attributes with defaults
- self.return_dict = kwargs.pop("return_dict", True)
- self.output_hidden_states = kwargs.pop("output_hidden_states", False)
- self.output_attentions = kwargs.pop("output_attentions", False)
- self.torchscript = kwargs.pop("torchscript", False) # Only used by PyTorch models
- self.torch_dtype = kwargs.pop("torch_dtype", None) # Only used by PyTorch models
- self.use_bfloat16 = kwargs.pop("use_bfloat16", False)
- self.tf_legacy_loss = kwargs.pop("tf_legacy_loss", False) # Only used by TensorFlow models
- self.pruned_heads = kwargs.pop("pruned_heads", {})
- self.tie_word_embeddings = kwargs.pop(
- "tie_word_embeddings", True
- ) # Whether input and output word embeddings should be tied for all MLM, LM and Seq2Seq models.
- self.chunk_size_feed_forward = kwargs.pop("chunk_size_feed_forward", 0)
- # `is_decoder` is used in encoder-decoder models to differentiate the encoder from the decoder
- self.is_encoder_decoder = kwargs.pop("is_encoder_decoder", False)
- self.is_decoder = kwargs.pop("is_decoder", False)
- self.cross_attention_hidden_size = kwargs.pop("cross_attention_hidden_size", None)
- self.add_cross_attention = kwargs.pop("add_cross_attention", False)
- self.tie_encoder_decoder = kwargs.pop("tie_encoder_decoder", False)
- # Retrocompatibility: Parameters for sequence generation. While we will keep the ability to load these
- # parameters, saving them will be deprecated. In a distant future, we won't need to load them.
- for parameter_name, default_value in self._get_global_generation_defaults().items():
- setattr(self, parameter_name, kwargs.pop(parameter_name, default_value))
- # Fine-tuning task arguments
- self.architectures = kwargs.pop("architectures", None)
- self.finetuning_task = kwargs.pop("finetuning_task", None)
- self.id2label = kwargs.pop("id2label", None)
- self.label2id = kwargs.pop("label2id", None)
- if self.label2id is not None and not isinstance(self.label2id, dict):
- raise ValueError("Argument label2id should be a dictionary.")
- if self.id2label is not None:
- if not isinstance(self.id2label, dict):
- raise ValueError("Argument id2label should be a dictionary.")
- num_labels = kwargs.pop("num_labels", None)
- if num_labels is not None and len(self.id2label) != num_labels:
- logger.warning(
- f"You passed along `num_labels={num_labels}` with an incompatible id to label map: "
- f"{self.id2label}. The number of labels wil be overwritten to {self.num_labels}."
- )
- self.id2label = {int(key): value for key, value in self.id2label.items()}
- # Keys are always strings in JSON so convert ids to int here.
- else:
- self.num_labels = kwargs.pop("num_labels", 2)
- if self.torch_dtype is not None and isinstance(self.torch_dtype, str):
- # we will start using self.torch_dtype in v5, but to be consistent with
- # from_pretrained's torch_dtype arg convert it to an actual torch.dtype object
- if is_torch_available():
- import torch
- self.torch_dtype = getattr(torch, self.torch_dtype)
- # Tokenizer arguments TODO: eventually tokenizer and models should share the same config
- self.tokenizer_class = kwargs.pop("tokenizer_class", None)
- self.prefix = kwargs.pop("prefix", None)
- self.bos_token_id = kwargs.pop("bos_token_id", None)
- self.pad_token_id = kwargs.pop("pad_token_id", None)
- self.eos_token_id = kwargs.pop("eos_token_id", None)
- self.sep_token_id = kwargs.pop("sep_token_id", None)
- self.decoder_start_token_id = kwargs.pop("decoder_start_token_id", None)
- # task specific arguments
- self.task_specific_params = kwargs.pop("task_specific_params", None)
- # regression / multi-label classification
- self.problem_type = kwargs.pop("problem_type", None)
- allowed_problem_types = ("regression", "single_label_classification", "multi_label_classification")
- if self.problem_type is not None and self.problem_type not in allowed_problem_types:
- raise ValueError(
- f"The config parameter `problem_type` was not understood: received {self.problem_type} "
- "but only 'regression', 'single_label_classification' and 'multi_label_classification' are valid."
- )
- # TPU arguments
- if kwargs.pop("xla_device", None) is not None:
- logger.warning(
- "The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can "
- "safely remove it from your `config.json` file."
- )
- # Name or path to the pretrained checkpoint
- self._name_or_path = str(kwargs.pop("name_or_path", ""))
- # Config hash
- self._commit_hash = kwargs.pop("_commit_hash", None)
- # Attention implementation to use, if relevant.
- self._attn_implementation_internal = kwargs.pop("attn_implementation", None)
- self._attn_implementation_autoset = False
- # Drop the transformers version info
- self.transformers_version = kwargs.pop("transformers_version", None)
- # Deal with gradient checkpointing
- if kwargs.get("gradient_checkpointing", False):
- warnings.warn(
- "Passing `gradient_checkpointing` to a config initialization is deprecated and will be removed in v5 "
- "Transformers. Using `model.gradient_checkpointing_enable()` instead, or if you are using the "
- "`Trainer` API, pass `gradient_checkpointing=True` in your `TrainingArguments`."
- )
- # Additional attributes without default values
- for key, value in kwargs.items():
- try:
- setattr(self, key, value)
- except AttributeError as err:
- logger.error(f"Can't set {key} with value {value} for {self}")
- raise err
- @property
- def name_or_path(self) -> str:
- return getattr(self, "_name_or_path", None)
- @name_or_path.setter
- def name_or_path(self, value):
- self._name_or_path = str(value) # Make sure that name_or_path is a string (for JSON encoding)
- @property
- def use_return_dict(self) -> bool:
- """
- `bool`: Whether or not to return [`~utils.ModelOutput`] instead of tuples.
- """
- # If torchscript is set, force `return_dict=False` to avoid jit errors
- return self.return_dict and not self.torchscript
- @property
- def num_labels(self) -> int:
- """
- `int`: The number of labels for classification models.
- """
- return len(self.id2label)
- @num_labels.setter
- def num_labels(self, num_labels: int):
- if not hasattr(self, "id2label") or self.id2label is None or len(self.id2label) != num_labels:
- self.id2label = {i: f"LABEL_{i}" for i in range(num_labels)}
- self.label2id = dict(zip(self.id2label.values(), self.id2label.keys()))
- @property
- def _attn_implementation(self):
- # This property is made private for now, as it cannot be changed yet and a `PreTrainedModel.use_attn_implementation` method needs to be implemented.
- if hasattr(self, "_attn_implementation_internal"):
- if self._attn_implementation_internal is None:
- # `config.attn_implementation` should never be None, for backward compatibility.
- return "eager"
- else:
- return self._attn_implementation_internal
- else:
- return "eager"
- @_attn_implementation.setter
- def _attn_implementation(self, value):
- self._attn_implementation_internal = value
- def save_pretrained(self, save_directory: Union[str, os.PathLike], push_to_hub: bool = False, **kwargs):
- """
- Save a configuration object to the directory `save_directory`, so that it can be re-loaded using the
- [`~PretrainedConfig.from_pretrained`] class method.
- Args:
- save_directory (`str` or `os.PathLike`):
- Directory where the configuration JSON file will be saved (will be created if it does not exist).
- push_to_hub (`bool`, *optional*, defaults to `False`):
- Whether or not to push your model to the Hugging Face model hub after saving it. You can specify the
- repository you want to push to with `repo_id` (will default to the name of `save_directory` in your
- namespace).
- kwargs (`Dict[str, Any]`, *optional*):
- Additional keyword arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
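- Example (a minimal sketch; the directory name is arbitrary):
- ```python
- from transformers import BertConfig
- config = BertConfig()
- config.save_pretrained("./my-bert-config")  # writes ./my-bert-config/config.json
- reloaded = BertConfig.from_pretrained("./my-bert-config")
- ```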
- """
- self._set_token_in_kwargs(kwargs)
- if os.path.isfile(save_directory):
- raise AssertionError(f"Provided path ({save_directory}) should be a directory, not a file")
- non_default_generation_parameters = self._get_non_default_generation_parameters()
- if len(non_default_generation_parameters) > 0:
- # TODO (joao): this should be an exception if the user has modified the loaded config. See #33886
- warnings.warn(
- "Some non-default generation parameters are set in the model config. These should go into either a) "
- "`model.generation_config` (as opposed to `model.config`); OR b) a GenerationConfig file "
- "(https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model)."
- "This warning will become an exception in the future."
- f"\nNon-default generation parameters: {str(non_default_generation_parameters)}",
- UserWarning,
- )
- os.makedirs(save_directory, exist_ok=True)
- if push_to_hub:
- commit_message = kwargs.pop("commit_message", None)
- repo_id = kwargs.pop("repo_id", save_directory.split(os.path.sep)[-1])
- repo_id = self._create_repo(repo_id, **kwargs)
- files_timestamps = self._get_files_timestamps(save_directory)
- # If we have a custom config, we copy the file defining it in the folder and set the attributes so it can be
- # loaded from the Hub.
- if self._auto_class is not None:
- custom_object_save(self, save_directory, config=self)
- # If we save using the predefined names, we can load using `from_pretrained`
- output_config_file = os.path.join(save_directory, CONFIG_NAME)
- self.to_json_file(output_config_file, use_diff=True)
- logger.info(f"Configuration saved in {output_config_file}")
- if push_to_hub:
- self._upload_modified_files(
- save_directory,
- repo_id,
- files_timestamps,
- commit_message=commit_message,
- token=kwargs.get("token"),
- )
- @staticmethod
- def _set_token_in_kwargs(kwargs, token=None):
- """Temporary method to deal with `token` and `use_auth_token`.
- This method avoids applying the same changes in all model config classes that overwrite `from_pretrained`.
- Need to clean up `use_auth_token` in a follow-up PR.
- """
- # Some model config classes like CLIP define their own `from_pretrained` without the new argument `token` yet.
- if token is None:
- token = kwargs.pop("token", None)
- use_auth_token = kwargs.pop("use_auth_token", None)
- if use_auth_token is not None:
- warnings.warn(
- "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.",
- FutureWarning,
- )
- if token is not None:
- raise ValueError(
- "`token` and `use_auth_token` are both specified. Please set only the argument `token`."
- )
- token = use_auth_token
- if token is not None:
- kwargs["token"] = token
- @classmethod
- def from_pretrained(
- cls,
- pretrained_model_name_or_path: Union[str, os.PathLike],
- cache_dir: Optional[Union[str, os.PathLike]] = None,
- force_download: bool = False,
- local_files_only: bool = False,
- token: Optional[Union[str, bool]] = None,
- revision: str = "main",
- **kwargs,
- ) -> "PretrainedConfig":
- r"""
- Instantiate a [`PretrainedConfig`] (or a derived class) from a pretrained model configuration.
- Args:
- pretrained_model_name_or_path (`str` or `os.PathLike`):
- This can be either:
- - a string, the *model id* of a pretrained model configuration hosted inside a model repo on
- huggingface.co.
- - a path to a *directory* containing a configuration file saved using the
- [`~PretrainedConfig.save_pretrained`] method, e.g., `./my_model_directory/`.
- - a path or url to a saved configuration JSON *file*, e.g., `./my_model_directory/configuration.json`.
- cache_dir (`str` or `os.PathLike`, *optional*):
- Path to a directory in which a downloaded pretrained model configuration should be cached if the
- standard cache should not be used.
- force_download (`bool`, *optional*, defaults to `False`):
- Whether or not to force (re-)downloading the configuration files and overriding the cached versions if
- they exist.
- resume_download:
- Deprecated and ignored. All downloads are now resumed by default when possible.
- Will be removed in v5 of Transformers.
- proxies (`Dict[str, str]`, *optional*):
- A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
- 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
- token (`str` or `bool`, *optional*):
- The token to use as HTTP bearer authorization for remote files. If `True`, or not specified, will use
- the token generated when running `huggingface-cli login` (stored in `~/.huggingface`).
- revision (`str`, *optional*, defaults to `"main"`):
- The specific model version to use. It can be a branch name, a tag name, or a commit id. Since we use a
- git-based system for storing models and other artifacts on huggingface.co, `revision` can be any
- identifier allowed by git.
- <Tip>
- To test a pull request you made on the Hub, you can pass `revision="refs/pr/<pr_number>"`.
- </Tip>
- return_unused_kwargs (`bool`, *optional*, defaults to `False`):
- If `False`, then this function returns just the final configuration object.
- If `True`, then this function returns a `Tuple(config, unused_kwargs)` where *unused_kwargs* is a
- dictionary consisting of the key/value pairs whose keys are not configuration attributes: i.e., the
- part of `kwargs` which has not been used to update `config` and is otherwise ignored.
- subfolder (`str`, *optional*, defaults to `""`):
- In case the relevant files are located inside a subfolder of the model repo on huggingface.co, you can
- specify the folder name here.
- kwargs (`Dict[str, Any]`, *optional*):
- The values in kwargs of any keys which are configuration attributes will be used to override the loaded
- values. Behavior concerning key/value pairs whose keys are *not* configuration attributes is controlled
- by the `return_unused_kwargs` keyword parameter.
- Returns:
- [`PretrainedConfig`]: The configuration object instantiated from this pretrained model.
- Examples:
- ```python
- # We can't instantiate directly the base class *PretrainedConfig* so let's show the examples on a
- # derived class: BertConfig
- config = BertConfig.from_pretrained(
- "google-bert/bert-base-uncased"
- ) # Download configuration from huggingface.co and cache.
- config = BertConfig.from_pretrained(
- "./test/saved_model/"
- ) # E.g. config (or model) was saved using *save_pretrained('./test/saved_model/')*
- config = BertConfig.from_pretrained("./test/saved_model/my_configuration.json")
- config = BertConfig.from_pretrained("google-bert/bert-base-uncased", output_attentions=True, foo=False)
- assert config.output_attentions == True
- config, unused_kwargs = BertConfig.from_pretrained(
- "google-bert/bert-base-uncased", output_attentions=True, foo=False, return_unused_kwargs=True
- )
- assert config.output_attentions == True
- assert unused_kwargs == {"foo": False}
- ```"""
- kwargs["cache_dir"] = cache_dir
- kwargs["force_download"] = force_download
- kwargs["local_files_only"] = local_files_only
- kwargs["revision"] = revision
- cls._set_token_in_kwargs(kwargs, token)
- config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
- if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
- logger.warning(
- f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
- f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
- )
- return cls.from_dict(config_dict, **kwargs)
- @classmethod
- def get_config_dict(
- cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs
- ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
- """
- From a `pretrained_model_name_or_path`, resolve to a dictionary of parameters, to be used for instantiating a
- [`PretrainedConfig`] using `from_dict`.
- Parameters:
- pretrained_model_name_or_path (`str` or `os.PathLike`):
- The identifier of the pre-trained checkpoint from which we want the dictionary of parameters.
- Returns:
- `Tuple[Dict, Dict]`: The dictionary(ies) that will be used to instantiate the configuration object.
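- Example (illustrative; any checkpoint identifier accepted by `from_pretrained` works the same way):
- ```python
- from transformers import BertConfig
- config_dict, unused_kwargs = BertConfig.get_config_dict("google-bert/bert-base-uncased")
- config_dict["model_type"]  # "bert"
- ```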
- """
- cls._set_token_in_kwargs(kwargs)
- original_kwargs = copy.deepcopy(kwargs)
- # Get config dict associated with the base config file
- config_dict, kwargs = cls._get_config_dict(pretrained_model_name_or_path, **kwargs)
- if config_dict is None:
- return {}, kwargs
- if "_commit_hash" in config_dict:
- original_kwargs["_commit_hash"] = config_dict["_commit_hash"]
- # That config file may point us toward another config file to use.
- if "configuration_files" in config_dict:
- configuration_file = get_configuration_file(config_dict["configuration_files"])
- config_dict, kwargs = cls._get_config_dict(
- pretrained_model_name_or_path, _configuration_file=configuration_file, **original_kwargs
- )
- return config_dict, kwargs
- @classmethod
- def _get_config_dict(
- cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs
- ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
- cache_dir = kwargs.pop("cache_dir", None)
- force_download = kwargs.pop("force_download", False)
- resume_download = kwargs.pop("resume_download", None)
- proxies = kwargs.pop("proxies", None)
- token = kwargs.pop("token", None)
- local_files_only = kwargs.pop("local_files_only", False)
- revision = kwargs.pop("revision", None)
- trust_remote_code = kwargs.pop("trust_remote_code", None)
- subfolder = kwargs.pop("subfolder", "")
- from_pipeline = kwargs.pop("_from_pipeline", None)
- from_auto_class = kwargs.pop("_from_auto", False)
- commit_hash = kwargs.pop("_commit_hash", None)
- gguf_file = kwargs.get("gguf_file", None)
- if trust_remote_code is True:
- logger.warning(
- "The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is"
- " ignored."
- )
- user_agent = {"file_type": "config", "from_auto_class": from_auto_class}
- if from_pipeline is not None:
- user_agent["using_pipeline"] = from_pipeline
- pretrained_model_name_or_path = str(pretrained_model_name_or_path)
- is_local = os.path.isdir(pretrained_model_name_or_path)
- if os.path.isfile(os.path.join(subfolder, pretrained_model_name_or_path)):
- # Special case when pretrained_model_name_or_path is a local file
- resolved_config_file = pretrained_model_name_or_path
- is_local = True
- elif is_remote_url(pretrained_model_name_or_path):
- configuration_file = pretrained_model_name_or_path if gguf_file is None else gguf_file
- resolved_config_file = download_url(pretrained_model_name_or_path)
- else:
- configuration_file = kwargs.pop("_configuration_file", CONFIG_NAME) if gguf_file is None else gguf_file
- try:
- # Load from local folder or from cache or download from model Hub and cache
- resolved_config_file = cached_file(
- pretrained_model_name_or_path,
- configuration_file,
- cache_dir=cache_dir,
- force_download=force_download,
- proxies=proxies,
- resume_download=resume_download,
- local_files_only=local_files_only,
- token=token,
- user_agent=user_agent,
- revision=revision,
- subfolder=subfolder,
- _commit_hash=commit_hash,
- )
- if resolved_config_file is None:
- return None, kwargs
- commit_hash = extract_commit_hash(resolved_config_file, commit_hash)
- except EnvironmentError:
- # Re-raise any environment error raised by `cached_file`. It will have a helpful error message adapted to
- # the original exception.
- raise
- except Exception:
- # For any other exception, we throw a generic error.
- raise EnvironmentError(
- f"Can't load the configuration of '{pretrained_model_name_or_path}'. If you were trying to load it"
- " from 'https://huggingface.co/models', make sure you don't have a local directory with the same"
- f" name. Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a directory"
- f" containing a {configuration_file} file"
- )
- try:
- if gguf_file:
- config_dict = load_gguf_checkpoint(resolved_config_file, return_tensors=False)["config"]
- else:
- # Load config dict
- config_dict = cls._dict_from_json_file(resolved_config_file)
- config_dict["_commit_hash"] = commit_hash
- except (json.JSONDecodeError, UnicodeDecodeError):
- raise EnvironmentError(
- f"It looks like the config file at '{resolved_config_file}' is not a valid JSON file."
- )
- if is_local:
- logger.info(f"loading configuration file {resolved_config_file}")
- else:
- logger.info(f"loading configuration file {configuration_file} from cache at {resolved_config_file}")
- if "auto_map" in config_dict and not is_local:
- config_dict["auto_map"] = add_model_info_to_auto_map(
- config_dict["auto_map"], pretrained_model_name_or_path
- )
- if "custom_pipelines" in config_dict and not is_local:
- config_dict["custom_pipelines"] = add_model_info_to_custom_pipelines(
- config_dict["custom_pipelines"], pretrained_model_name_or_path
- )
- return config_dict, kwargs
- @classmethod
- def from_dict(cls, config_dict: Dict[str, Any], **kwargs) -> "PretrainedConfig":
- """
- Instantiates a [`PretrainedConfig`] from a Python dictionary of parameters.
- Args:
- config_dict (`Dict[str, Any]`):
- Dictionary that will be used to instantiate the configuration object. Such a dictionary can be
- retrieved from a pretrained checkpoint by leveraging the [`~PretrainedConfig.get_config_dict`] method.
- kwargs (`Dict[str, Any]`):
- Additional parameters from which to initialize the configuration object.
- Returns:
- [`PretrainedConfig`]: The configuration object instantiated from those parameters.
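- Example (illustrative; pairs `get_config_dict` with `from_dict`):
- ```python
- from transformers import BertConfig
- config_dict, _ = BertConfig.get_config_dict("google-bert/bert-base-uncased")
- config = BertConfig.from_dict(config_dict, output_attentions=True)
- ```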
- """
- return_unused_kwargs = kwargs.pop("return_unused_kwargs", False)
- # Those arguments may be passed along for our internal telemetry.
- # We remove them so they don't appear in `return_unused_kwargs`.
- kwargs.pop("_from_auto", None)
- kwargs.pop("_from_pipeline", None)
- # The commit hash might have been updated in the `config_dict`, we don't want the kwargs to erase that update.
- if "_commit_hash" in kwargs and "_commit_hash" in config_dict:
- kwargs["_commit_hash"] = config_dict["_commit_hash"]
- # We remove it from kwargs so that it does not appear in `return_unused_kwargs`.
- config_dict["attn_implementation"] = kwargs.pop("attn_implementation", None)
- config = cls(**config_dict)
- if hasattr(config, "pruned_heads"):
- config.pruned_heads = {int(key): value for key, value in config.pruned_heads.items()}
- # Update config with kwargs if needed
- if "num_labels" in kwargs and "id2label" in kwargs:
- num_labels = kwargs["num_labels"]
- id2label = kwargs["id2label"] if kwargs["id2label"] is not None else []
- if len(id2label) != num_labels:
- raise ValueError(
- f"You passed along `num_labels={num_labels }` with an incompatible id to label map: "
- f"{kwargs['id2label']}. Since those arguments are inconsistent with each other, you should remove "
- "one of them."
- )
- to_remove = []
- for key, value in kwargs.items():
- if hasattr(config, key):
- current_attr = getattr(config, key)
- # To authorize passing a custom subconfig as kwarg in models that have nested configs.
- if isinstance(current_attr, PretrainedConfig) and isinstance(value, dict):
- value = current_attr.__class__(**value)
- setattr(config, key, value)
- if key != "torch_dtype":
- to_remove.append(key)
- for key in to_remove:
- kwargs.pop(key, None)
- logger.info(f"Model config {config}")
- if return_unused_kwargs:
- return config, kwargs
- else:
- return config
- @classmethod
- def from_json_file(cls, json_file: Union[str, os.PathLike]) -> "PretrainedConfig":
- """
- Instantiates a [`PretrainedConfig`] from the path to a JSON file of parameters.
- Args:
- json_file (`str` or `os.PathLike`):
- Path to the JSON file containing the parameters.
- Returns:
- [`PretrainedConfig`]: The configuration object instantiated from that JSON file.
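- Example (a minimal sketch; the file path is arbitrary):
- ```python
- from transformers import BertConfig
- BertConfig().to_json_file("bert_config.json")
- config = BertConfig.from_json_file("bert_config.json")
- ```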
- """
- config_dict = cls._dict_from_json_file(json_file)
- return cls(**config_dict)
- @classmethod
- def _dict_from_json_file(cls, json_file: Union[str, os.PathLike]):
- with open(json_file, "r", encoding="utf-8") as reader:
- text = reader.read()
- return json.loads(text)
- def __eq__(self, other):
- return isinstance(other, PretrainedConfig) and (self.__dict__ == other.__dict__)
- def __repr__(self):
- return f"{self.__class__.__name__} {self.to_json_string()}"
- def __iter__(self):
- for attr in self.__dict__:
- yield attr
- def to_diff_dict(self) -> Dict[str, Any]:
- """
- Removes all attributes from config which correspond to the default config attributes for better readability and
- serializes to a Python dictionary.
- Returns:
- `Dict[str, Any]`: Dictionary of all the attributes that make up this configuration instance.
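- Example (illustrative):
- ```python
- from transformers import BertConfig
- config = BertConfig(output_attentions=True)
- diff = config.to_diff_dict()  # contains "output_attentions" but omits attributes left at their defaults
- ```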
- """
- config_dict = self.to_dict()
- # get the default config dict
- default_config_dict = PretrainedConfig().to_dict()
- # get class specific config dict
- class_config_dict = self.__class__().to_dict() if not self.is_composition else {}
- serializable_config_dict = {}
- # only serialize values that differ from the default config
- for key, value in config_dict.items():
- if (
- isinstance(getattr(self, key, None), PretrainedConfig)
- and key in class_config_dict
- and isinstance(class_config_dict[key], dict)
- ):
- # For nested configs we need to clean the diff recursively
- diff = recursive_diff_dict(value, class_config_dict[key], config_obj=getattr(self, key, None))
- if "model_type" in value:
- # Needs to be set even if it's not in the diff
- diff["model_type"] = value["model_type"]
- if len(diff) > 0:
- serializable_config_dict[key] = diff
- elif (
- key not in default_config_dict
- or key == "transformers_version"
- or value != default_config_dict[key]
- or (key in class_config_dict and value != class_config_dict[key])
- ):
- serializable_config_dict[key] = value
- if hasattr(self, "quantization_config"):
- serializable_config_dict["quantization_config"] = (
- self.quantization_config.to_dict()
- if not isinstance(self.quantization_config, dict)
- else self.quantization_config
- )
- # pop the `_pre_quantization_dtype` as torch.dtypes are not serializable.
- _ = serializable_config_dict.pop("_pre_quantization_dtype", None)
- self.dict_torch_dtype_to_str(serializable_config_dict)
- if "_attn_implementation_internal" in serializable_config_dict:
- del serializable_config_dict["_attn_implementation_internal"]
- return serializable_config_dict
- def to_dict(self) -> Dict[str, Any]:
- """
- Serializes this instance to a Python dictionary.
- Returns:
- `Dict[str, Any]`: Dictionary of all the attributes that make up this configuration instance.
- """
- output = copy.deepcopy(self.__dict__)
- if hasattr(self.__class__, "model_type"):
- output["model_type"] = self.__class__.model_type
- if "_auto_class" in output:
- del output["_auto_class"]
- if "_commit_hash" in output:
- del output["_commit_hash"]
- if "_attn_implementation_internal" in output:
- del output["_attn_implementation_internal"]
- # Transformers version when serializing the model
- output["transformers_version"] = __version__
- for key, value in output.items():
- # Deal with nested configs like CLIP
- if isinstance(value, PretrainedConfig):
- value = value.to_dict()
- del value["transformers_version"]
- output[key] = value
- if hasattr(self, "quantization_config"):
- output["quantization_config"] = (
- self.quantization_config.to_dict()
- if not isinstance(self.quantization_config, dict)
- else self.quantization_config
- )
- # pop the `_pre_quantization_dtype` as torch.dtypes are not serializable.
- _ = output.pop("_pre_quantization_dtype", None)
- self.dict_torch_dtype_to_str(output)
- return output
- def to_json_string(self, use_diff: bool = True) -> str:
- """
- Serializes this instance to a JSON string.
- Args:
- use_diff (`bool`, *optional*, defaults to `True`):
- If set to `True`, only the difference between the config instance and the default `PretrainedConfig()`
- is serialized to JSON string.
- Returns:
- `str`: String containing all the attributes that make up this configuration instance in JSON format.
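- Example (illustrative):
- ```python
- from transformers import BertConfig
- config = BertConfig()
- short = config.to_json_string()               # only non-default attributes
- full = config.to_json_string(use_diff=False)  # every attribute
- ```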
- """
- if use_diff is True:
- config_dict = self.to_diff_dict()
- else:
- config_dict = self.to_dict()
- return json.dumps(config_dict, indent=2, sort_keys=True) + "\n"
- def to_json_file(self, json_file_path: Union[str, os.PathLike], use_diff: bool = True):
- """
- Save this instance to a JSON file.
- Args:
- json_file_path (`str` or `os.PathLike`):
- Path to the JSON file in which this configuration instance's parameters will be saved.
- use_diff (`bool`, *optional*, defaults to `True`):
- If set to `True`, only the difference between the config instance and the default `PretrainedConfig()`
- is serialized to JSON file.
- """
- with open(json_file_path, "w", encoding="utf-8") as writer:
- writer.write(self.to_json_string(use_diff=use_diff))
- def update(self, config_dict: Dict[str, Any]):
- """
- Updates attributes of this class with attributes from `config_dict`.
- Args:
- config_dict (`Dict[str, Any]`): Dictionary of attributes that should be updated for this class.
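- Example (illustrative):
- ```python
- from transformers import BertConfig
- config = BertConfig()
- config.update({"output_attentions": True, "num_labels": 4})
- ```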
- """
- for key, value in config_dict.items():
- setattr(self, key, value)
- def update_from_string(self, update_str: str):
- """
- Updates attributes of this class with attributes from `update_str`.
- The expected format is ints, floats and strings as is, and for booleans use `true` or `false`. For example:
- "n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index"
- The keys to change have to already exist in the config object.
- Args:
- update_str (`str`): String with attributes that should be updated for this class.
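- Example (illustrative; the keys must already exist on the config being updated, as these do on `GPT2Config`):
- ```python
- from transformers import GPT2Config
- config = GPT2Config()
- config.update_from_string("resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index")
- ```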
- """
- d = dict(x.split("=") for x in update_str.split(","))
- for k, v in d.items():
- if not hasattr(self, k):
- raise ValueError(f"key {k} isn't in the original config dict")
- old_v = getattr(self, k)
- if isinstance(old_v, bool):
- if v.lower() in ["true", "1", "y", "yes"]:
- v = True
- elif v.lower() in ["false", "0", "n", "no"]:
- v = False
- else:
- raise ValueError(f"can't derive true or false from {v} (key {k})")
- elif isinstance(old_v, int):
- v = int(v)
- elif isinstance(old_v, float):
- v = float(v)
- elif not isinstance(old_v, str):
- raise TypeError(
- f"You can only update int, float, bool or string values in the config, got {v} for key {k}"
- )
- setattr(self, k, v)
- def dict_torch_dtype_to_str(self, d: Dict[str, Any]) -> None:
- """
- Checks whether the passed dictionary and its nested dicts have a *torch_dtype* key and if it's not None,
- converts torch.dtype to a string of just the type. For example, `torch.float32` gets converted into the
- string *"float32"*, which can then be stored in the JSON format.
- """
- if d.get("torch_dtype", None) is not None and not isinstance(d["torch_dtype"], str):
- d["torch_dtype"] = str(d["torch_dtype"]).split(".")[1]
- for value in d.values():
- if isinstance(value, dict):
- self.dict_torch_dtype_to_str(value)
- @classmethod
- def register_for_auto_class(cls, auto_class="AutoConfig"):
- """
- Register this class with a given auto class. This should only be used for custom configurations as the ones in
- the library are already mapped with `AutoConfig`.
- <Tip warning={true}>
- This API is experimental and may have some slight breaking changes in the next releases.
- </Tip>
- Args:
- auto_class (`str` or `type`, *optional*, defaults to `"AutoConfig"`):
- The auto class to register this new configuration with.
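- Example (a minimal sketch; `MyCustomConfig` is a hypothetical user-defined configuration class):
- ```python
- from transformers import PretrainedConfig
- class MyCustomConfig(PretrainedConfig):
-     model_type = "my-custom-model"
- MyCustomConfig.register_for_auto_class()  # registers with AutoConfig
- ```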
- """
- if not isinstance(auto_class, str):
- auto_class = auto_class.__name__
- import transformers.models.auto as auto_module
- if not hasattr(auto_module, auto_class):
- raise ValueError(f"{auto_class} is not a valid auto class.")
- cls._auto_class = auto_class
- @staticmethod
- def _get_global_generation_defaults() -> Dict[str, Any]:
- return {
- "max_length": 20,
- "min_length": 0,
- "do_sample": False,
- "early_stopping": False,
- "num_beams": 1,
- "num_beam_groups": 1,
- "diversity_penalty": 0.0,
- "temperature": 1.0,
- "top_k": 50,
- "top_p": 1.0,
- "typical_p": 1.0,
- "repetition_penalty": 1.0,
- "length_penalty": 1.0,
- "no_repeat_ngram_size": 0,
- "encoder_no_repeat_ngram_size": 0,
- "bad_words_ids": None,
- "num_return_sequences": 1,
- "output_scores": False,
- "return_dict_in_generate": False,
- "forced_bos_token_id": None,
- "forced_eos_token_id": None,
- "remove_invalid_values": False,
- "exponential_decay_length_penalty": None,
- "suppress_tokens": None,
- "begin_suppress_tokens": None,
- }
- def _get_non_default_generation_parameters(self) -> Dict[str, Any]:
- """
- Gets the non-default generation parameters on the PretrainedConfig instance.
- """
- non_default_generation_parameters = {}
- decoder_attribute_name = None
- # Composite models don't have a default config, use their decoder config as a fallback for default values
- # If no known pattern is matched, then `default_config = None` -> check against the global generation defaults
- try:
- default_config = self.__class__()
- except ValueError:
- decoder_config = self.get_text_config(decoder=True)
- if decoder_config is not self:
- default_config = decoder_config.__class__()
- else:
- default_config = None
- # If it is a composite model, we want to check the subconfig that will be used for generation
- self_decoder_config = self if decoder_attribute_name is None else getattr(self, decoder_attribute_name)
- for parameter_name, default_global_value in self._get_global_generation_defaults().items():
- if hasattr(self_decoder_config, parameter_name):
- is_default_in_config = is_default_generation_value = None
- parameter_value = getattr(self_decoder_config, parameter_name)
- # Three cases in which it is okay for the model config to hold generation config parameters:
- # 1. The parameter is set to `None`, effectively delegating its value to the generation config
- if parameter_value is None:
- continue
- # 2. If we have a default config, then the instance should hold the same generation defaults
- if default_config is not None:
- is_default_in_config = parameter_value == getattr(default_config, parameter_name)
- # 3. if we don't have a default config, then the instance should hold the global generation defaults
- else:
- is_default_generation_value = parameter_value == default_global_value
- is_non_default = (is_default_in_config is False) or (
- is_default_in_config is None and is_default_generation_value is False
- )
- if is_non_default:
- non_default_generation_parameters[parameter_name] = getattr(self_decoder_config, parameter_name)
- return non_default_generation_parameters
- def get_text_config(self, decoder=False) -> "PretrainedConfig":
- """
- Returns the config that is meant to be used with text IO. On most models, it is the original config instance
- itself. On specific composite models, it is under a set of valid names.
- If `decoder` is set to `True`, then only search for decoder config names.
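- Example (illustrative; `LlavaConfig` is one composite config that exposes a `text_config` sub-config):
- ```python
- from transformers import LlavaConfig
- config = LlavaConfig()
- config.get_text_config()  # returns the nested text config rather than the top-level config
- ```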
- """
- decoder_possible_text_config_names = ("decoder", "generator", "text_config")
- encoder_possible_text_config_names = ("text_encoder",)
- if decoder:
- possible_text_config_names = decoder_possible_text_config_names
- else:
- possible_text_config_names = encoder_possible_text_config_names + decoder_possible_text_config_names
- valid_text_config_names = []
- for text_config_name in possible_text_config_names:
- if hasattr(self, text_config_name):
- text_config = getattr(self, text_config_name, None)
- if text_config is not None:
- valid_text_config_names += [text_config_name]
- if len(valid_text_config_names) > 1:
- raise ValueError(
- f"Multiple valid text configs were found in the model config: {valid_text_config_names}. In this "
- "case, using `get_text_config()` would be ambiguous. Please specify the desied text config directly."
- )
- elif len(valid_text_config_names) == 1:
- return getattr(self, valid_text_config_names[0])
- return self
- def get_configuration_file(configuration_files: List[str]) -> str:
- """
- Get the configuration file to use for this version of transformers.
- Args:
- configuration_files (`List[str]`): The list of available configuration files.
- Returns:
- `str`: The configuration file to use.
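- Example (illustrative; assumes Transformers v4.30.0 is installed, so the newest file whose version tag
- does not exceed the installed version is picked):
- ```python
- get_configuration_file(["config.json", "config.4.27.0.json", "config.42.0.0.json"])
- # -> "config.4.27.0.json"
- ```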
- """
- configuration_files_map = {}
- for file_name in configuration_files:
- search = _re_configuration_file.search(file_name)
- if search is not None:
- v = search.groups()[0]
- configuration_files_map[v] = file_name
- available_versions = sorted(configuration_files_map.keys())
- # Defaults to CONFIG_NAME and then tries to look at some newer versions.
- configuration_file = CONFIG_NAME
- transformers_version = version.parse(__version__)
- for v in available_versions:
- if version.parse(v) <= transformers_version:
- configuration_file = configuration_files_map[v]
- else:
- # No point going further since the versions are sorted.
- break
- return configuration_file
- def recursive_diff_dict(dict_a, dict_b, config_obj=None):
- """
- Helper function to recursively take the diff between two nested dictionaries. The resulting diff only contains the
- values from `dict_a` that are different from values in `dict_b`.
- """
- diff = {}
- default = config_obj.__class__().to_dict() if config_obj is not None else {}
- for key, value in dict_a.items():
- obj_value = getattr(config_obj, str(key), None)
- if isinstance(obj_value, PretrainedConfig) and key in dict_b and isinstance(dict_b[key], dict):
- diff_value = recursive_diff_dict(value, dict_b[key], config_obj=obj_value)
- if len(diff_value) > 0:
- diff[key] = diff_value
- elif key not in dict_b or value != dict_b[key] or key not in default or value != default[key]:
- diff[key] = value
- return diff
- PretrainedConfig.push_to_hub = copy_func(PretrainedConfig.push_to_hub)
- if PretrainedConfig.push_to_hub.__doc__ is not None:
- PretrainedConfig.push_to_hub.__doc__ = PretrainedConfig.push_to_hub.__doc__.format(
- object="config", object_class="AutoConfig", object_files="configuration file"
- )