configuration_utils.py

  1. # coding=utf-8
  2. # Copyright 2022 The HuggingFace Inc. team.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. """Generation configuration class and utilities."""
  16. import copy
  17. import json
  18. import os
  19. import warnings
  20. from abc import ABC, abstractmethod
  21. from dataclasses import dataclass, is_dataclass
  22. from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
  23. from .. import __version__
  24. from ..configuration_utils import PretrainedConfig
  25. from ..utils import (
  26. GENERATION_CONFIG_NAME,
  27. ExplicitEnum,
  28. PushToHubMixin,
  29. cached_file,
  30. download_url,
  31. extract_commit_hash,
  32. is_remote_url,
  33. is_torch_available,
  34. logging,
  35. )
  36. if TYPE_CHECKING:
  37. from ..modeling_utils import PreTrainedModel
  38. logger = logging.get_logger(__name__)
  39. METADATA_FIELDS = ("_from_model_config", "_commit_hash", "_original_object_hash", "transformers_version")
  40. NEEDS_CACHE_CONFIG = {}
  41. NEED_SETUP_CACHE_CLASSES_MAPPING = {}
  42. QUANT_BACKEND_CLASSES_MAPPING = {}
  43. ALL_CACHE_IMPLEMENTATIONS = []
  44. if is_torch_available():
  45. from ..cache_utils import (
  46. HQQQuantizedCache,
  47. HybridCache,
  48. MambaCache,
  49. OffloadedStaticCache,
  50. QuantizedCacheConfig,
  51. QuantoQuantizedCache,
  52. SlidingWindowCache,
  53. StaticCache,
  54. StaticCacheConfig,
  55. )
  56. from .logits_process import SynthIDTextWatermarkLogitsProcessor, WatermarkLogitsProcessor
  57. NEEDS_CACHE_CONFIG["quantized"] = QuantizedCacheConfig
  58. NEEDS_CACHE_CONFIG["static"] = StaticCacheConfig
  59. NEED_SETUP_CACHE_CLASSES_MAPPING = {
  60. "static": StaticCache,
  61. "offloaded_static": OffloadedStaticCache,
  62. "sliding_window": SlidingWindowCache,
  63. "hybrid": HybridCache,
  64. "mamba": MambaCache,
  65. }
  66. QUANT_BACKEND_CLASSES_MAPPING = {"quanto": QuantoQuantizedCache, "HQQ": HQQQuantizedCache}
  67. ALL_CACHE_IMPLEMENTATIONS = list(NEED_SETUP_CACHE_CLASSES_MAPPING.keys()) + list(NEEDS_CACHE_CONFIG.keys())
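# Illustrative sketch (comments only, not executed here): `generate` is expected to resolve a
# `cache_implementation` string into concrete classes through the mappings above, assuming torch is available:
#
#     cache_cls = NEED_SETUP_CACHE_CLASSES_MAPPING["static"]   # -> StaticCache
#     cache_config_cls = NEEDS_CACHE_CONFIG.get("static")      # -> StaticCacheConfig (only some caches need a config)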
  68. class GenerationMode(ExplicitEnum):
  69. """
  70. Possible generation modes, downstream of the [`~generation.GenerationMixin.generate`] method.
  71. """
  72. # Non-beam methods
  73. CONTRASTIVE_SEARCH = "contrastive_search"
  74. GREEDY_SEARCH = "greedy_search"
  75. SAMPLE = "sample"
  76. ASSISTED_GENERATION = "assisted_generation"
  77. DOLA_GENERATION = "dola_generation"
  78. # Beam methods
  79. BEAM_SEARCH = "beam_search"
  80. BEAM_SAMPLE = "beam_sample"
  81. CONSTRAINED_BEAM_SEARCH = "constrained_beam_search"
  82. GROUP_BEAM_SEARCH = "group_beam_search"
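# Note: `ExplicitEnum` is string-backed, so a mode can also be recovered from its value,
# e.g. (sketch) `GenerationMode("beam_search") is GenerationMode.BEAM_SEARCH`.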
  83. class GenerationConfig(PushToHubMixin):
  84. # no-format
  85. """
  86. Class that holds a configuration for a generation task. A `generate` call supports the following generation methods
  87. for text-decoder, text-to-text, speech-to-text, and vision-to-text models:
  88. - *greedy decoding* if `num_beams=1` and `do_sample=False`
  89. - *contrastive search* if `penalty_alpha>0.` and `top_k>1`
  90. - *multinomial sampling* if `num_beams=1` and `do_sample=True`
  91. - *beam-search decoding* if `num_beams>1` and `do_sample=False`
  92. - *beam-search multinomial sampling* if `num_beams>1` and `do_sample=True`
  93. - *diverse beam-search decoding* if `num_beams>1` and `num_beam_groups>1`
  94. - *constrained beam-search decoding* if `constraints!=None` or `force_words_ids!=None`
  95. - *assisted decoding* if `assistant_model` or `prompt_lookup_num_tokens` is passed to `.generate()`
  96. - *dola decoding* if `dola_layers` is passed to `.generate()`
  97. To learn more about decoding strategies refer to the [text generation strategies guide](../generation_strategies).
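For example, a few illustrative flag combinations and the generation modes they select (the values are
arbitrary; see also [`~GenerationConfig.get_generation_mode`]):

```python
from transformers import GenerationConfig

GenerationConfig().get_generation_mode()                # GenerationMode.GREEDY_SEARCH (library defaults)
GenerationConfig(do_sample=True).get_generation_mode()  # GenerationMode.SAMPLE
GenerationConfig(num_beams=4).get_generation_mode()     # GenerationMode.BEAM_SEARCH
```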
  98. <Tip>
  99. A large number of these flags control the logits or the stopping criteria of the generation. Make sure you check
  100. the [generate-related classes](https://huggingface.co/docs/transformers/internal/generation_utils) for a full
  101. description of the possible manipulations, as well as examples of their usage.
  102. </Tip>
103. Args:
  104. > Parameters that control the length of the output
  105. max_length (`int`, *optional*, defaults to 20):
  106. The maximum length the generated tokens can have. Corresponds to the length of the input prompt +
  107. `max_new_tokens`. Its effect is overridden by `max_new_tokens`, if also set.
  108. max_new_tokens (`int`, *optional*):
109. The maximum number of tokens to generate, ignoring the number of tokens in the prompt.
  110. min_length (`int`, *optional*, defaults to 0):
  111. The minimum length of the sequence to be generated. Corresponds to the length of the input prompt +
  112. `min_new_tokens`. Its effect is overridden by `min_new_tokens`, if also set.
  113. min_new_tokens (`int`, *optional*):
114. The minimum number of tokens to generate, ignoring the number of tokens in the prompt.
  115. early_stopping (`bool` or `str`, *optional*, defaults to `False`):
  116. Controls the stopping condition for beam-based methods, like beam-search. It accepts the following values:
117. `True`, where the generation stops as soon as there are `num_beams` complete candidates; `False`, where a
118. heuristic is applied and the generation stops when it is very unlikely to find better candidates;
  119. `"never"`, where the beam search procedure only stops when there cannot be better candidates (canonical
  120. beam search algorithm).
  121. max_time (`float`, *optional*):
122. The maximum amount of time you allow the computation to run for, in seconds. Generation will still finish
123. the current pass after the allocated time has passed.
124. stop_strings (`str` or `List[str]`, *optional*):
  125. A string or a list of strings that should terminate generation if the model outputs them.
  126. > Parameters that control the generation strategy used
  127. do_sample (`bool`, *optional*, defaults to `False`):
128. Whether or not to use sampling; use greedy decoding otherwise.
  129. num_beams (`int`, *optional*, defaults to 1):
  130. Number of beams for beam search. 1 means no beam search.
  131. num_beam_groups (`int`, *optional*, defaults to 1):
132. Number of groups to divide `num_beams` into in order to ensure diversity among different groups of beams. See
133. [this paper](https://arxiv.org/pdf/1610.02424.pdf) for more details.
  134. penalty_alpha (`float`, *optional*):
135. The value balances the model confidence and the degeneration penalty in contrastive search decoding.
  136. dola_layers (`str` or `List[int]`, *optional*):
  137. The layers to use for DoLa decoding. If `None`, DoLa decoding is not used. If a string, it must
  138. be one of "low" or "high", which means using the lower part or higher part of the model layers, respectively.
  139. "low" means the first half of the layers up to the first 20 layers, and "high" means the last half of the
  140. layers up to the last 20 layers.
  141. If a list of integers, it must contain the indices of the layers to use for candidate premature layers in DoLa.
  142. The 0-th layer is the word embedding layer of the model. Set to `'low'` to improve long-answer reasoning tasks,
  143. `'high'` to improve short-answer tasks. Check the [documentation](https://github.com/huggingface/transformers/blob/main/docs/source/en/generation_strategies.md)
  144. or [the paper](https://arxiv.org/abs/2309.03883) for more details.
  145. > Parameters that control the cache
  146. use_cache (`bool`, *optional*, defaults to `True`):
147. Whether or not the model should use the past key/values attentions (if applicable to the model) to
  148. speed up decoding.
149. cache_implementation (`str`, *optional*, defaults to `None`):
  150. Name of the cache class that will be instantiated in `generate`, for faster decoding. Possible values are:
  151. {ALL_CACHE_IMPLEMENTATIONS}. We support other cache types, but they must be manually instantiated and
  152. passed to `generate` through the `past_key_values` argument. See our
  153. [cache documentation](https://huggingface.co/docs/transformers/en/kv_cache) for further information.
154. cache_config (`CacheConfig` or `dict`, *optional*, defaults to `None`):
155. Arguments used in the key-value cache class can be passed in `cache_config`. Can be passed as a `Dict` and
156. it will be converted to its respective `CacheConfig` internally.
157. Otherwise can be passed as a `CacheConfig` class matching the indicated `cache_implementation`.
158. return_legacy_cache (`bool`, *optional*, defaults to `True`):
  159. Whether to return the legacy or new format of the cache when `DynamicCache` is used by default.
  160. > Parameters for manipulation of the model output logits
  161. temperature (`float`, *optional*, defaults to 1.0):
  162. The value used to modulate the next token probabilities.
  163. top_k (`int`, *optional*, defaults to 50):
  164. The number of highest probability vocabulary tokens to keep for top-k-filtering.
  165. top_p (`float`, *optional*, defaults to 1.0):
  166. If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to
  167. `top_p` or higher are kept for generation.
  168. min_p (`float`, *optional*):
  169. Minimum token probability, which will be scaled by the probability of the most likely token. It must be a
170. value between 0 and 1. Typical values are in the 0.01-0.2 range, comparable in selectivity to setting `top_p` in
  171. the 0.99-0.8 range (use the opposite of normal `top_p` values).
  172. typical_p (`float`, *optional*, defaults to 1.0):
  173. Local typicality measures how similar the conditional probability of predicting a target token next is to
  174. the expected conditional probability of predicting a random token next, given the partial text already
  175. generated. If set to float < 1, the smallest set of the most locally typical tokens with probabilities that
  176. add up to `typical_p` or higher are kept for generation. See [this
  177. paper](https://arxiv.org/pdf/2202.00666.pdf) for more details.
  178. epsilon_cutoff (`float`, *optional*, defaults to 0.0):
  179. If set to float strictly between 0 and 1, only tokens with a conditional probability greater than
  180. `epsilon_cutoff` will be sampled. In the paper, suggested values range from 3e-4 to 9e-4, depending on the
  181. size of the model. See [Truncation Sampling as Language Model
  182. Desmoothing](https://arxiv.org/abs/2210.15191) for more details.
  183. eta_cutoff (`float`, *optional*, defaults to 0.0):
  184. Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to float strictly between
  185. 0 and 1, a token is only considered if it is greater than either `eta_cutoff` or `sqrt(eta_cutoff) *
  186. exp(-entropy(softmax(next_token_logits)))`. The latter term is intuitively the expected next token
  187. probability, scaled by `sqrt(eta_cutoff)`. In the paper, suggested values range from 3e-4 to 2e-3,
  188. depending on the size of the model. See [Truncation Sampling as Language Model
  189. Desmoothing](https://arxiv.org/abs/2210.15191) for more details.
  190. diversity_penalty (`float`, *optional*, defaults to 0.0):
191. This value is subtracted from a beam's score if it generates the same token as any beam from another group at a
  192. particular time. Note that `diversity_penalty` is only effective if `group beam search` is enabled.
  193. repetition_penalty (`float`, *optional*, defaults to 1.0):
  194. The parameter for repetition penalty. 1.0 means no penalty. See [this
  195. paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
  196. encoder_repetition_penalty (`float`, *optional*, defaults to 1.0):
197. The parameter for `encoder_repetition_penalty`. An exponential penalty on sequences that are not in the
  198. original input. 1.0 means no penalty.
  199. length_penalty (`float`, *optional*, defaults to 1.0):
  200. Exponential penalty to the length that is used with beam-based generation. It is applied as an exponent to
  201. the sequence length, which in turn is used to divide the score of the sequence. Since the score is the log
  202. likelihood of the sequence (i.e. negative), `length_penalty` > 0.0 promotes longer sequences, while
  203. `length_penalty` < 0.0 encourages shorter sequences.
  204. no_repeat_ngram_size (`int`, *optional*, defaults to 0):
  205. If set to int > 0, all ngrams of that size can only occur once.
  206. bad_words_ids (`List[List[int]]`, *optional*):
  207. List of list of token ids that are not allowed to be generated. Check
  208. [`~generation.NoBadWordsLogitsProcessor`] for further documentation and examples.
  209. force_words_ids (`List[List[int]]` or `List[List[List[int]]]`, *optional*):
  210. List of token ids that must be generated. If given a `List[List[int]]`, this is treated as a simple list of
211. words that must be included, the opposite of `bad_words_ids`. If given `List[List[List[int]]]`, this
  212. triggers a [disjunctive constraint](https://github.com/huggingface/transformers/issues/14081), where one
  213. can allow different forms of each word.
  214. renormalize_logits (`bool`, *optional*, defaults to `False`):
  215. Whether to renormalize the logits after applying all the logits processors (including the custom
216. ones). It's highly recommended to set this flag to `True` as the search algorithms assume the score logits
217. are normalized, but some logit processors break the normalization.
  218. constraints (`List[Constraint]`, *optional*):
  219. Custom constraints that can be added to the generation to ensure that the output will contain the use of
  220. certain tokens as defined by `Constraint` objects, in the most sensible way possible.
  221. forced_bos_token_id (`int`, *optional*, defaults to `model.config.forced_bos_token_id`):
  222. The id of the token to force as the first generated token after the `decoder_start_token_id`. Useful for
  223. multilingual models like [mBART](../model_doc/mbart) where the first generated token needs to be the target
  224. language token.
225. forced_eos_token_id (`int` or `List[int]`, *optional*, defaults to `model.config.forced_eos_token_id`):
  226. The id of the token to force as the last generated token when `max_length` is reached. Optionally, use a
  227. list to set multiple *end-of-sequence* tokens.
  228. remove_invalid_values (`bool`, *optional*, defaults to `model.config.remove_invalid_values`):
229. Whether to remove possible *nan* and *inf* outputs of the model to prevent the generation method from crashing.
  230. Note that using `remove_invalid_values` can slow down generation.
  231. exponential_decay_length_penalty (`tuple(int, float)`, *optional*):
232. This tuple adds an exponentially increasing length penalty after a certain number of tokens have been
233. generated. The tuple shall consist of `(start_index, decay_factor)`, where `start_index` indicates where the
234. penalty starts and `decay_factor` represents the factor of exponential decay.
  235. suppress_tokens (`List[int]`, *optional*):
236. A list of tokens that will be suppressed at generation. The `SuppressTokens` logit processor will set their
  237. log probs to `-inf` so that they are not sampled.
  238. begin_suppress_tokens (`List[int]`, *optional*):
239. A list of tokens that will be suppressed at the beginning of the generation. The `SuppressBeginTokens` logit
  240. processor will set their log probs to `-inf` so that they are not sampled.
  241. forced_decoder_ids (`List[List[int]]`, *optional*):
  242. A list of pairs of integers which indicates a mapping from generation indices to token indices that will be
  243. forced before sampling. For example, `[[1, 123]]` means the second generated token will always be a token
  244. of index 123.
245. sequence_bias (`Dict[Tuple[int], float]`, *optional*):
  246. Dictionary that maps a sequence of tokens to its bias term. Positive biases increase the odds of the
  247. sequence being selected, while negative biases do the opposite. Check
  248. [`~generation.SequenceBiasLogitsProcessor`] for further documentation and examples.
  249. token_healing (`bool`, *optional*, defaults to `False`):
  250. Heal tail tokens of prompts by replacing them with their appropriate extensions.
  251. This enhances the quality of completions for prompts affected by greedy tokenization bias.
  252. guidance_scale (`float`, *optional*):
  253. The guidance scale for classifier free guidance (CFG). CFG is enabled by setting `guidance_scale > 1`.
254. A higher guidance scale encourages the model to generate samples that are more closely linked to the input
  255. prompt, usually at the expense of poorer quality.
  256. low_memory (`bool`, *optional*):
  257. Switch to sequential beam search and sequential topk for contrastive search to reduce peak memory.
  258. Used with beam search and contrastive search.
  259. watermarking_config (`BaseWatermarkingConfig` or `dict`, *optional*):
260. Arguments used to watermark the model outputs by adding a small bias to a randomly selected set of "green"
  261. tokens. See the docs of [`SynthIDTextWatermarkingConfig`] and [`WatermarkingConfig`] for more
  262. details. If passed as `Dict`, it will be converted to a `WatermarkingConfig` internally.
  263. > Parameters that define the output variables of generate
  264. num_return_sequences (`int`, *optional*, defaults to 1):
  265. The number of independently computed returned sequences for each element in the batch.
  266. output_attentions (`bool`, *optional*, defaults to `False`):
  267. Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
  268. tensors for more details.
  269. output_hidden_states (`bool`, *optional*, defaults to `False`):
  270. Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
  271. more details.
  272. output_scores (`bool`, *optional*, defaults to `False`):
  273. Whether or not to return the prediction scores. See `scores` under returned tensors for more details.
  274. output_logits (`bool`, *optional*):
  275. Whether or not to return the unprocessed prediction logit scores. See `logits` under returned tensors for
  276. more details.
  277. return_dict_in_generate (`bool`, *optional*, defaults to `False`):
  278. Whether or not to return a [`~utils.ModelOutput`], as opposed to returning exclusively the generated
  279. sequence. This flag must be set to `True` to return the generation cache (when `use_cache` is `True`)
280. or optional outputs (see flags starting with `output_`).
  281. > Special tokens that can be used at generation time
  282. pad_token_id (`int`, *optional*):
  283. The id of the *padding* token.
  284. bos_token_id (`int`, *optional*):
  285. The id of the *beginning-of-sequence* token.
  286. eos_token_id (`Union[int, List[int]]`, *optional*):
  287. The id of the *end-of-sequence* token. Optionally, use a list to set multiple *end-of-sequence* tokens.
  288. > Generation parameters exclusive to encoder-decoder models
  289. encoder_no_repeat_ngram_size (`int`, *optional*, defaults to 0):
  290. If set to int > 0, all ngrams of that size that occur in the `encoder_input_ids` cannot occur in the
  291. `decoder_input_ids`.
  292. decoder_start_token_id (`int` or `List[int]`, *optional*):
293. If an encoder-decoder model starts decoding with a different token than *bos*, the id of that token, or a list of length
294. `batch_size`. Passing a list enables different start ids for each element in the batch
295. (e.g. multilingual models with different target languages in one batch).
  296. > Generation parameters exclusive to assistant generation
  297. is_assistant (`bool`, *optional*, defaults to `False`):
  298. Whether the model is an assistant (draft) model.
  299. num_assistant_tokens (`int`, *optional*, defaults to 20):
  300. Defines the number of _speculative tokens_ that shall be generated by the assistant model before being
  301. checked by the target model at each iteration. Higher values for `num_assistant_tokens` make the generation
302. more _speculative_: if the assistant model is performant, larger speed-ups can be reached; if the assistant
303. model requires many corrections, lower speed-ups are reached.
  304. num_assistant_tokens_schedule (`str`, *optional*, defaults to `"constant"`):
  305. Defines the schedule at which max assistant tokens shall be changed during inference.
  306. - `"heuristic"`: When all speculative tokens are correct, increase `num_assistant_tokens` by 2 else
  307. reduce by 1. `num_assistant_tokens` value is persistent over multiple generation calls with the same assistant model.
  308. - `"heuristic_transient"`: Same as `"heuristic"` but `num_assistant_tokens` is reset to its initial value after each generation call.
  309. - `"constant"`: `num_assistant_tokens` stays unchanged during generation
  310. assistant_confidence_threshold (`float`, *optional*, defaults to 0.4):
  311. The confidence threshold for the assistant model. If the assistant model's confidence in its prediction for the current token is lower
  312. than this threshold, the assistant model stops the current token generation iteration, even if the number of _speculative tokens_
  313. (defined by `num_assistant_tokens`) is not yet reached. It is an unsupervised version of the dynamic speculation lookahead
314. from [Dynamic Speculation Lookahead Accelerates Speculative Decoding of Large Language Models](https://arxiv.org/abs/2405.04304).
315. prompt_lookup_num_tokens (`int`, *optional*, defaults to `None`):
316. The number of tokens to be output as candidate tokens.
317. max_matching_ngram_size (`int`, *optional*, defaults to `None`):
318. The maximum ngram size to be considered for matching in the prompt. Defaults to 2 if not provided.
  319. > Wild card
  320. generation_kwargs:
  321. Additional generation kwargs will be forwarded to the `generate` function of the model. Kwargs that are not
  322. present in `generate`'s signature will be used in the model forward pass.
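Example (an illustrative sampling configuration; the values are arbitrary):

```python
from transformers import GenerationConfig

config = GenerationConfig(do_sample=True, temperature=0.7, top_p=0.9, max_new_tokens=64)
# The instance is validated at init time; inconsistent flag combinations trigger warnings or exceptions.
config.temperature  # 0.7
```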
  323. """
  324. extra_output_flags = ("output_attentions", "output_hidden_states", "output_scores", "output_logits")
  325. def __init__(self, **kwargs):
  326. # Parameters that control the length of the output
  327. self.max_length = kwargs.pop("max_length", 20)
  328. self.max_new_tokens = kwargs.pop("max_new_tokens", None)
  329. self.min_length = kwargs.pop("min_length", 0)
  330. self.min_new_tokens = kwargs.pop("min_new_tokens", None)
  331. self.early_stopping = kwargs.pop("early_stopping", False)
  332. self.max_time = kwargs.pop("max_time", None)
  333. self.stop_strings = kwargs.pop("stop_strings", None)
  334. # Parameters that control the generation strategy used
  335. self.do_sample = kwargs.pop("do_sample", False)
  336. self.num_beams = kwargs.pop("num_beams", 1)
  337. self.num_beam_groups = kwargs.pop("num_beam_groups", 1)
  338. self.penalty_alpha = kwargs.pop("penalty_alpha", None)
  339. self.dola_layers = kwargs.pop("dola_layers", None)
  340. # Parameters that control the cache
  341. self.use_cache = kwargs.pop("use_cache", True)
  342. self.cache_implementation = kwargs.pop("cache_implementation", None)
  343. self.cache_config = kwargs.pop("cache_config", None)
  344. if self.cache_implementation is not None and self.cache_implementation in NEEDS_CACHE_CONFIG:
  345. cache_config_class = NEEDS_CACHE_CONFIG[self.cache_implementation]
  346. if self.cache_config is None:
  347. self.cache_config = cache_config_class()
  348. elif isinstance(self.cache_config, dict):
  349. self.cache_config = cache_config_class.from_dict(self.cache_config)
  350. self.return_legacy_cache = kwargs.pop("return_legacy_cache", None)
  351. # Parameters for manipulation of the model output logits
  352. self.temperature = kwargs.pop("temperature", 1.0)
  353. self.top_k = kwargs.pop("top_k", 50)
  354. self.top_p = kwargs.pop("top_p", 1.0)
  355. self.min_p = kwargs.pop("min_p", None)
  356. self.typical_p = kwargs.pop("typical_p", 1.0)
  357. self.epsilon_cutoff = kwargs.pop("epsilon_cutoff", 0.0)
  358. self.eta_cutoff = kwargs.pop("eta_cutoff", 0.0)
  359. self.diversity_penalty = kwargs.pop("diversity_penalty", 0.0)
  360. self.repetition_penalty = kwargs.pop("repetition_penalty", 1.0)
  361. self.encoder_repetition_penalty = kwargs.pop("encoder_repetition_penalty", 1.0)
  362. self.length_penalty = kwargs.pop("length_penalty", 1.0)
  363. self.no_repeat_ngram_size = kwargs.pop("no_repeat_ngram_size", 0)
  364. self.bad_words_ids = kwargs.pop("bad_words_ids", None)
  365. self.force_words_ids = kwargs.pop("force_words_ids", None)
  366. self.renormalize_logits = kwargs.pop("renormalize_logits", False)
  367. self.constraints = kwargs.pop("constraints", None)
  368. self.forced_bos_token_id = kwargs.pop("forced_bos_token_id", None)
  369. self.forced_eos_token_id = kwargs.pop("forced_eos_token_id", None)
  370. self.remove_invalid_values = kwargs.pop("remove_invalid_values", False)
  371. self.exponential_decay_length_penalty = kwargs.pop("exponential_decay_length_penalty", None)
  372. self.suppress_tokens = kwargs.pop("suppress_tokens", None)
  373. self.begin_suppress_tokens = kwargs.pop("begin_suppress_tokens", None)
  374. self.forced_decoder_ids = kwargs.pop("forced_decoder_ids", None)
  375. self.sequence_bias = kwargs.pop("sequence_bias", None)
  376. self.token_healing = kwargs.pop("token_healing", False)
  377. self.guidance_scale = kwargs.pop("guidance_scale", None)
  378. self.low_memory = kwargs.pop("low_memory", None)
  379. watermarking_config = kwargs.pop("watermarking_config", None)
  380. if watermarking_config is None:
  381. self.watermarking_config = None
  382. elif isinstance(watermarking_config, BaseWatermarkingConfig):
  383. self.watermarking_config = watermarking_config
  384. else:
  385. self.watermarking_config = WatermarkingConfig.from_dict(watermarking_config)
  386. # Parameters that define the output variables of `generate`
  387. self.num_return_sequences = kwargs.pop("num_return_sequences", 1)
  388. self.output_attentions = kwargs.pop("output_attentions", False)
  389. self.output_hidden_states = kwargs.pop("output_hidden_states", False)
  390. self.output_scores = kwargs.pop("output_scores", False)
  391. self.output_logits = kwargs.pop("output_logits", None)
  392. self.return_dict_in_generate = kwargs.pop("return_dict_in_generate", False)
  393. # Special tokens that can be used at generation time
  394. self.pad_token_id = kwargs.pop("pad_token_id", None)
  395. self.bos_token_id = kwargs.pop("bos_token_id", None)
  396. self.eos_token_id = kwargs.pop("eos_token_id", None)
  397. # Generation parameters exclusive to encoder-decoder models
  398. self.encoder_no_repeat_ngram_size = kwargs.pop("encoder_no_repeat_ngram_size", 0)
  399. self.decoder_start_token_id = kwargs.pop("decoder_start_token_id", None)
  400. # Assistant generation
  401. self.is_assistant = False
  402. self.num_assistant_tokens = kwargs.pop("num_assistant_tokens", 20)
  403. self.num_assistant_tokens_schedule = kwargs.pop("num_assistant_tokens_schedule", "constant")
  404. self.assistant_confidence_threshold = kwargs.pop("assistant_confidence_threshold", 0.4)
  405. # Prompt lookup decoding
  406. self.prompt_lookup_num_tokens = kwargs.pop("prompt_lookup_num_tokens", None)
  407. self.max_matching_ngram_size = kwargs.pop("max_matching_ngram_size", None)
  408. # Wild card
  409. self.generation_kwargs = kwargs.pop("generation_kwargs", {})
  410. # The remaining attributes do not parametrize `.generate()`, but are informative and/or used by the hub
  411. # interface.
  412. self._from_model_config = kwargs.pop("_from_model_config", False)
  413. self._commit_hash = kwargs.pop("_commit_hash", None)
  414. self.transformers_version = kwargs.pop("transformers_version", __version__)
  415. # Additional attributes without default values
  416. if not self._from_model_config:
  417. # we don't want to copy values from the model config if we're initializing a `GenerationConfig` from a
  418. # model's default configuration file
  419. for key, value in kwargs.items():
  420. try:
  421. setattr(self, key, value)
  422. except AttributeError as err:
  423. logger.error(f"Can't set {key} with value {value} for {self}")
  424. raise err
  425. # Validate the values of the attributes
  426. self.validate(is_init=True)
  427. def __hash__(self):
  428. return hash(self.to_json_string(ignore_metadata=True))
  429. def __eq__(self, other):
  430. if not isinstance(other, GenerationConfig):
  431. return False
  432. self_without_metadata = self.to_json_string(use_diff=False, ignore_metadata=True)
  433. other_without_metadata = other.to_json_string(use_diff=False, ignore_metadata=True)
  434. return self_without_metadata == other_without_metadata
  435. def __repr__(self):
  436. return f"{self.__class__.__name__} {self.to_json_string(ignore_metadata=True)}"
  437. def get_generation_mode(self, assistant_model: Optional["PreTrainedModel"] = None) -> GenerationMode:
  438. """
  439. Returns the generation mode triggered by the [`GenerationConfig`] instance.
440. Args:
  441. assistant_model (`PreTrainedModel`, *optional*):
  442. The assistant model to be used for assisted generation. If set, the generation mode will be
  443. assisted generation.
  444. Returns:
  445. `GenerationMode`: The generation mode triggered by the instance.
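Example (an illustrative sketch; flag values are arbitrary):

```python
from transformers import GenerationConfig

GenerationConfig(penalty_alpha=0.6, top_k=4).get_generation_mode()   # GenerationMode.CONTRASTIVE_SEARCH
GenerationConfig(num_beams=3, do_sample=True).get_generation_mode()  # GenerationMode.BEAM_SAMPLE
```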
  446. """
  447. # TODO joao: find out a way of not depending on external fields (e.g. `assistant_model`), then make this a
  448. # property and part of the `__repr__`
  449. if self.constraints is not None or self.force_words_ids is not None:
  450. generation_mode = GenerationMode.CONSTRAINED_BEAM_SEARCH
  451. elif self.num_beams == 1:
  452. if self.do_sample is False:
  453. if (
  454. self.top_k is not None
  455. and self.top_k > 1
  456. and self.penalty_alpha is not None
  457. and self.penalty_alpha > 0
  458. ):
  459. generation_mode = GenerationMode.CONTRASTIVE_SEARCH
  460. else:
  461. generation_mode = GenerationMode.GREEDY_SEARCH
  462. else:
  463. generation_mode = GenerationMode.SAMPLE
  464. else:
  465. if self.num_beam_groups > 1:
  466. generation_mode = GenerationMode.GROUP_BEAM_SEARCH
  467. elif self.do_sample is True:
  468. generation_mode = GenerationMode.BEAM_SAMPLE
  469. else:
  470. generation_mode = GenerationMode.BEAM_SEARCH
  471. # Assisted generation may extend some generation modes
  472. if assistant_model is not None or self.prompt_lookup_num_tokens is not None:
  473. if generation_mode in ("greedy_search", "sample"):
  474. generation_mode = GenerationMode.ASSISTED_GENERATION
  475. else:
  476. raise ValueError(
  477. "You've set `assistant_model`, which triggers assisted generate. Currently, assisted generate "
  478. "is only supported with Greedy Search and Sample."
  479. )
  480. # DoLa generation may extend some generation modes
  481. if self.dola_layers is not None:
  482. if generation_mode in ("greedy_search", "sample"):
  483. generation_mode = GenerationMode.DOLA_GENERATION
  484. else:
  485. raise ValueError(
  486. "You've set `dola_layers`, which triggers DoLa generate. Currently, DoLa generate "
  487. "is only supported with Greedy Search and Sample."
  488. )
  489. return generation_mode
  490. def validate(self, is_init=False):
  491. """
  492. Validates the values of the attributes of the [`GenerationConfig`] instance. Raises exceptions in the presence
  493. of parameterization that can be detected as incorrect from the configuration instance alone.
  494. Note that some parameters not validated here are best validated at generate runtime, as they may depend on
  495. other inputs and/or the model, such as parameters related to the generation length.
496. Args:
  497. is_init (`bool`, *optional*, defaults to `False`):
  498. Whether the validation is performed during the initialization of the instance.
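Example (an illustrative failure case; the exact error message may differ):

```python
from transformers import GenerationConfig

config = GenerationConfig()
config.num_return_sequences = 2  # incompatible with greedy search (`num_beams=1`, `do_sample=False`)
config.validate()  # raises ValueError
```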
  499. """
  500. # Validation of individual attributes
  501. if self.early_stopping not in {True, False, "never"}:
  502. raise ValueError(f"`early_stopping` must be a boolean or 'never', but is {self.early_stopping}.")
  503. if self.max_new_tokens is not None and self.max_new_tokens <= 0:
  504. raise ValueError(f"`max_new_tokens` must be greater than 0, but is {self.max_new_tokens}.")
  505. if self.pad_token_id is not None and self.pad_token_id < 0:
  506. warnings.warn(
  507. f"`pad_token_id` should be positive but got {self.pad_token_id}. This will cause errors when batch "
  508. "generating, if there is padding. Please set `pad_token_id` explicitly as "
  509. "`model.generation_config.pad_token_id=PAD_TOKEN_ID` to avoid errors in generation"
  510. )
  511. # Validation of attribute relations:
  512. fix_location = ""
  513. if is_init:
  514. fix_location = (
  515. " This was detected when initializing the generation config instance, which means the corresponding "
  516. "file may hold incorrect parameterization and should be fixed."
  517. )
  518. # 1. detect sampling-only parameterization when not in sampling mode
  519. if self.do_sample is False:
  520. greedy_wrong_parameter_msg = (
  521. "`do_sample` is set to `False`. However, `{flag_name}` is set to `{flag_value}` -- this flag is only "
  522. "used in sample-based generation modes. You should set `do_sample=True` or unset `{flag_name}`."
  523. + fix_location
  524. )
  525. if self.temperature is not None and self.temperature != 1.0:
  526. warnings.warn(
  527. greedy_wrong_parameter_msg.format(flag_name="temperature", flag_value=self.temperature),
  528. UserWarning,
  529. )
  530. if self.top_p is not None and self.top_p != 1.0:
  531. warnings.warn(
  532. greedy_wrong_parameter_msg.format(flag_name="top_p", flag_value=self.top_p),
  533. UserWarning,
  534. )
  535. if self.min_p is not None:
  536. warnings.warn(
  537. greedy_wrong_parameter_msg.format(flag_name="min_p", flag_value=self.min_p),
  538. UserWarning,
  539. )
  540. if self.typical_p is not None and self.typical_p != 1.0:
  541. warnings.warn(
  542. greedy_wrong_parameter_msg.format(flag_name="typical_p", flag_value=self.typical_p),
  543. UserWarning,
  544. )
  545. if (
  546. self.top_k is not None and self.top_k != 50 and self.penalty_alpha is None
  547. ): # contrastive search uses top_k
  548. warnings.warn(
  549. greedy_wrong_parameter_msg.format(flag_name="top_k", flag_value=self.top_k),
  550. UserWarning,
  551. )
  552. if self.epsilon_cutoff is not None and self.epsilon_cutoff != 0.0:
  553. warnings.warn(
  554. greedy_wrong_parameter_msg.format(flag_name="epsilon_cutoff", flag_value=self.epsilon_cutoff),
  555. UserWarning,
  556. )
  557. if self.eta_cutoff is not None and self.eta_cutoff != 0.0:
  558. warnings.warn(
  559. greedy_wrong_parameter_msg.format(flag_name="eta_cutoff", flag_value=self.eta_cutoff),
  560. UserWarning,
  561. )
  562. # 2. detect beam-only parameterization when not in beam mode
  563. if self.num_beams is None:
  564. warnings.warn("`num_beams` is set to None - defaulting to 1.", UserWarning)
  565. self.num_beams = 1
  566. if self.num_beams == 1:
  567. single_beam_wrong_parameter_msg = (
  568. "`num_beams` is set to 1. However, `{flag_name}` is set to `{flag_value}` -- this flag is only used "
  569. "in beam-based generation modes. You should set `num_beams>1` or unset `{flag_name}`." + fix_location
  570. )
  571. if self.early_stopping is not False:
  572. warnings.warn(
  573. single_beam_wrong_parameter_msg.format(flag_name="early_stopping", flag_value=self.early_stopping),
  574. UserWarning,
  575. )
  576. if self.num_beam_groups is not None and self.num_beam_groups != 1:
  577. warnings.warn(
  578. single_beam_wrong_parameter_msg.format(
  579. flag_name="num_beam_groups", flag_value=self.num_beam_groups
  580. ),
  581. UserWarning,
  582. )
  583. if self.diversity_penalty is not None and self.diversity_penalty != 0.0:
  584. warnings.warn(
  585. single_beam_wrong_parameter_msg.format(
  586. flag_name="diversity_penalty", flag_value=self.diversity_penalty
  587. ),
  588. UserWarning,
  589. )
  590. if self.length_penalty is not None and self.length_penalty != 1.0:
  591. warnings.warn(
  592. single_beam_wrong_parameter_msg.format(flag_name="length_penalty", flag_value=self.length_penalty),
  593. UserWarning,
  594. )
  595. if self.constraints is not None:
  596. warnings.warn(
  597. single_beam_wrong_parameter_msg.format(flag_name="constraints", flag_value=self.constraints),
  598. UserWarning,
  599. )
600. # 3. detect incorrect parameterization specific to advanced beam modes
  601. else:
  602. # constrained beam search
  603. if self.constraints is not None or self.force_words_ids is not None:
  604. constrained_wrong_parameter_msg = (
  605. "one of `constraints`, `force_words_ids` is not `None`, triggering constrained beam search. However, "
  606. "`{flag_name}` is set to `{flag_value}`, which is incompatible with this generation mode. Set "
  607. "`constraints` and `force_words_ids` to `None` or unset `{flag_name}` to continue." + fix_location
  608. )
  609. if self.do_sample is True:
  610. raise ValueError(
  611. constrained_wrong_parameter_msg.format(flag_name="do_sample", flag_value=self.do_sample)
  612. )
  613. if self.num_beam_groups is not None and self.num_beam_groups != 1:
  614. raise ValueError(
  615. constrained_wrong_parameter_msg.format(
  616. flag_name="num_beam_groups", flag_value=self.num_beam_groups
  617. )
  618. )
  619. # group beam search
  620. if self.diversity_penalty != 0.0 or self.num_beam_groups != 1:
  621. group_error_prefix = (
  622. "`diversity_penalty` is not 0.0 or `num_beam_groups` is not 1, triggering group beam search. In "
  623. "this generation mode, "
  624. )
  625. if self.do_sample is True:
  626. raise ValueError(group_error_prefix + "`do_sample` must be set to `False`")
  627. if self.num_beams % self.num_beam_groups != 0:
  628. raise ValueError(group_error_prefix + "`num_beams` should be divisible by `num_beam_groups`")
  629. if self.diversity_penalty == 0.0:
  630. raise ValueError(
  631. group_error_prefix
  632. + "`diversity_penalty` should be greater than `0.0`, otherwise your groups will be identical."
  633. )
  634. # DoLa generation
  635. if self.dola_layers is not None and (self.repetition_penalty is None or self.repetition_penalty < 1.2):
  636. warnings.warn(
  637. "`dola_layers` is set to trigger DoLa decoding, but `repetition_penalty` is set to a value of "
  638. f"{self.repetition_penalty}, which could induce unwanted repetition. The recommended value for "
  639. "DoLa decoding is `repetition_penalty>=1.2`.",
  640. UserWarning,
  641. )
  642. # 4. check `num_return_sequences`
  643. if self.num_return_sequences != 1:
  644. if self.num_beams == 1:
  645. if self.do_sample is False:
  646. raise ValueError(
  647. "Greedy methods without beam search do not support `num_return_sequences` different than 1 "
  648. f"(got {self.num_return_sequences})."
  649. )
  650. elif self.num_return_sequences > self.num_beams:
  651. raise ValueError(
  652. f"`num_return_sequences` ({self.num_return_sequences}) has to be smaller or equal to `num_beams` "
  653. f"({self.num_beams})."
  654. )
  655. # 5. check cache-related arguments
  656. if self.cache_implementation is not None and self.cache_implementation not in ALL_CACHE_IMPLEMENTATIONS:
  657. raise ValueError(
  658. f"Invalid `cache_implementation` ({self.cache_implementation}). Choose one of: "
  659. f"{ALL_CACHE_IMPLEMENTATIONS}"
  660. )
  661. if self.cache_config is not None:
  662. cache_class = NEEDS_CACHE_CONFIG.get(self.cache_implementation)
  663. if cache_class is None:
  664. raise ValueError(
  665. "You provided a `cache_config` but the cache implementation you are using "
  666. f"({self.cache_implementation}) does not require any config. Make sure to use the "
  667. "correct cache implementation matching your cache config."
  668. )
  669. if not isinstance(self.cache_config, cache_class):
  670. self.cache_config = cache_class.from_dict(self.cache_config)
  671. self.cache_config.validate()
  672. if self.use_cache is False:
673. # In this case, all cache-related arguments should be unset. However, since `use_cache=False` is often
  674. # passed to `generate` directly to hot-fix cache issues, let's raise a warning instead of an error
  675. # (otherwise a user might need to overwrite several parameters).
  676. no_cache_warning = (
  677. "You have set `use_cache` to `False`, but {cache_arg} is set to {cache_arg_value}. {cache_arg} will "
  678. "have no effect."
  679. )
  680. for arg_name in ("cache_implementation", "cache_config", "return_legacy_cache"):
  681. if getattr(self, arg_name) is not None:
682. # `logger.warning_once` is a logging call, not `warnings.warn`, so it does not take a warning category.
683. logger.warning_once(
684. no_cache_warning.format(cache_arg=arg_name, cache_arg_value=getattr(self, arg_name))
685. )
  686. # 6. check watermarking arguments
  687. if self.watermarking_config is not None:
  688. if not (
  689. isinstance(self.watermarking_config, WatermarkingConfig)
  690. or isinstance(self.watermarking_config, SynthIDTextWatermarkingConfig)
  691. ):
  692. warnings.warn(
  693. "`watermarking_config` as a dict is deprecated. Please construct `watermarking_config` object with "
  694. "`WatermarkingConfig` or `SynthIDTextWatermarkingConfig` class.",
  695. FutureWarning,
  696. )
  697. self.watermarking_config = WatermarkingConfig.from_dict(self.watermarking_config)
  698. self.watermarking_config.validate()
  699. # 7. other incorrect combinations
  700. if self.return_dict_in_generate is not True:
  701. for extra_output_flag in self.extra_output_flags:
  702. if getattr(self, extra_output_flag) is True:
  703. warnings.warn(
  704. f"`return_dict_in_generate` is NOT set to `True`, but `{extra_output_flag}` is. When "
  705. f"`return_dict_in_generate` is not `True`, `{extra_output_flag}` is ignored.",
  706. UserWarning,
  707. )
  708. # 8. check common issue: passing `generate` arguments inside the generation config
  709. generate_arguments = (
  710. "logits_processor",
  711. "stopping_criteria",
  712. "prefix_allowed_tokens_fn",
  713. "synced_gpus",
  714. "assistant_model",
  715. "streamer",
  716. "negative_prompt_ids",
  717. "negative_prompt_attention_mask",
  718. )
  719. for arg in generate_arguments:
  720. if hasattr(self, arg):
  721. raise ValueError(
  722. f"Argument `{arg}` is not a valid argument of `GenerationConfig`. It should be passed to "
  723. "`generate()` (or a pipeline) directly."
  724. )
  725. def save_pretrained(
  726. self,
  727. save_directory: Union[str, os.PathLike],
  728. config_file_name: Optional[Union[str, os.PathLike]] = None,
  729. push_to_hub: bool = False,
  730. **kwargs,
  731. ):
  732. r"""
  733. Save a generation configuration object to the directory `save_directory`, so that it can be re-loaded using the
  734. [`~GenerationConfig.from_pretrained`] class method.
  735. Args:
  736. save_directory (`str` or `os.PathLike`):
  737. Directory where the configuration JSON file will be saved (will be created if it does not exist).
  738. config_file_name (`str` or `os.PathLike`, *optional*, defaults to `"generation_config.json"`):
  739. Name of the generation configuration JSON file to be saved in `save_directory`.
  740. push_to_hub (`bool`, *optional*, defaults to `False`):
  741. Whether or not to push your model to the Hugging Face model hub after saving it. You can specify the
  742. repository you want to push to with `repo_id` (will default to the name of `save_directory` in your
  743. namespace).
  744. kwargs (`Dict[str, Any]`, *optional*):
745. Additional keyword arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
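Example (illustrative local path):

```python
from transformers import GenerationConfig

config = GenerationConfig(do_sample=True, temperature=0.9)
config.save_pretrained("./my_model")  # writes ./my_model/generation_config.json
```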
  746. """
  747. # At save time, validate the instance -- if any warning/exception is thrown, we refuse to save the instance.
  748. # This strictness is enforced to prevent bad configurations from being saved and re-used.
  749. try:
  750. with warnings.catch_warnings(record=True) as caught_warnings:
  751. self.validate()
  752. if len(caught_warnings) > 0:
  753. raise ValueError(str([w.message for w in caught_warnings]))
  754. except ValueError as exc:
  755. raise ValueError(
  756. "The generation config instance is invalid -- `.validate()` throws warnings and/or exceptions. "
  757. "Fix these issues to save the configuration.\n\nThrown during validation:\n" + str(exc)
  758. )
  759. use_auth_token = kwargs.pop("use_auth_token", None)
  760. if use_auth_token is not None:
  761. warnings.warn(
  762. "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. "
  763. "Please use `token` instead.",
  764. FutureWarning,
  765. )
  766. if kwargs.get("token", None) is not None:
  767. raise ValueError(
  768. "`token` and `use_auth_token` are both specified. Please set only the argument `token`."
  769. )
  770. kwargs["token"] = use_auth_token
  771. config_file_name = config_file_name if config_file_name is not None else GENERATION_CONFIG_NAME
  772. if os.path.isfile(save_directory):
  773. raise AssertionError(f"Provided path ({save_directory}) should be a directory, not a file")
  774. os.makedirs(save_directory, exist_ok=True)
  775. if push_to_hub:
  776. commit_message = kwargs.pop("commit_message", None)
  777. repo_id = kwargs.pop("repo_id", save_directory.split(os.path.sep)[-1])
  778. repo_id = self._create_repo(repo_id, **kwargs)
  779. files_timestamps = self._get_files_timestamps(save_directory)
  780. output_config_file = os.path.join(save_directory, config_file_name)
  781. self.to_json_file(output_config_file, use_diff=True)
  782. logger.info(f"Configuration saved in {output_config_file}")
  783. if push_to_hub:
  784. self._upload_modified_files(
  785. save_directory,
  786. repo_id,
  787. files_timestamps,
  788. commit_message=commit_message,
  789. token=kwargs.get("token"),
  790. )
  791. @classmethod
  792. def from_pretrained(
  793. cls,
  794. pretrained_model_name: Union[str, os.PathLike],
  795. config_file_name: Optional[Union[str, os.PathLike]] = None,
  796. cache_dir: Optional[Union[str, os.PathLike]] = None,
  797. force_download: bool = False,
  798. local_files_only: bool = False,
  799. token: Optional[Union[str, bool]] = None,
  800. revision: str = "main",
  801. **kwargs,
  802. ) -> "GenerationConfig":
  803. r"""
  804. Instantiate a [`GenerationConfig`] from a generation configuration file.
  805. Args:
  806. pretrained_model_name (`str` or `os.PathLike`):
  807. This can be either:
  808. - a string, the *model id* of a pretrained model configuration hosted inside a model repo on
  809. huggingface.co.
  810. - a path to a *directory* containing a configuration file saved using the
  811. [`~GenerationConfig.save_pretrained`] method, e.g., `./my_model_directory/`.
  812. config_file_name (`str` or `os.PathLike`, *optional*, defaults to `"generation_config.json"`):
  813. Name of the generation configuration JSON file to be loaded from `pretrained_model_name`.
  814. cache_dir (`str` or `os.PathLike`, *optional*):
  815. Path to a directory in which a downloaded pretrained model configuration should be cached if the
  816. standard cache should not be used.
  817. force_download (`bool`, *optional*, defaults to `False`):
  818. Whether or not to force to (re-)download the configuration files and override the cached versions if
  819. they exist.
  820. resume_download:
  821. Deprecated and ignored. All downloads are now resumed by default when possible.
  822. Will be removed in v5 of Transformers.
  823. proxies (`Dict[str, str]`, *optional*):
  824. A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
  825. 'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
  826. token (`str` or `bool`, *optional*):
  827. The token to use as HTTP bearer authorization for remote files. If `True`, or not specified, will use
  828. the token generated when running `huggingface-cli login` (stored in `~/.huggingface`).
  829. revision (`str`, *optional*, defaults to `"main"`):
  830. The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
  831. git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
  832. identifier allowed by git.
  833. <Tip>
  834. To test a pull request you made on the Hub, you can pass `revision="refs/pr/<pr_number>"`.
  835. </Tip>
  836. return_unused_kwargs (`bool`, *optional*, defaults to `False`):
  837. If `False`, then this function returns just the final configuration object.
838. If `True`, then this function returns a `Tuple(config, unused_kwargs)` where *unused_kwargs* is a
  839. dictionary consisting of the key/value pairs whose keys are not configuration attributes: i.e., the
  840. part of `kwargs` which has not been used to update `config` and is otherwise ignored.
  841. subfolder (`str`, *optional*, defaults to `""`):
  842. In case the relevant files are located inside a subfolder of the model repo on huggingface.co, you can
  843. specify the folder name here.
  844. kwargs (`Dict[str, Any]`, *optional*):
  845. The values in kwargs of any keys which are configuration attributes will be used to override the loaded
  846. values. Behavior concerning key/value pairs whose keys are *not* configuration attributes is controlled
  847. by the `return_unused_kwargs` keyword parameter.
  848. Returns:
  849. [`GenerationConfig`]: The configuration object instantiated from this pretrained model.
  850. Examples:
  851. ```python
  852. >>> from transformers import GenerationConfig
  853. >>> # Download configuration from huggingface.co and cache.
  854. >>> generation_config = GenerationConfig.from_pretrained("openai-community/gpt2")
  855. >>> # E.g. config was saved using *save_pretrained('./test/saved_model/')*
  856. >>> generation_config.save_pretrained("./test/saved_model/")
  857. >>> generation_config = GenerationConfig.from_pretrained("./test/saved_model/")
  858. >>> # You can also specify configuration names to your generation configuration file
  859. >>> generation_config.save_pretrained("./test/saved_model/", config_file_name="my_configuration.json")
  860. >>> generation_config = GenerationConfig.from_pretrained("./test/saved_model/", "my_configuration.json")
  861. >>> # If you'd like to try a minor variation to an existing configuration, you can also pass generation
  862. >>> # arguments to `.from_pretrained()`. Be mindful that typos and unused arguments will be ignored
  863. >>> generation_config, unused_kwargs = GenerationConfig.from_pretrained(
  864. ... "openai-community/gpt2", top_k=1, foo=False, do_sample=True, return_unused_kwargs=True
  865. ... )
  866. >>> generation_config.top_k
  867. 1
  868. >>> unused_kwargs
  869. {'foo': False}
  870. ```"""
        config_file_name = config_file_name if config_file_name is not None else GENERATION_CONFIG_NAME

        resume_download = kwargs.pop("resume_download", None)
        proxies = kwargs.pop("proxies", None)
        use_auth_token = kwargs.pop("use_auth_token", None)
        subfolder = kwargs.pop("subfolder", "")
        from_pipeline = kwargs.pop("_from_pipeline", None)
        from_auto_class = kwargs.pop("_from_auto", False)
        commit_hash = kwargs.pop("_commit_hash", None)

        if use_auth_token is not None:
            warnings.warn(
                "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.",
                FutureWarning,
            )
            if token is not None:
                raise ValueError(
                    "`token` and `use_auth_token` are both specified. Please set only the argument `token`."
                )
            token = use_auth_token

        user_agent = {"file_type": "config", "from_auto_class": from_auto_class}
        if from_pipeline is not None:
            user_agent["using_pipeline"] = from_pipeline

        config_path = os.path.join(pretrained_model_name, config_file_name)
        config_path = str(config_path)

        is_local = os.path.exists(config_path)
        if os.path.isfile(os.path.join(subfolder, config_path)):
            # Special case when config_path is a local file
            resolved_config_file = config_path
            is_local = True
        elif is_remote_url(config_path):
            configuration_file = config_path
            resolved_config_file = download_url(config_path)
        else:
            configuration_file = config_file_name
            try:
                # Load from local folder or from cache or download from model Hub and cache
                resolved_config_file = cached_file(
                    pretrained_model_name,
                    configuration_file,
                    cache_dir=cache_dir,
                    force_download=force_download,
                    proxies=proxies,
                    resume_download=resume_download,
                    local_files_only=local_files_only,
                    token=token,
                    user_agent=user_agent,
                    revision=revision,
                    subfolder=subfolder,
                    _commit_hash=commit_hash,
                )
                commit_hash = extract_commit_hash(resolved_config_file, commit_hash)
            except EnvironmentError:
                # Raise any environment error raised by `cached_file`. It will have a helpful error message adapted
                # to the original exception.
                raise
            except Exception:
                # For any other exception, we throw a generic error.
                raise EnvironmentError(
                    f"Can't load the configuration of '{pretrained_model_name}'. If you were trying to load it"
                    " from 'https://huggingface.co/models', make sure you don't have a local directory with the same"
                    f" name. Otherwise, make sure '{pretrained_model_name}' is the correct path to a directory"
                    f" containing a {configuration_file} file"
                )

        try:
            # Load config dict
            config_dict = cls._dict_from_json_file(resolved_config_file)
            config_dict["_commit_hash"] = commit_hash
        except (json.JSONDecodeError, UnicodeDecodeError):
            raise EnvironmentError(
                f"It looks like the config file at '{resolved_config_file}' is not a valid JSON file."
            )

        if is_local:
            logger.info(f"loading configuration file {resolved_config_file}")
        else:
            logger.info(f"loading configuration file {configuration_file} from cache at {resolved_config_file}")

        if kwargs.get("return_unused_kwargs") is True:
            config, unused_kwargs = cls.from_dict(config_dict, **kwargs)
            config._original_object_hash = hash(config)  # Hash to detect whether the instance was modified
            return config, unused_kwargs
        else:
            config = cls.from_dict(config_dict, **kwargs)
            config._original_object_hash = hash(config)  # Hash to detect whether the instance was modified
            return config

    @classmethod
    def _dict_from_json_file(cls, json_file: Union[str, os.PathLike]):
        with open(json_file, "r", encoding="utf-8") as reader:
            text = reader.read()
        return json.loads(text)

    @classmethod
    def from_dict(cls, config_dict: Dict[str, Any], **kwargs) -> "GenerationConfig":
        """
        Instantiates a [`GenerationConfig`] from a Python dictionary of parameters.

        Args:
            config_dict (`Dict[str, Any]`):
                Dictionary that will be used to instantiate the configuration object.
            kwargs (`Dict[str, Any]`):
                Additional parameters from which to initialize the configuration object.

        Returns:
            [`GenerationConfig`]: The configuration object instantiated from those parameters.
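
        Example (a minimal sketch: build a configuration directly from a plain dictionary of generation
        parameters):

        ```python
        >>> from transformers import GenerationConfig

        >>> config = GenerationConfig.from_dict({"max_new_tokens": 32, "do_sample": True})
        >>> config.max_new_tokens
        32
        ```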
  969. """
  970. return_unused_kwargs = kwargs.pop("return_unused_kwargs", False)
  971. # Those arguments may be passed along for our internal telemetry.
  972. # We remove them so they don't appear in `return_unused_kwargs`.
  973. kwargs.pop("_from_auto", None)
  974. kwargs.pop("_from_pipeline", None)
  975. # The commit hash might have been updated in the `config_dict`, we don't want the kwargs to erase that update.
  976. if "_commit_hash" in kwargs and "_commit_hash" in config_dict:
  977. kwargs["_commit_hash"] = config_dict["_commit_hash"]
  978. # The line below allows model-specific config to be loaded as well through kwargs, with safety checks.
  979. # See https://github.com/huggingface/transformers/pull/21269
  980. config = cls(**{**config_dict, **kwargs})
  981. unused_kwargs = config.update(**kwargs)
  982. logger.info(f"Generate config {config}")
  983. if return_unused_kwargs:
  984. return config, unused_kwargs
  985. else:
  986. return config

    def dict_torch_dtype_to_str(self, d: Dict[str, Any]) -> None:
        """
        Checks whether the passed dictionary and its nested dicts have a *torch_dtype* key and, if it's not None,
        converts torch.dtype to a string of just the type. For example, `torch.float32` gets converted into the
        *"float32"* string, which can then be stored in the JSON format.
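
        Example (a small sketch, assuming `torch` is installed; the dictionary is modified in place):

        ```python
        >>> import torch

        >>> from transformers import GenerationConfig

        >>> d = {"torch_dtype": torch.float32, "nested": {"torch_dtype": torch.bfloat16}}
        >>> GenerationConfig().dict_torch_dtype_to_str(d)
        >>> d["torch_dtype"], d["nested"]["torch_dtype"]
        ('float32', 'bfloat16')
        ```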
  992. """
  993. if d.get("torch_dtype", None) is not None and not isinstance(d["torch_dtype"], str):
  994. d["torch_dtype"] = str(d["torch_dtype"]).split(".")[1]
  995. for value in d.values():
  996. if isinstance(value, dict):
  997. self.dict_torch_dtype_to_str(value)

    def to_diff_dict(self) -> Dict[str, Any]:
        """
        Removes all attributes from the config which correspond to the default config attributes, for better
        readability, and serializes to a Python dictionary.

        Returns:
            `Dict[str, Any]`: Dictionary of all the attributes that make up this configuration instance.
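
        Example (a minimal sketch: only values that differ from a default `GenerationConfig()`, plus
        `transformers_version`, remain in the diff):

        ```python
        >>> from transformers import GenerationConfig

        >>> config = GenerationConfig(do_sample=True, top_k=10)
        >>> diff = config.to_diff_dict()
        >>> diff["do_sample"], diff["top_k"]
        (True, 10)
        >>> "temperature" in diff  # left at its default value, so it is dropped
        False
        ```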
  1004. """
  1005. config_dict = self.to_dict()
  1006. # get the default config dict
  1007. default_config_dict = GenerationConfig().to_dict()
  1008. serializable_config_dict = {}
  1009. # only serialize values that differ from the default config
  1010. for key, value in config_dict.items():
  1011. if key not in default_config_dict or key == "transformers_version" or value != default_config_dict[key]:
  1012. serializable_config_dict[key] = value
  1013. self.dict_torch_dtype_to_str(serializable_config_dict)
  1014. return serializable_config_dict

    def to_dict(self) -> Dict[str, Any]:
        """
        Serializes this instance to a Python dictionary.

        Returns:
            `Dict[str, Any]`: Dictionary of all the attributes that make up this configuration instance.
        """
        output = copy.deepcopy(self.__dict__)

        # Fields to ignore at serialization time
        if "_commit_hash" in output:
            del output["_commit_hash"]
        if "_original_object_hash" in output:
            del output["_original_object_hash"]

        # Transformers version when serializing this file
        output["transformers_version"] = __version__

        self.dict_torch_dtype_to_str(output)
        return output

    def to_json_string(self, use_diff: bool = True, ignore_metadata: bool = False) -> str:
        """
        Serializes this instance to a JSON string.

        Args:
            use_diff (`bool`, *optional*, defaults to `True`):
                If set to `True`, only the difference between the config instance and the default `GenerationConfig()`
                is serialized to JSON string.
            ignore_metadata (`bool`, *optional*, defaults to `False`):
                Whether to ignore the metadata fields present in the instance.

        Returns:
            `str`: String containing all the attributes that make up this configuration instance in JSON format.
        """
        if use_diff is True:
            config_dict = self.to_diff_dict()
        else:
            config_dict = self.to_dict()

        if ignore_metadata:
            for metadata_field in METADATA_FIELDS:
                config_dict.pop(metadata_field, None)

        def convert_keys_to_string(obj):
            if isinstance(obj, dict):
                return {str(key): convert_keys_to_string(value) for key, value in obj.items()}
            elif isinstance(obj, list):
                return [convert_keys_to_string(item) for item in obj]
            else:
                return obj

        def convert_dataclass_to_dict(obj):
            if isinstance(obj, dict):
                return {key: convert_dataclass_to_dict(value) for key, value in obj.items()}
            elif is_dataclass(obj):
                return obj.to_dict()
            else:
                return obj

        config_dict = convert_keys_to_string(config_dict)
        config_dict = convert_dataclass_to_dict(config_dict)

        return json.dumps(config_dict, indent=2, sort_keys=True) + "\n"

    def to_json_file(self, json_file_path: Union[str, os.PathLike], use_diff: bool = True):
        """
        Save this instance to a JSON file.

        Args:
            json_file_path (`str` or `os.PathLike`):
                Path to the JSON file in which this configuration instance's parameters will be saved.
            use_diff (`bool`, *optional*, defaults to `True`):
                If set to `True`, only the difference between the config instance and the default `GenerationConfig()`
                is serialized to JSON file.
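
        Example (a minimal round-trip sketch; it assumes the default generation config file name,
        `generation_config.json`, so that `GenerationConfig.from_pretrained` can locate the file again):

        ```python
        >>> import os
        >>> import tempfile

        >>> from transformers import GenerationConfig

        >>> config = GenerationConfig(do_sample=True)
        >>> with tempfile.TemporaryDirectory() as tmp_dir:
        ...     config.to_json_file(os.path.join(tmp_dir, "generation_config.json"))
        ...     reloaded = GenerationConfig.from_pretrained(tmp_dir)
        >>> reloaded.do_sample
        True
        ```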
  1076. """
  1077. with open(json_file_path, "w", encoding="utf-8") as writer:
  1078. writer.write(self.to_json_string(use_diff=use_diff))

    @classmethod
    def from_model_config(cls, model_config: PretrainedConfig) -> "GenerationConfig":
        """
        Instantiates a [`GenerationConfig`] from a [`PretrainedConfig`]. This function is useful to convert legacy
        [`PretrainedConfig`] objects, which may contain generation parameters, into a stand-alone [`GenerationConfig`].

        Args:
            model_config (`PretrainedConfig`):
                The model config that will be used to instantiate the generation config.

        Returns:
            [`GenerationConfig`]: The configuration object instantiated from those parameters.
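
        Example (a minimal sketch; `AutoConfig` and the `"openai-community/gpt2"` checkpoint are used only for
        illustration, any model config carrying generation attributes works the same way):

        ```python
        >>> from transformers import AutoConfig, GenerationConfig

        >>> model_config = AutoConfig.from_pretrained("openai-community/gpt2")
        >>> generation_config = GenerationConfig.from_model_config(model_config)
        >>> generation_config.bos_token_id
        50256
        ```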
  1089. """
  1090. config_dict = model_config.to_dict()
  1091. config_dict.pop("_from_model_config", None)
  1092. # Removes all `None` from the model config dict -- this lets the generation config defaults to take hold
  1093. config_dict = {key: value for key, value in config_dict.items() if value is not None}
  1094. generation_config = cls.from_dict(config_dict, return_unused_kwargs=False, _from_model_config=True)
  1095. # Special case: some models have generation attributes set in the decoder. Use them if still unset in the
  1096. # generation config (which in turn is defined from the outer attributes of model config).
  1097. decoder_config = model_config.get_text_config(decoder=True)
  1098. if decoder_config is not model_config:
  1099. default_generation_config = GenerationConfig()
  1100. decoder_config_dict = decoder_config.to_dict()
  1101. for attr in generation_config.to_dict().keys():
  1102. is_unset = getattr(generation_config, attr) == getattr(default_generation_config, attr)
  1103. if attr in decoder_config_dict and is_unset:
  1104. setattr(generation_config, attr, decoder_config_dict[attr])
  1105. # If any `output_...` flag is set to `True`, we ensure `return_dict_in_generate` is set to `True`.
  1106. if generation_config.return_dict_in_generate is False:
  1107. if any(
  1108. getattr(generation_config, extra_output_flag, False)
  1109. for extra_output_flag in generation_config.extra_output_flags
  1110. ):
  1111. generation_config.return_dict_in_generate = True
  1112. # Hash to detect whether the instance was modified
  1113. generation_config._original_object_hash = hash(generation_config)
  1114. return generation_config

    def update(self, **kwargs):
        """
        Updates attributes of this class instance with attributes from `kwargs` if they match existing attributes,
        returning all the unused kwargs.

        Args:
            kwargs (`Dict[str, Any]`):
                Dictionary of attributes to tentatively update this class.

        Returns:
            `Dict[str, Any]`: Dictionary containing all the key-value pairs that were not used to update the instance.
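
        Example (a minimal sketch: known attributes are updated in place, unknown ones are returned untouched):

        ```python
        >>> from transformers import GenerationConfig

        >>> config = GenerationConfig(do_sample=True)
        >>> unused = config.update(top_k=5, foo="bar")
        >>> config.top_k
        5
        >>> unused
        {'foo': 'bar'}
        ```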
  1124. """
  1125. to_remove = []
  1126. for key, value in kwargs.items():
  1127. if hasattr(self, key):
  1128. setattr(self, key, value)
  1129. to_remove.append(key)
  1130. # Confirm that the updated instance is still valid
  1131. self.validate()
  1132. # Remove all the attributes that were updated, without modifying the input dict
  1133. unused_kwargs = {key: value for key, value in kwargs.items() if key not in to_remove}
  1134. return unused_kwargs


@dataclass
class BaseWatermarkingConfig(ABC):
    """Generic watermarking config"""

    @classmethod
    def from_dict(cls, config_dict, **kwargs):
        """
        Constructs a BaseWatermarkingConfig instance from a dictionary of parameters.

        Args:
            config_dict (Dict[str, Any]): Dictionary containing configuration parameters.
            **kwargs: Additional keyword arguments to override dictionary values.

        Returns:
            BaseWatermarkingConfig: Instance of BaseWatermarkingConfig constructed from the dictionary.
        """
        config = cls(**config_dict)
        to_remove = []
        for key, value in kwargs.items():
            if hasattr(config, key):
                setattr(config, key, value)
                to_remove.append(key)
        for key in to_remove:
            kwargs.pop(key, None)
        return config

    def to_json_file(self, json_file_path: Union[str, os.PathLike]):
        """
        Save this instance to a JSON file.

        Args:
            json_file_path (Union[str, os.PathLike]): Path to the JSON file in which this configuration instance's
                parameters will be saved.
        """
        with open(json_file_path, "w", encoding="utf-8") as writer:
            config_dict = self.to_dict()
            json_string = json.dumps(config_dict, indent=2, sort_keys=True) + "\n"

            writer.write(json_string)

    def to_dict(self) -> Dict[str, Any]:
        """
        Serializes this instance to a Python dictionary.

        Returns:
            Dict[str, Any]: Dictionary of all the attributes that make up this configuration instance.
        """
        output = copy.deepcopy(self.__dict__)
        return output

    def __iter__(self):
        for attr, value in copy.deepcopy(self.__dict__).items():
            yield attr, value

    def __repr__(self):
        return f"{self.__class__.__name__} {self.to_json_string()}"

    def to_json_string(self):
        """
        Serializes this instance to a JSON formatted string.

        Returns:
            str: JSON formatted string representing the configuration instance.
        """
        return json.dumps(self.__dict__, indent=2) + "\n"

    def update(self, **kwargs):
        """
        Update the configuration attributes with new values.

        Args:
            **kwargs: Keyword arguments representing configuration attributes and their new values.
        """
        for key, value in kwargs.items():
            if hasattr(self, key):
                setattr(self, key, value)

    @abstractmethod
    def validate(self): ...

    @abstractmethod
    def construct_processor(self, vocab_size): ...


@dataclass
class WatermarkingConfig(BaseWatermarkingConfig):
    """
    Class that holds arguments for watermark generation and should be passed into `GenerationConfig` during `generate`.
    See [this paper](https://arxiv.org/abs/2306.04634) for more details on the arguments.

    Accepts the following keys:
        - greenlist_ratio (`float`):
            Used for watermarking. The ratio of "green" tokens to the vocabulary size. Defaults to 0.25.
        - bias (`float`):
            Used with watermarking. The bias added to the selected "green" tokens' logits. Defaults to 2.0.
        - hashing_key (`int`):
            Hashing key used for watermarking. Defaults to 15485863 (the millionth prime).
        - seeding_scheme (`str`):
            Algorithm to use for watermarking. Accepts values:
                - "lefthash" (default): "green" tokens selection depends on the last token (Algorithm 2 from the paper)
                - "selfhash": "green" tokens selection depends on the current token itself (Algorithm 3 from the paper)
                    The downside of this scheme is that it considers all possible next tokens and can be slower than
                    "lefthash".
        - context_width (`int`):
            The context length of previous tokens to use in seeding. Higher context length makes watermarking more
            robust.
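
    Examples:

    A minimal usage sketch; the `"openai-community/gpt2"` checkpoint and the prompt are only illustrative, and the
    pattern mirrors the SynthID example below:

    ```python
    >>> from transformers import AutoModelForCausalLM, AutoTokenizer, WatermarkingConfig

    >>> tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")
    >>> model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2")

    >>> # "Green"/"red" list watermarking with the self-hash seeding scheme
    >>> watermarking_config = WatermarkingConfig(bias=2.5, seeding_scheme="selfhash")

    >>> inputs = tokenizer(["Alice and Bob"], return_tensors="pt")
    >>> out = model.generate(**inputs, watermarking_config=watermarking_config, do_sample=True, max_new_tokens=20)
    >>> watermarked_text = tokenizer.batch_decode(out, skip_special_tokens=True)
    ```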
  1219. """
  1220. def __init__(
  1221. self,
  1222. greenlist_ratio: Optional[float] = 0.25,
  1223. bias: Optional[float] = 2.0,
  1224. hashing_key: Optional[int] = 15485863,
  1225. seeding_scheme: Optional[str] = "lefthash",
  1226. context_width: Optional[int] = 1,
  1227. ):
  1228. self.greenlist_ratio = greenlist_ratio
  1229. self.bias = bias
  1230. self.hashing_key = hashing_key
  1231. self.seeding_scheme = seeding_scheme
  1232. self.context_width = context_width
  1233. def validate(self):
  1234. watermark_missing_arg_msg = (
  1235. "Some of the keys in `watermarking_config` are defined incorrectly. `{key}` should be {correct_value}` "
  1236. "but found {found_value}"
  1237. )
  1238. if self.seeding_scheme not in ["selfhash", "lefthash"]:
  1239. raise ValueError(
  1240. watermark_missing_arg_msg.format(
  1241. key="seeding_scheme",
  1242. correct_value="[`selfhash`, `lefthash`]",
  1243. found_value=self.seeding_scheme,
  1244. ),
  1245. )
  1246. if not 0.0 <= self.greenlist_ratio <= 1.0:
  1247. raise ValueError(
  1248. watermark_missing_arg_msg.format(
  1249. key="greenlist_ratio",
  1250. correct_value="in range between 0.0 and 1.0",
  1251. found_value=self.seeding_scheme,
  1252. ),
  1253. )
  1254. if not self.context_width >= 1:
  1255. raise ValueError(
  1256. watermark_missing_arg_msg.format(
  1257. key="context_width",
  1258. correct_value="a positive integer",
  1259. found_value=self.context_width,
  1260. ),
  1261. )
  1262. def construct_processor(self, vocab_size: int, device) -> "WatermarkLogitsProcessor":
  1263. return WatermarkLogitsProcessor(
  1264. vocab_size=vocab_size,
  1265. device=device,
  1266. greenlist_ratio=self.greenlist_ratio,
  1267. bias=self.bias,
  1268. hashing_key=self.hashing_key,
  1269. seeding_scheme=self.seeding_scheme,
  1270. context_width=self.context_width,
  1271. )


@dataclass
class SynthIDTextWatermarkingConfig(BaseWatermarkingConfig):
    """
    Class that holds arguments for watermark generation and should be passed into `GenerationConfig` during `generate`.
    See [this paper](https://www.nature.com/articles/s41586-024-08025-4) for more details on the arguments.

    Args:
        ngram_len (`int`):
            Ngram length.
        keys (`List[int]`):
            A sequence of watermarking keys, one for each depth.
        context_history_size (`int`, *optional*, defaults to 1024):
            Size of the tensor to keep track of seen contexts.
        sampling_table_seed (`int`, *optional*, defaults to 0):
            Random seed to generate the sampling table.
        sampling_table_size (`int`, *optional*, defaults to 65536):
            Size of the sampling table.
        skip_first_ngram_calls (`bool`, *optional*, defaults to `False`):
            Whether to skip the first ngram calls.
        debug_mode (`bool`, *optional*, defaults to `False`):
            If `True`, the logits are replaced with a uniform distribution before the watermarking modification is
            applied. This is intended for testing the implementation.

    Examples:

    ```python
    >>> from transformers import AutoModelForCausalLM, AutoTokenizer, SynthIDTextWatermarkingConfig

    >>> tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b-it")
    >>> model = AutoModelForCausalLM.from_pretrained("google/gemma-2-2b-it")

    >>> # SynthID Text configuration
    >>> watermarking_config = SynthIDTextWatermarkingConfig(
    ...     keys=[654, 400, 836, 123, 340, 443, 597, 160, 57],
    ...     ngram_len=5,
    ... )

    >>> # Generation with watermarking
    >>> tokenized_prompts = tokenizer(["your prompts here"], return_tensors="pt")
    >>> output_sequences = model.generate(
    ...     **tokenized_prompts, watermarking_config=watermarking_config, do_sample=True,
    ... )
    >>> watermarked_text = tokenizer.batch_decode(output_sequences)
    ```
    """

    def __init__(
        self,
        ngram_len: int,
        keys: List[int],
        context_history_size: int = 1024,
        sampling_table_seed: int = 0,
        sampling_table_size: int = 2**16,
        skip_first_ngram_calls: bool = False,
        debug_mode: bool = False,
    ):
        self.ngram_len = ngram_len
        self.keys = keys
        self.sampling_table_size = sampling_table_size
        self.sampling_table_seed = sampling_table_seed
        self.context_history_size = context_history_size
        self.skip_first_ngram_calls = skip_first_ngram_calls
        self.debug_mode = debug_mode

    def validate(self):
        watermark_missing_arg_msg = (
            "Some of the keys in `watermarking_config` are defined incorrectly. `{key}` should be `{correct_value}` "
            "but found {found_value}"
        )
        if self.sampling_table_size > 2**24:
            raise ValueError(
                watermark_missing_arg_msg.format(
                    key="sampling_table_size",
                    correct_value="< 2**24",
                    found_value=self.sampling_table_size,
                ),
            )

    def construct_processor(self, vocab_size: int, device) -> "SynthIDTextWatermarkLogitsProcessor":
        return SynthIDTextWatermarkLogitsProcessor(
            ngram_len=self.ngram_len,
            keys=self.keys,
            sampling_table_size=self.sampling_table_size,
            sampling_table_seed=self.sampling_table_seed,
            context_history_size=self.context_history_size,
            device=device,
            skip_first_ngram_calls=self.skip_first_ngram_calls,
            debug_mode=self.debug_mode,
        )