benchmark.py

# coding=utf-8
# Copyright 2018 The HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Benchmarking the library on inference and training in PyTorch.
"""
import timeit
from typing import Callable, Optional, Tuple

from ..configuration_utils import PretrainedConfig
from ..models.auto.modeling_auto import MODEL_MAPPING, MODEL_WITH_LM_HEAD_MAPPING
from ..utils import is_py3nvml_available, is_torch_available, logging
from .benchmark_utils import (
    Benchmark,
    Memory,
    MemorySummary,
    measure_peak_memory_cpu,
    start_memory_tracing,
    stop_memory_tracing,
)


if is_torch_available():
    import torch

    from .benchmark_args import PyTorchBenchmarkArguments


if is_py3nvml_available():
    import py3nvml.py3nvml as nvml


logger = logging.get_logger(__name__)
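
# PyTorch backend for the benchmark utilities: measures inference/training speed
# and peak memory for each configured model on CPU, GPU, or (partially) TPU.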
class PyTorchBenchmark(Benchmark):
    args: PyTorchBenchmarkArguments
    configs: PretrainedConfig
    framework: str = "PyTorch"

    @property
    def framework_version(self):
        return torch.__version__

    def _inference_speed(self, model_name: str, batch_size: int, sequence_length: int) -> float:
        _inference = self._prepare_inference_func(model_name, batch_size, sequence_length)
        return self._measure_speed(_inference)

    def _inference_memory(
        self, model_name: str, batch_size: int, sequence_length: int
    ) -> Tuple[Memory, Optional[MemorySummary]]:
        _inference = self._prepare_inference_func(model_name, batch_size, sequence_length)
        return self._measure_memory(_inference)

    def _train_speed(self, model_name: str, batch_size: int, sequence_length: int) -> float:
        _train = self._prepare_train_func(model_name, batch_size, sequence_length)
        return self._measure_speed(_train)

    def _train_memory(
        self, model_name: str, batch_size: int, sequence_length: int
    ) -> Tuple[Memory, Optional[MemorySummary]]:
        _train = self._prepare_train_func(model_name, batch_size, sequence_length)
        return self._measure_memory(_train)
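
    # Builds a zero-argument forward pass for inference measurement: the model is
    # instantiated from its `architectures` entry (or the generic `MODEL_MAPPING`),
    # optionally cast to fp16 and traced with TorchScript, and run under `torch.no_grad()`.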
    def _prepare_inference_func(self, model_name: str, batch_size: int, sequence_length: int) -> Callable[[], None]:
        config = self.config_dict[model_name]

        if self.args.torchscript:
            config.torchscript = True

        has_model_class_in_config = (
            hasattr(config, "architectures")
            and isinstance(config.architectures, list)
            and len(config.architectures) > 0
        )
        if not self.args.only_pretrain_model and has_model_class_in_config:
            try:
                model_class = config.architectures[0]
                transformers_module = __import__("transformers", fromlist=[model_class])
                model_cls = getattr(transformers_module, model_class)
                model = model_cls(config)
            except ImportError:
                raise ImportError(
                    f"{model_class} does not exist. If you just want to test the pretrained model, you might want to"
                    " set `--only_pretrain_model` or `args.only_pretrain_model=True`."
                )
        else:
            model = MODEL_MAPPING[config.__class__](config)

        model.eval()
        model.to(self.args.device)

        # encoder-decoder has vocab size saved differently
        vocab_size = config.vocab_size if hasattr(config, "vocab_size") else config.encoder.vocab_size
        input_ids = torch.randint(vocab_size, (batch_size, sequence_length), dtype=torch.long, device=self.args.device)

        if self.args.fp16:
            logger.info("Running inference in Mixed Precision...")
            if not self.args.is_gpu:
                raise ValueError("Mixed precision is possible only for GPU.")
            # amp seems to have memory leaks so that memory usage
            # is measured using .half() for now https://github.com/NVIDIA/apex/issues/439
            model.half()

        if self.args.torchscript:
            with torch.no_grad():
                inference_model = torch.jit.trace(model, input_ids)
        else:
            inference_model = model

        def encoder_decoder_forward():
            with torch.no_grad():
                outputs = inference_model(input_ids, decoder_input_ids=input_ids)
            return outputs

        def encoder_forward():
            with torch.no_grad():
                outputs = inference_model(input_ids)
            return outputs

        _forward = encoder_decoder_forward if config.is_encoder_decoder else encoder_forward
        return _forward
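
    # Builds a zero-argument training step for measurement: one forward pass with
    # `input_ids` reused as labels, followed by a backward pass on the returned loss.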
    def _prepare_train_func(self, model_name: str, batch_size: int, sequence_length: int) -> Callable[[], None]:
        config = self.config_dict[model_name]

        has_model_class_in_config = (
            hasattr(config, "architectures")
            and isinstance(config.architectures, list)
            and len(config.architectures) > 0
        )
        if not self.args.only_pretrain_model and has_model_class_in_config:
            try:
                model_class = config.architectures[0]
                transformers_module = __import__("transformers", fromlist=[model_class])
                model_cls = getattr(transformers_module, model_class)
                model = model_cls(config)
            except ImportError:
                raise ImportError(
                    f"{model_class} does not exist. If you just want to test the pretrained model, you might want to"
                    " set `--only_pretrain_model` or `args.only_pretrain_model=True`."
                )
        else:
            model = MODEL_WITH_LM_HEAD_MAPPING[config.__class__](config)

        if self.args.torchscript:
            raise NotImplementedError("Training for torchscript is currently not implemented")
        else:
            train_model = model

        model.train()
        model.to(self.args.device)

        # encoder-decoder has vocab size saved differently
        vocab_size = config.vocab_size if hasattr(config, "vocab_size") else config.encoder.vocab_size
        input_ids = torch.randint(vocab_size, (batch_size, sequence_length), dtype=torch.long, device=self.args.device)

        if self.args.fp16:
            logger.info("Running training in Mixed Precision...")
            if not self.args.is_gpu:
                raise ValueError("Mixed precision is possible only for GPU.")
            # amp seems to have memory leaks so that memory usage
            # is measured using .half() for now https://github.com/NVIDIA/apex/issues/439
            model.half()

        def compute_loss_and_backprob_encoder():
            loss = train_model(input_ids, labels=input_ids)[0]
            loss.backward()
            return loss

        def compute_loss_and_backprob_encoder_decoder():
            loss = train_model(input_ids, decoder_input_ids=input_ids, labels=input_ids)[0]
            loss.backward()
            return loss

        _train = (
            compute_loss_and_backprob_encoder_decoder
            if config.is_encoder_decoder
            else compute_loss_and_backprob_encoder
        )
        return _train
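
    # Times `func` with `timeit.repeat` and returns the fastest run normalized to a
    # single call; TPU/TorchScript get an extra warm-up pass to stabilize compilation.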
    def _measure_speed(self, func) -> float:
        try:
            if self.args.is_tpu or self.args.torchscript:
                # run an additional, untimed warm-up pass to stabilize compilation for tpu and torchscript
                logger.info("Do inference on TPU or torchscript. Running model 5 times to stabilize compilation")
                timeit.repeat(
                    func,
                    repeat=1,
                    number=5,
                )

            # as written in https://docs.python.org/2/library/timeit.html#timeit.Timer.repeat, min should be taken
            # rather than the average
            runtimes = timeit.repeat(
                func,
                repeat=self.args.repeat,
                number=10,
            )

            if self.args.is_tpu and self.args.torch_xla_tpu_print_metrics:
                import torch_xla.debug.metrics as met

                self.print_fn(met.metrics_report())

            return min(runtimes) / 10.0
        except RuntimeError as e:
            self.print_fn(f"Doesn't fit on GPU. {e}")
            return "N/A"
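
    # Measures peak memory consumed by `func`: via py3nvml on GPU, via
    # `measure_peak_memory_cpu` on CPU; optionally records a line-by-line trace.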
    def _measure_memory(self, func: Callable[[], None]) -> Tuple[Memory, Optional[MemorySummary]]:
        try:
            if self.args.trace_memory_line_by_line:
                trace = start_memory_tracing("transformers")

            if self.args.is_tpu:
                # tpu
                raise NotImplementedError(
                    "Memory Benchmarking is currently not implemented for TPU. Please disable memory benchmarking with"
                    " `--no-memory` or `args.memory=False`"
                )
            elif self.args.is_gpu:
                if not is_py3nvml_available():
                    logger.warning(
                        "py3nvml not installed, we won't log GPU memory usage. "
                        "Install py3nvml (pip install py3nvml) to log information about GPU."
                    )
                    memory = "N/A"
                else:
                    logger.info(
                        "Measuring total GPU usage on GPU device. Make sure to not have additional processes running"
                        " on the same GPU."
                    )
                    # init nvml
                    nvml.nvmlInit()
                    func()
                    handle = nvml.nvmlDeviceGetHandleByIndex(self.args.device_idx)
                    meminfo = nvml.nvmlDeviceGetMemoryInfo(handle)
                    max_bytes_in_use = meminfo.used
                    memory = Memory(max_bytes_in_use)
                    # shutdown nvml
                    nvml.nvmlShutdown()
            else:
                # cpu
                memory_bytes = measure_peak_memory_cpu(func)
                memory = Memory(memory_bytes) if isinstance(memory_bytes, int) else memory_bytes

            if self.args.trace_memory_line_by_line:
                summary = stop_memory_tracing(trace)
            else:
                summary = None

            return memory, summary
        except RuntimeError as e:
            self.print_fn(f"Doesn't fit on GPU. {e}")
            return "N/A", None
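
# ---------------------------------------------------------------------------
# Usage sketch (illustrative only): a minimal way to drive this benchmark via
# the classes exported at the top level of `transformers`. The model name,
# batch size, and sequence length below are placeholders.
#
#     from transformers import PyTorchBenchmark, PyTorchBenchmarkArguments
#
#     args = PyTorchBenchmarkArguments(
#         models=["bert-base-uncased"], batch_sizes=[8], sequence_lengths=[128]
#     )
#     benchmark = PyTorchBenchmark(args)
#     results = benchmark.run()
#     print(results)
# ---------------------------------------------------------------------------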