# coding=utf-8
# Copyright 2023 The Intel Labs Team Authors, The Microsoft Research Team Authors and HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Image processor class for BridgeTower."""

from typing import Any, Dict, Iterable, List, Optional, Tuple, Union

import numpy as np

from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict
from ...image_transforms import PaddingMode, center_crop, pad, resize, to_channel_dimension_format
from ...image_utils import (
    OPENAI_CLIP_MEAN,
    OPENAI_CLIP_STD,
    ChannelDimension,
    ImageInput,
    PILImageResampling,
    get_image_size,
    infer_channel_dimension_format,
    is_batched,
    is_scaled_image,
    to_numpy_array,
    valid_images,
    validate_preprocess_arguments,
)
from ...utils import TensorType, filter_out_non_signature_kwargs, is_vision_available, logging


if is_vision_available():
    import PIL


logger = logging.get_logger(__name__)


# Copied from transformers.models.vilt.image_processing_vilt.max_across_indices
def max_across_indices(values: Iterable[Any]) -> List[Any]:
    """
    Return the maximum value across all indices of an iterable of values.
    """
    return [max(values_i) for values_i in zip(*values)]
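
# Illustrative sanity check (example shapes are hypothetical, not taken from the library's
# tests): the maximum is taken per position across the zipped tuples.
#
#     max_across_indices([(3, 288, 512), (3, 320, 480)])  # -> [3, 320, 512]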


# Copied from transformers.models.vilt.image_processing_vilt.make_pixel_mask
def make_pixel_mask(
    image: np.ndarray, output_size: Tuple[int, int], input_data_format: Optional[Union[str, ChannelDimension]] = None
) -> np.ndarray:
    """
    Make a pixel mask for the image, where 1 indicates a valid pixel and 0 indicates padding.

    Args:
        image (`np.ndarray`):
            Image to make the pixel mask for.
        output_size (`Tuple[int, int]`):
            Output size of the mask.
    """
    input_height, input_width = get_image_size(image, channel_dim=input_data_format)
    mask = np.zeros(output_size, dtype=np.int64)
    mask[:input_height, :input_width] = 1
    return mask
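
# Sketch of the resulting mask (a hypothetical channels-first 2x2 image padded to 3x4;
# shapes chosen for illustration only):
#
#     make_pixel_mask(np.zeros((3, 2, 2)), output_size=(3, 4))
#     # array([[1, 1, 0, 0],
#     #        [1, 1, 0, 0],
#     #        [0, 0, 0, 0]])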


# Copied from transformers.models.vilt.image_processing_vilt.get_max_height_width
def get_max_height_width(
    images: List[np.ndarray], input_data_format: Optional[Union[str, ChannelDimension]] = None
) -> List[int]:
    """
    Get the maximum height and width across all images in a batch.
    """
    if input_data_format is None:
        input_data_format = infer_channel_dimension_format(images[0])

    if input_data_format == ChannelDimension.FIRST:
        _, max_height, max_width = max_across_indices([img.shape for img in images])
    elif input_data_format == ChannelDimension.LAST:
        max_height, max_width, _ = max_across_indices([img.shape for img in images])
    else:
        raise ValueError(f"Invalid channel dimension format: {input_data_format}")
    return (max_height, max_width)
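
# For a channels-first batch (shapes are hypothetical), the per-dimension maxima of height
# and width are returned:
#
#     get_max_height_width([np.zeros((3, 288, 512)), np.zeros((3, 320, 480))])  # -> (320, 512)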


# Copied from transformers.models.vilt.image_processing_vilt.get_resize_output_image_size
def get_resize_output_image_size(
    input_image: np.ndarray,
    shorter: int = 800,
    longer: int = 1333,
    size_divisor: int = 32,
    input_data_format: Optional[Union[str, ChannelDimension]] = None,
) -> Tuple[int, int]:
    input_height, input_width = get_image_size(input_image, input_data_format)
    min_size, max_size = shorter, longer

    scale = min_size / min(input_height, input_width)
    if input_height < input_width:
        new_height = min_size
        new_width = scale * input_width
    else:
        new_height = scale * input_height
        new_width = min_size

    if max(new_height, new_width) > max_size:
        scale = max_size / max(new_height, new_width)
        new_height = scale * new_height
        new_width = scale * new_width

    new_height, new_width = int(new_height + 0.5), int(new_width + 0.5)
    new_height = new_height // size_divisor * size_divisor
    new_width = new_width // size_divisor * size_divisor
    return new_height, new_width
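
# Worked example with the defaults (the input shape is hypothetical): a 480x640 image gives
# scale = 800 / 480 ~ 1.667, so (new_height, new_width) = (800, 1066.7). The longer side
# stays under 1333, so after rounding and flooring to a multiple of 32 the result is:
#
#     get_resize_output_image_size(np.zeros((3, 480, 640)))  # -> (800, 1056)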


class BridgeTowerImageProcessor(BaseImageProcessor):
    r"""
    Constructs a BridgeTower image processor.

    Args:
        do_resize (`bool`, *optional*, defaults to `True`):
            Whether to resize the image's (height, width) dimensions to the specified `size`. Can be overridden by the
            `do_resize` parameter in the `preprocess` method.
        size (`Dict[str, int]`, *optional*, defaults to `{'shortest_edge': 288}`):
            Resize the shorter side of the input to `size["shortest_edge"]`. The longer side will be limited to under
            `int((1333 / 800) * size["shortest_edge"])` while preserving the aspect ratio. Only has an effect if
            `do_resize` is set to `True`. Can be overridden by the `size` parameter in the `preprocess` method.
        size_divisor (`int`, *optional*, defaults to 32):
            The size by which to make sure both the height and width can be divided. Only has an effect if `do_resize`
            is set to `True`. Can be overridden by the `size_divisor` parameter in the `preprocess` method.
        resample (`PILImageResampling`, *optional*, defaults to `PILImageResampling.BICUBIC`):
            Resampling filter to use if resizing the image. Only has an effect if `do_resize` is set to `True`. Can be
            overridden by the `resample` parameter in the `preprocess` method.
        do_rescale (`bool`, *optional*, defaults to `True`):
            Whether to rescale the image by the specified scale `rescale_factor`. Can be overridden by the `do_rescale`
            parameter in the `preprocess` method.
        rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
            Scale factor to use if rescaling the image. Only has an effect if `do_rescale` is set to `True`. Can be
            overridden by the `rescale_factor` parameter in the `preprocess` method.
        do_normalize (`bool`, *optional*, defaults to `True`):
            Whether to normalize the image. Can be overridden by the `do_normalize` parameter in the `preprocess`
            method.
        image_mean (`float` or `List[float]`, *optional*, defaults to `OPENAI_CLIP_MEAN`):
            Mean to use if normalizing the image. This is a float or list of floats the length of the number of
            channels in the image. Can be overridden by the `image_mean` parameter in the `preprocess` method.
        image_std (`float` or `List[float]`, *optional*, defaults to `OPENAI_CLIP_STD`):
            Standard deviation to use if normalizing the image. This is a float or list of floats the length of the
            number of channels in the image. Can be overridden by the `image_std` parameter in the `preprocess` method.
        do_center_crop (`bool`, *optional*, defaults to `True`):
            Whether to center crop the image. Can be overridden by the `do_center_crop` parameter in the `preprocess`
            method.
        crop_size (`Dict[str, int]`, *optional*):
            Desired output size when applying center-cropping. Only has an effect if `do_center_crop` is set to `True`.
            Can be overridden by the `crop_size` parameter in the `preprocess` method. If unset, defaults to `size`.
        do_pad (`bool`, *optional*, defaults to `True`):
            Whether to pad the image to the `(max_height, max_width)` of the images in the batch. Can be overridden by
            the `do_pad` parameter in the `preprocess` method.
    """

    model_input_names = ["pixel_values"]

    def __init__(
        self,
        do_resize: bool = True,
        size: Dict[str, int] = None,
        size_divisor: int = 32,
        resample: PILImageResampling = PILImageResampling.BICUBIC,
        do_rescale: bool = True,
        rescale_factor: Union[int, float] = 1 / 255,
        do_normalize: bool = True,
        image_mean: Optional[Union[float, List[float]]] = None,
        image_std: Optional[Union[float, List[float]]] = None,
        do_center_crop: bool = True,
        crop_size: Dict[str, int] = None,
        do_pad: bool = True,
        **kwargs,
    ) -> None:
        if "pad_and_return_pixel_mask" in kwargs:
            do_pad = kwargs.pop("pad_and_return_pixel_mask")

        super().__init__(**kwargs)
        size = size if size is not None else {"shortest_edge": 288}
        size = get_size_dict(size, default_to_square=False)

        self.do_resize = do_resize
        self.size = size
        self.size_divisor = size_divisor
        self.resample = resample
        self.do_rescale = do_rescale
        self.rescale_factor = rescale_factor
        self.do_normalize = do_normalize
        self.image_mean = image_mean if image_mean is not None else OPENAI_CLIP_MEAN
        self.image_std = image_std if image_std is not None else OPENAI_CLIP_STD
        self.do_pad = do_pad
        self.do_center_crop = do_center_crop
        self.crop_size = crop_size

    # Copied from transformers.models.vilt.image_processing_vilt.ViltImageProcessor.resize
    def resize(
        self,
        image: np.ndarray,
        size: Dict[str, int],
        size_divisor: int = 32,
        resample: PILImageResampling = PILImageResampling.BICUBIC,
        data_format: Optional[Union[str, ChannelDimension]] = None,
        input_data_format: Optional[Union[str, ChannelDimension]] = None,
        **kwargs,
    ) -> np.ndarray:
        """
        Resize an image.

        Resizes the shorter side of the image to `size["shortest_edge"]` while preserving the aspect ratio. If the
        longer side is larger than the max size `int(size["shortest_edge"] * 1333 / 800)`, the longer side is then
        resized to the max size while preserving the aspect ratio.

        Args:
            image (`np.ndarray`):
                Image to resize.
            size (`Dict[str, int]`):
                Controls the size of the output image. Should be of the form `{"shortest_edge": int}`.
            size_divisor (`int`, *optional*, defaults to 32):
                The image is resized to a size that is a multiple of this value.
            resample (`PILImageResampling` filter, *optional*, defaults to `PILImageResampling.BICUBIC`):
                Resampling filter to use when resizing the image.
            data_format (`str` or `ChannelDimension`, *optional*):
                The channel dimension format of the image. If not provided, it will be the same as the input image.
            input_data_format (`str` or `ChannelDimension`, *optional*):
                The channel dimension format of the input image. If not provided, it will be inferred.
        """
        size = get_size_dict(size, default_to_square=False)
        if "shortest_edge" not in size:
            raise ValueError(f"The `size` dictionary must contain the key `shortest_edge`. Got {size.keys()}")
        shorter = size["shortest_edge"]
        longer = int(1333 / 800 * shorter)
        output_size = get_resize_output_image_size(
            image, shorter=shorter, longer=longer, size_divisor=size_divisor, input_data_format=input_data_format
        )
        return resize(
            image,
            size=output_size,
            resample=resample,
            data_format=data_format,
            input_data_format=input_data_format,
            **kwargs,
        )

    def center_crop(
        self,
        image: np.ndarray,
        size: Dict[str, int],
        data_format: Optional[Union[str, ChannelDimension]] = None,
        input_data_format: Optional[Union[str, ChannelDimension]] = None,
        **kwargs,
    ) -> np.ndarray:
        """
        Center crop an image to a square of side `size["shortest_edge"]`. If the input size is smaller than
        `crop_size` along any edge, the image is padded with 0's and then center cropped.

        Args:
            image (`np.ndarray`):
                Image to center crop.
            size (`Dict[str, int]`):
                Size of the output image in the form `{"shortest_edge": int}`.
            data_format (`str` or `ChannelDimension`, *optional*):
                The channel dimension format of the image. If not provided, it will be the same as the input image.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format of the input image. If not provided, it will be inferred from the input
                image.
        """
        output_size = size["shortest_edge"]
        return center_crop(
            image,
            size=(output_size, output_size),
            data_format=data_format,
            input_data_format=input_data_format,
            **kwargs,
        )
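
    # Illustrative call (the array shape is hypothetical): cropping a channels-first
    # 3x300x400 image with {"shortest_edge": 288} yields a square 3x288x288 array.
    #
    #     BridgeTowerImageProcessor().center_crop(np.zeros((3, 300, 400)), size={"shortest_edge": 288}).shape
    #     # -> (3, 288, 288)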

    # Copied from transformers.models.vilt.image_processing_vilt.ViltImageProcessor._pad_image
    def _pad_image(
        self,
        image: np.ndarray,
        output_size: Tuple[int, int],
        constant_values: Union[float, Iterable[float]] = 0,
        data_format: Optional[ChannelDimension] = None,
        input_data_format: Optional[Union[str, ChannelDimension]] = None,
    ) -> np.ndarray:
        """
        Pad an image with zeros to the given size.
        """
        input_height, input_width = get_image_size(image, channel_dim=input_data_format)
        output_height, output_width = output_size

        pad_bottom = output_height - input_height
        pad_right = output_width - input_width
        padding = ((0, pad_bottom), (0, pad_right))
        padded_image = pad(
            image,
            padding,
            mode=PaddingMode.CONSTANT,
            constant_values=constant_values,
            data_format=data_format,
            input_data_format=input_data_format,
        )
        return padded_image

    # Copied from transformers.models.vilt.image_processing_vilt.ViltImageProcessor.pad
    def pad(
        self,
        images: List[np.ndarray],
        constant_values: Union[float, Iterable[float]] = 0,
        return_pixel_mask: bool = True,
        return_tensors: Optional[Union[str, TensorType]] = None,
        data_format: Optional[ChannelDimension] = None,
        input_data_format: Optional[Union[str, ChannelDimension]] = None,
    ) -> BatchFeature:
        """
        Pads a batch of images to the bottom and right of the image with zeros to the size of the largest height and
        width in the batch and optionally returns their corresponding pixel mask.

        Args:
            images (`List[np.ndarray]`):
                Images to pad.
            constant_values (`float` or `Iterable[float]`, *optional*):
                The value to use for the padding if `mode` is `"constant"`.
            return_pixel_mask (`bool`, *optional*, defaults to `True`):
                Whether to return a pixel mask.
            return_tensors (`str` or `TensorType`, *optional*):
                The type of tensors to return. Can be one of:
                    - Unset: Return a list of `np.ndarray`.
                    - `TensorType.TENSORFLOW` or `'tf'`: Return a batch of type `tf.Tensor`.
                    - `TensorType.PYTORCH` or `'pt'`: Return a batch of type `torch.Tensor`.
                    - `TensorType.NUMPY` or `'np'`: Return a batch of type `np.ndarray`.
                    - `TensorType.JAX` or `'jax'`: Return a batch of type `jax.numpy.ndarray`.
            data_format (`str` or `ChannelDimension`, *optional*):
                The channel dimension format of the image. If not provided, it will be the same as the input image.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format of the input image. If not provided, it will be inferred.
        """
        pad_size = get_max_height_width(images, input_data_format=input_data_format)

        padded_images = [
            self._pad_image(
                image,
                pad_size,
                constant_values=constant_values,
                data_format=data_format,
                input_data_format=input_data_format,
            )
            for image in images
        ]
        data = {"pixel_values": padded_images}

        if return_pixel_mask:
            masks = [
                make_pixel_mask(image=image, output_size=pad_size, input_data_format=input_data_format)
                for image in images
            ]
            data["pixel_mask"] = masks

        return BatchFeature(data=data, tensor_type=return_tensors)
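
    # Sketch of the padding contract (shapes are hypothetical): two channels-first images of
    # 3x288x288 and 3x320x256 are both padded bottom/right to 3x320x288, and each entry in
    # `pixel_mask` marks the original (unpadded) region with ones.
    #
    #     batch = BridgeTowerImageProcessor().pad([np.zeros((3, 288, 288)), np.zeros((3, 320, 256))])
    #     # batch["pixel_values"][0].shape -> (3, 320, 288); batch["pixel_mask"][0].sum() -> 288 * 288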

    @filter_out_non_signature_kwargs()
    def preprocess(
        self,
        images: ImageInput,
        do_resize: Optional[bool] = None,
        size: Optional[Dict[str, int]] = None,
        size_divisor: Optional[int] = None,
        resample: Optional[PILImageResampling] = None,
        do_rescale: Optional[bool] = None,
        rescale_factor: Optional[float] = None,
        do_normalize: Optional[bool] = None,
        image_mean: Optional[Union[float, List[float]]] = None,
        image_std: Optional[Union[float, List[float]]] = None,
        do_pad: Optional[bool] = None,
        do_center_crop: Optional[bool] = None,
        crop_size: Dict[str, int] = None,
        return_tensors: Optional[Union[str, TensorType]] = None,
        data_format: ChannelDimension = ChannelDimension.FIRST,
        input_data_format: Optional[Union[str, ChannelDimension]] = None,
    ) -> BatchFeature:
        """
        Preprocess an image or batch of images.

        Args:
            images (`ImageInput`):
                Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
                passing in images with pixel values between 0 and 1, set `do_rescale=False`.
            do_resize (`bool`, *optional*, defaults to `self.do_resize`):
                Whether to resize the image.
            size (`Dict[str, int]`, *optional*, defaults to `self.size`):
                Controls the size of the image after `resize`. The shortest edge of the image is resized to
                `size["shortest_edge"]` whilst preserving the aspect ratio. If the longest edge of this resized image
                is > `int(size["shortest_edge"] * (1333 / 800))`, then the image is resized again to make the longest
                edge equal to `int(size["shortest_edge"] * (1333 / 800))`.
            size_divisor (`int`, *optional*, defaults to `self.size_divisor`):
                The image is resized to a size that is a multiple of this value.
            resample (`PILImageResampling`, *optional*, defaults to `self.resample`):
                Resampling filter to use if resizing the image. Only has an effect if `do_resize` is set to `True`.
            do_rescale (`bool`, *optional*, defaults to `self.do_rescale`):
                Whether to rescale the image values between [0 - 1].
            rescale_factor (`float`, *optional*, defaults to `self.rescale_factor`):
                Rescale factor to rescale the image by if `do_rescale` is set to `True`.
            do_normalize (`bool`, *optional*, defaults to `self.do_normalize`):
                Whether to normalize the image.
            image_mean (`float` or `List[float]`, *optional*, defaults to `self.image_mean`):
                Image mean to normalize the image by if `do_normalize` is set to `True`.
            image_std (`float` or `List[float]`, *optional*, defaults to `self.image_std`):
                Image standard deviation to normalize the image by if `do_normalize` is set to `True`.
            do_pad (`bool`, *optional*, defaults to `self.do_pad`):
                Whether to pad the image to the (max_height, max_width) in the batch. If `True`, a pixel mask is also
                created and returned.
            do_center_crop (`bool`, *optional*, defaults to `self.do_center_crop`):
                Whether to center crop the image. If the input size is smaller than `crop_size` along any edge, the
                image is padded with 0's and then center cropped.
            crop_size (`Dict[str, int]`, *optional*, defaults to `self.crop_size`):
                Size of the image after center crop. If one edge of the image is smaller than `crop_size`, it will be
                padded with zeros and then cropped.
            return_tensors (`str` or `TensorType`, *optional*):
                The type of tensors to return. Can be one of:
                    - Unset: Return a list of `np.ndarray`.
                    - `TensorType.TENSORFLOW` or `'tf'`: Return a batch of type `tf.Tensor`.
                    - `TensorType.PYTORCH` or `'pt'`: Return a batch of type `torch.Tensor`.
                    - `TensorType.NUMPY` or `'np'`: Return a batch of type `np.ndarray`.
                    - `TensorType.JAX` or `'jax'`: Return a batch of type `jax.numpy.ndarray`.
            data_format (`ChannelDimension` or `str`, *optional*, defaults to `ChannelDimension.FIRST`):
                The channel dimension format for the output image. Can be one of:
                    - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                    - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                    - Unset: Use the channel dimension format of the input image.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format for the input image. If unset, the channel dimension format is inferred
                from the input image. Can be one of:
                    - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                    - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                    - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
        """
        do_resize = do_resize if do_resize is not None else self.do_resize
        size_divisor = size_divisor if size_divisor is not None else self.size_divisor
        resample = resample if resample is not None else self.resample
        do_rescale = do_rescale if do_rescale is not None else self.do_rescale
        rescale_factor = rescale_factor if rescale_factor is not None else self.rescale_factor
        do_normalize = do_normalize if do_normalize is not None else self.do_normalize
        image_mean = image_mean if image_mean is not None else self.image_mean
        image_std = image_std if image_std is not None else self.image_std
        do_pad = do_pad if do_pad is not None else self.do_pad
        do_center_crop = do_center_crop if do_center_crop is not None else self.do_center_crop
        # For backwards compatibility. Initial version of this processor was cropping to the "size" argument, which
        # it should default to if crop_size is undefined.
        crop_size = (
            crop_size if crop_size is not None else (self.crop_size if self.crop_size is not None else self.size)
        )

        size = size if size is not None else self.size
        size = get_size_dict(size, default_to_square=False)

        if not is_batched(images):
            images = [images]

        if not valid_images(images):
            raise ValueError(
                "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
                "torch.Tensor, tf.Tensor or jax.ndarray."
            )
        # Here, crop_size is used only if it is set, else size will be used.
        validate_preprocess_arguments(
            do_rescale=do_rescale,
            rescale_factor=rescale_factor,
            do_normalize=do_normalize,
            image_mean=image_mean,
            image_std=image_std,
            do_pad=do_pad,
            size_divisibility=size_divisor,
            do_center_crop=do_center_crop,
            crop_size=crop_size,
            do_resize=do_resize,
            size=size,
            resample=resample,
        )
        # All transformations expect numpy arrays.
        images = [to_numpy_array(image) for image in images]

        if is_scaled_image(images[0]) and do_rescale:
            logger.warning_once(
                "It looks like you are trying to rescale already rescaled images. If the input"
                " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
            )

        if do_resize:
            images = [
                self.resize(
                    image=image,
                    size=size,
                    size_divisor=size_divisor,
                    resample=resample,
                    input_data_format=input_data_format,
                )
                for image in images
            ]

        if do_center_crop:
            images = [
                self.center_crop(image=image, size=crop_size, input_data_format=input_data_format) for image in images
            ]

        if do_rescale:
            images = [
                self.rescale(image=image, scale=rescale_factor, input_data_format=input_data_format)
                for image in images
            ]

        if do_normalize:
            images = [
                self.normalize(image=image, mean=image_mean, std=image_std, input_data_format=input_data_format)
                for image in images
            ]

        images = [
            to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format) for image in images
        ]

        if do_pad:
            encoded_outputs = self.pad(
                images, return_pixel_mask=True, return_tensors=return_tensors, input_data_format=data_format
            )
        else:
            encoded_outputs = BatchFeature(data={"pixel_values": images}, tensor_type=return_tensors)

        return encoded_outputs
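

# Minimal end-to-end usage sketch (illustrative only; assumes Pillow is installed and that a
# local file named "example.jpg" exists, which is hypothetical):
#
#     from transformers import BridgeTowerImageProcessor
#     from PIL import Image
#
#     processor = BridgeTowerImageProcessor()
#     inputs = processor(Image.open("example.jpg"), return_tensors="pt")
#     # inputs["pixel_values"].shape -> (1, 3, H, W); inputs["pixel_mask"] marks valid pixels.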