PdfImagePlugin.py 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304
  1. #
  2. # The Python Imaging Library.
  3. # $Id$
  4. #
  5. # PDF (Acrobat) file handling
  6. #
  7. # History:
  8. # 1996-07-16 fl Created
  9. # 1997-01-18 fl Fixed header
  10. # 2004-02-21 fl Fixes for 1/L/CMYK images, etc.
  11. # 2004-02-24 fl Fixes for 1 and P images.
  12. #
  13. # Copyright (c) 1997-2004 by Secret Labs AB. All rights reserved.
  14. # Copyright (c) 1996-1997 by Fredrik Lundh.
  15. #
  16. # See the README file for information on usage and redistribution.
  17. #
  18. ##
  19. # Image plugin for PDF images (output only).
  20. ##
  21. from __future__ import annotations
  22. import io
  23. import math
  24. import os
  25. import time
  26. from typing import IO
  27. from . import Image, ImageFile, ImageSequence, PdfParser, __version__, features
  28. #
  29. # --------------------------------------------------------------------
  30. # object ids:
  31. # 1. catalogue
  32. # 2. pages
  33. # 3. image
  34. # 4. page
  35. # 5. page contents
  36. def _save_all(im: Image.Image, fp: IO[bytes], filename: str | bytes) -> None:
  37. _save(im, fp, filename, save_all=True)
  38. ##
  39. # (Internal) Image save plugin for the PDF format.
  40. def _write_image(im, filename, existing_pdf, image_refs):
  41. # FIXME: Should replace ASCIIHexDecode with RunLengthDecode
  42. # (packbits) or LZWDecode (tiff/lzw compression). Note that
  43. # PDF 1.2 also supports Flatedecode (zip compression).
  44. params = None
  45. decode = None
  46. #
  47. # Get image characteristics
  48. width, height = im.size
  49. dict_obj = {"BitsPerComponent": 8}
  50. if im.mode == "1":
  51. if features.check("libtiff"):
  52. filter = "CCITTFaxDecode"
  53. dict_obj["BitsPerComponent"] = 1
  54. params = PdfParser.PdfArray(
  55. [
  56. PdfParser.PdfDict(
  57. {
  58. "K": -1,
  59. "BlackIs1": True,
  60. "Columns": width,
  61. "Rows": height,
  62. }
  63. )
  64. ]
  65. )
  66. else:
  67. filter = "DCTDecode"
  68. dict_obj["ColorSpace"] = PdfParser.PdfName("DeviceGray")
  69. procset = "ImageB" # grayscale
  70. elif im.mode == "L":
  71. filter = "DCTDecode"
  72. # params = f"<< /Predictor 15 /Columns {width-2} >>"
  73. dict_obj["ColorSpace"] = PdfParser.PdfName("DeviceGray")
  74. procset = "ImageB" # grayscale
  75. elif im.mode == "LA":
  76. filter = "JPXDecode"
  77. # params = f"<< /Predictor 15 /Columns {width-2} >>"
  78. procset = "ImageB" # grayscale
  79. dict_obj["SMaskInData"] = 1
  80. elif im.mode == "P":
  81. filter = "ASCIIHexDecode"
  82. palette = im.getpalette()
  83. dict_obj["ColorSpace"] = [
  84. PdfParser.PdfName("Indexed"),
  85. PdfParser.PdfName("DeviceRGB"),
  86. len(palette) // 3 - 1,
  87. PdfParser.PdfBinary(palette),
  88. ]
  89. procset = "ImageI" # indexed color
  90. if "transparency" in im.info:
  91. smask = im.convert("LA").getchannel("A")
  92. smask.encoderinfo = {}
  93. image_ref = _write_image(smask, filename, existing_pdf, image_refs)[0]
  94. dict_obj["SMask"] = image_ref
  95. elif im.mode == "RGB":
  96. filter = "DCTDecode"
  97. dict_obj["ColorSpace"] = PdfParser.PdfName("DeviceRGB")
  98. procset = "ImageC" # color images
  99. elif im.mode == "RGBA":
  100. filter = "JPXDecode"
  101. procset = "ImageC" # color images
  102. dict_obj["SMaskInData"] = 1
  103. elif im.mode == "CMYK":
  104. filter = "DCTDecode"
  105. dict_obj["ColorSpace"] = PdfParser.PdfName("DeviceCMYK")
  106. procset = "ImageC" # color images
  107. decode = [1, 0, 1, 0, 1, 0, 1, 0]
  108. else:
  109. msg = f"cannot save mode {im.mode}"
  110. raise ValueError(msg)
  111. #
  112. # image
  113. op = io.BytesIO()
  114. if filter == "ASCIIHexDecode":
  115. ImageFile._save(im, op, [("hex", (0, 0) + im.size, 0, im.mode)])
  116. elif filter == "CCITTFaxDecode":
  117. im.save(
  118. op,
  119. "TIFF",
  120. compression="group4",
  121. # use a single strip
  122. strip_size=math.ceil(width / 8) * height,
  123. )
  124. elif filter == "DCTDecode":
  125. Image.SAVE["JPEG"](im, op, filename)
  126. elif filter == "JPXDecode":
  127. del dict_obj["BitsPerComponent"]
  128. Image.SAVE["JPEG2000"](im, op, filename)
  129. else:
  130. msg = f"unsupported PDF filter ({filter})"
  131. raise ValueError(msg)
  132. stream = op.getvalue()
  133. if filter == "CCITTFaxDecode":
  134. stream = stream[8:]
  135. filter = PdfParser.PdfArray([PdfParser.PdfName(filter)])
  136. else:
  137. filter = PdfParser.PdfName(filter)
  138. image_ref = image_refs.pop(0)
  139. existing_pdf.write_obj(
  140. image_ref,
  141. stream=stream,
  142. Type=PdfParser.PdfName("XObject"),
  143. Subtype=PdfParser.PdfName("Image"),
  144. Width=width, # * 72.0 / x_resolution,
  145. Height=height, # * 72.0 / y_resolution,
  146. Filter=filter,
  147. Decode=decode,
  148. DecodeParms=params,
  149. **dict_obj,
  150. )
  151. return image_ref, procset
  152. def _save(im, fp, filename, save_all=False):
  153. is_appending = im.encoderinfo.get("append", False)
  154. if is_appending:
  155. existing_pdf = PdfParser.PdfParser(f=fp, filename=filename, mode="r+b")
  156. else:
  157. existing_pdf = PdfParser.PdfParser(f=fp, filename=filename, mode="w+b")
  158. dpi = im.encoderinfo.get("dpi")
  159. if dpi:
  160. x_resolution = dpi[0]
  161. y_resolution = dpi[1]
  162. else:
  163. x_resolution = y_resolution = im.encoderinfo.get("resolution", 72.0)
  164. info = {
  165. "title": (
  166. None if is_appending else os.path.splitext(os.path.basename(filename))[0]
  167. ),
  168. "author": None,
  169. "subject": None,
  170. "keywords": None,
  171. "creator": None,
  172. "producer": None,
  173. "creationDate": None if is_appending else time.gmtime(),
  174. "modDate": None if is_appending else time.gmtime(),
  175. }
  176. for k, default in info.items():
  177. v = im.encoderinfo.get(k) if k in im.encoderinfo else default
  178. if v:
  179. existing_pdf.info[k[0].upper() + k[1:]] = v
  180. #
  181. # make sure image data is available
  182. im.load()
  183. existing_pdf.start_writing()
  184. existing_pdf.write_header()
  185. existing_pdf.write_comment(f"created by Pillow {__version__} PDF driver")
  186. #
  187. # pages
  188. ims = [im]
  189. if save_all:
  190. append_images = im.encoderinfo.get("append_images", [])
  191. for append_im in append_images:
  192. append_im.encoderinfo = im.encoderinfo.copy()
  193. ims.append(append_im)
  194. number_of_pages = 0
  195. image_refs = []
  196. page_refs = []
  197. contents_refs = []
  198. for im in ims:
  199. im_number_of_pages = 1
  200. if save_all:
  201. try:
  202. im_number_of_pages = im.n_frames
  203. except AttributeError:
  204. # Image format does not have n_frames.
  205. # It is a single frame image
  206. pass
  207. number_of_pages += im_number_of_pages
  208. for i in range(im_number_of_pages):
  209. image_refs.append(existing_pdf.next_object_id(0))
  210. if im.mode == "P" and "transparency" in im.info:
  211. image_refs.append(existing_pdf.next_object_id(0))
  212. page_refs.append(existing_pdf.next_object_id(0))
  213. contents_refs.append(existing_pdf.next_object_id(0))
  214. existing_pdf.pages.append(page_refs[-1])
  215. #
  216. # catalog and list of pages
  217. existing_pdf.write_catalog()
  218. page_number = 0
  219. for im_sequence in ims:
  220. im_pages = ImageSequence.Iterator(im_sequence) if save_all else [im_sequence]
  221. for im in im_pages:
  222. image_ref, procset = _write_image(im, filename, existing_pdf, image_refs)
  223. #
  224. # page
  225. existing_pdf.write_page(
  226. page_refs[page_number],
  227. Resources=PdfParser.PdfDict(
  228. ProcSet=[PdfParser.PdfName("PDF"), PdfParser.PdfName(procset)],
  229. XObject=PdfParser.PdfDict(image=image_ref),
  230. ),
  231. MediaBox=[
  232. 0,
  233. 0,
  234. im.width * 72.0 / x_resolution,
  235. im.height * 72.0 / y_resolution,
  236. ],
  237. Contents=contents_refs[page_number],
  238. )
  239. #
  240. # page contents
  241. page_contents = b"q %f 0 0 %f 0 0 cm /image Do Q\n" % (
  242. im.width * 72.0 / x_resolution,
  243. im.height * 72.0 / y_resolution,
  244. )
  245. existing_pdf.write_obj(contents_refs[page_number], stream=page_contents)
  246. page_number += 1
  247. #
  248. # trailer
  249. existing_pdf.write_xref_and_trailer()
  250. if hasattr(fp, "flush"):
  251. fp.flush()
  252. existing_pdf.close()
  253. #
  254. # --------------------------------------------------------------------
# Register the PDF writer with the Image module: `_save` is the regular
# save handler and `_save_all` the handler for multi-page output.
Image.register_save("PDF", _save)
Image.register_save_all("PDF", _save_all)
# Associate the ".pdf" extension and the PDF MIME type with this format.
Image.register_extension("PDF", ".pdf")
Image.register_mime("PDF", "application/pdf")