2012-10-02 20:59:02 +04:00
|
|
|
|
"""
|
|
|
|
|
weasyprint.document
|
|
|
|
|
-------------------
|
|
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
2019-05-24 00:55:56 +03:00
|
|
|
|
import collections
|
2017-03-25 02:33:36 +03:00
|
|
|
|
import functools
|
2020-04-19 17:49:37 +03:00
|
|
|
|
import hashlib
|
2012-10-02 20:59:02 +04:00
|
|
|
|
import io
|
|
|
|
|
import math
|
|
|
|
|
import shutil
|
2020-04-19 17:49:37 +03:00
|
|
|
|
import zlib
|
|
|
|
|
from os.path import basename
|
|
|
|
|
from urllib.parse import unquote, urlsplit
|
2012-10-02 20:59:02 +04:00
|
|
|
|
|
2020-04-18 01:19:35 +03:00
|
|
|
|
import pydyf
|
2020-05-29 20:43:56 +03:00
|
|
|
|
from fontTools import subset
|
2020-05-30 01:30:13 +03:00
|
|
|
|
from fontTools.ttLib import TTFont, TTLibError
|
2012-10-02 20:59:02 +04:00
|
|
|
|
|
2020-05-13 00:54:42 +03:00
|
|
|
|
from . import CSS, Attachment, __version__
|
2012-10-02 20:59:02 +04:00
|
|
|
|
from .css import get_all_computed_styles
|
2019-12-24 17:56:24 +03:00
|
|
|
|
from .css.counters import CounterStyle
|
2018-03-28 01:34:34 +03:00
|
|
|
|
from .css.targets import TargetCollector
|
2017-03-25 02:33:36 +03:00
|
|
|
|
from .draw import draw_page, stacked
|
2016-10-27 18:36:24 +03:00
|
|
|
|
from .fonts import FontConfiguration
|
2012-10-04 13:35:25 +04:00
|
|
|
|
from .formatting_structure import boxes
|
2012-10-02 20:59:02 +04:00
|
|
|
|
from .formatting_structure.build import build_formatting_structure
|
2020-05-30 02:11:30 +03:00
|
|
|
|
from .html import W3C_DATE_RE, get_html_metadata
|
2018-01-07 03:46:39 +03:00
|
|
|
|
from .images import get_image_from_uri as original_get_image_from_uri
|
2020-10-23 13:36:03 +03:00
|
|
|
|
from .layout import LayoutContext, layout_document
|
2019-06-02 19:06:25 +03:00
|
|
|
|
from .layout.percentages import percentage
|
2019-01-04 01:02:44 +03:00
|
|
|
|
from .logger import LOGGER, PROGRESS_LOGGER
|
2020-05-08 01:11:19 +03:00
|
|
|
|
from .text import ffi, pango
|
2020-04-19 17:49:37 +03:00
|
|
|
|
from .urls import URLFetchingError
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _w3c_date_to_pdf(string, attr_name):
    """Transform a W3C date string into the PDF date format.

    Return ``None`` when *string* is missing or does not match
    ``W3C_DATE_RE`` (a warning naming *attr_name* is logged in the
    latter case).
    """
    if string is None:
        return None
    match = W3C_DATE_RE.match(string)
    if match is None:
        LOGGER.warning(f'Invalid {attr_name} date: {string!r}')
        return None
    groups = match.groupdict()

    # Build the PDF date from the least to the most significant part.
    # Once any part is present, every more significant missing part is
    # padded with its default: 01 for day/month, 00 for time components
    # (formatting the boolean with %02d yields exactly '01' or '00').
    pdf_date = ''
    found = groups['hour']
    for key in ('second', 'minute', 'hour', 'day', 'month', 'year'):
        value = groups[key]
        if value:
            found = True
            pdf_date = value + pdf_date
        elif found:
            pdf_date = f'{(key in ("day", "month")):02d}{pdf_date}'

    # Append the timezone suffix only when a time is present.
    if groups['hour']:
        assert groups['minute']
        if groups['tz_hour']:
            assert groups['tz_hour'].startswith(('+', '-'))
            assert groups['tz_minute']
            offset_hours = int(groups['tz_hour'])
            offset_minutes = int(groups['tz_minute'])
            pdf_date += f"{offset_hours:+03d}'{offset_minutes:02d}"
        else:
            pdf_date += 'Z'
    return pdf_date
|
2012-10-02 20:59:02 +04:00
|
|
|
|
|
2020-04-18 01:19:35 +03:00
|
|
|
|
|
2020-05-08 01:31:50 +03:00
|
|
|
|
class Font:
    """Descriptor data for a font embedded in the PDF.

    Wraps the raw font file bytes plus the metrics extracted from
    *pango_font*, normalized to the usual 1000-units-per-em PDF glyph
    space.
    """

    def __init__(self, file_content, pango_font):
        metrics = pango.pango_font_get_metrics(pango_font, ffi.NULL)
        description = pango.pango_font_describe(pango_font)
        family = ffi.string(
            pango.pango_font_description_get_family(description))
        size = pango.pango_font_description_get_size(description)

        # Six uppercase letters derived from the file's SHA-256, used as
        # the PDF subset tag prefixed to the font name.
        digest = hashlib.sha256(file_content).digest()
        tag = ''.join(chr(65 + byte % 26) for byte in digest[:6])

        self.file_content = file_content
        self.hash = tag
        self.name = (
            b'/' + tag.encode('ascii') + b'+' + family.replace(b' ', b''))
        self.family = family
        self.flags = 4
        self.italic_angle = 0
        # Pango metrics are relative to the described size; rescale to a
        # 1000-unit em. PDF descent is negative, Pango's is positive.
        self.ascent = int(
            pango.pango_font_metrics_get_ascent(metrics) / size * 1000)
        self.descent = -int(
            pango.pango_font_metrics_get_descent(metrics) / size * 1000)
        self.stemv = 80
        self.stemh = 80
        # Filled in later while the document is rendered.
        self.bbox = [0, 0, 0, 0]
        self.widths = {}
        self.cmap = {}
|
2020-05-07 20:33:54 +03:00
|
|
|
|
|
|
|
|
|
|
2020-04-18 01:19:35 +03:00
|
|
|
|
class Context(pydyf.Stream):
    """PDF stream object with context storing alpha states."""
    def __init__(self, document, page_rectangle, alpha_states, x_objects,
                 patterns, shadings, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.compress = True
        self.page_rectangle = page_rectangle
        self._document = document
        # Shared PDF resource dictionaries, filled lazily while drawing.
        self._alpha_states = alpha_states
        self._x_objects = x_objects
        self._patterns = patterns
        self._shadings = shadings
        # Cached graphic-state values, used to skip redundant operators.
        self._current_color = self._current_color_stroke = None
        self._current_alpha = self._current_alpha_stroke = None
        self._current_font = self._current_font_size = None
        self._old_font = self._old_font_size = None

        # These objects are used in text.show_first_line
        self.length = ffi.new('unsigned int *')
        self.ink_rect = ffi.new('PangoRectangle *')
        self.logical_rect = ffi.new('PangoRectangle *')

    def pop_state(self):
        """Restore the previous graphic state and drop cached values."""
        super().pop_state()
        # Q restores the saved state, so the caches are no longer valid.
        self._current_color = self._current_color_stroke = None
        self._current_alpha = self._current_alpha_stroke = None
        self._current_font = None

    def begin_text(self):
        """Begin a text object, merging with the previous one if possible."""
        if self.stream[-1] == b'ET':
            # The last operator closed a text object: reopen it instead of
            # emitting a new BT, and restore the font it was using.
            self._current_font = self._old_font
            self.stream.pop()
        else:
            super().begin_text()

    def end_text(self):
        """End the current text object, remembering its font."""
        # Keep the font so a following begin_text can restore the cache.
        self._old_font, self._current_font = self._current_font, None
        super().end_text()

    def set_color_rgb(self, r, g, b, stroke=False):
        """Set the (stroke or fill) RGB color, skipping no-op changes."""
        if stroke:
            if (r, g, b) == self._current_color_stroke:
                return
            else:
                self._current_color_stroke = (r, g, b)
        else:
            if (r, g, b) == self._current_color:
                return
            else:
                self._current_color = (r, g, b)

        super().set_color_rgb(r, g, b, stroke)

    def set_font_size(self, font, size):
        """Set the current font and size, skipping no-op changes."""
        if (font, size) == self._current_font:
            return
        self._current_font = (font, size)
        super().set_font_size(font, size)

    def set_alpha(self, alpha, stroke=False):
        """Set the (stroke or fill) alpha, skipping no-op changes."""
        if stroke:
            if alpha == self._current_alpha_stroke:
                return
            else:
                self._current_alpha_stroke = alpha
        else:
            if alpha == self._current_alpha:
                return
            else:
                self._current_alpha = alpha

        # Register an ExtGState for this alpha value on first use.
        if alpha not in self._alpha_states:
            self._alpha_states[alpha] = pydyf.Dictionary()
            if stroke in (None, False):
                self._alpha_states[alpha]['ca'] = alpha
            if stroke in (None, True):
                self._alpha_states[alpha]['CA'] = alpha
        self.set_state(alpha)

    def add_font(self, font_hash, font_content, pango_font):
        """Register a font in the document and return the ``Font`` object."""
        self._document.fonts[font_hash] = Font(font_content, pango_font)
        return self._document.fonts[font_hash]

    def get_fonts(self):
        """Return the document's font mapping (hash -> ``Font``)."""
        return self._document.fonts

    def add_transparency_group(self, bounding_box):
        """Create and register a transparency group XObject.

        Return a new ``Context`` whose content is drawn into the group.
        """
        # Each group gets its own (initially empty) resource dictionaries.
        alpha_states = pydyf.Dictionary()
        x_objects = pydyf.Dictionary()
        patterns = pydyf.Dictionary()
        shadings = pydyf.Dictionary()
        resources = pydyf.Dictionary({
            'ExtGState': alpha_states,
            'XObject': x_objects,
            'Pattern': patterns,
            'Shading': shadings,
            'Font': None,  # Will be set by _use_references
        })
        extra = pydyf.Dictionary({
            'Type': '/XObject',
            'Subtype': '/Form',
            'BBox': pydyf.Array(bounding_box),
            'Resources': resources,
            'Group': pydyf.Dictionary({
                'Type': '/Group',
                'S': '/Transparency',
                'I': 'true',
                'CS': '/DeviceRGB',
            }),
        })
        group = Context(
            self._document, self.page_rectangle, alpha_states, x_objects,
            patterns, shadings, extra=extra)
        group.id = f'x{len(self._x_objects)}'
        self._x_objects[group.id] = group
        return group

    def add_image(self, pillow_image, image_rendering, optimize_image):
        """Embed a Pillow image as an image XObject, return its name."""
        # Normalize modes the PDF image dictionary can't express directly.
        if 'transparency' in pillow_image.info:
            pillow_image = pillow_image.convert('RGBA')
        elif pillow_image.mode in ('1', 'P'):
            pillow_image = pillow_image.convert('RGB')

        if pillow_image.mode in ('RGB', 'RGBA'):
            color_space = '/DeviceRGB'
        elif pillow_image.mode == 'L':
            color_space = '/DeviceGray'
        elif pillow_image.mode == 'CMYK':
            color_space = '/DeviceCMYK'
        else:
            LOGGER.warning('Unknown image mode: %s', pillow_image.mode)
            color_space = '/DeviceRGB'

        interpolate = 'true' if image_rendering == 'auto' else 'false'
        extra = pydyf.Dictionary({
            'Type': '/XObject',
            'Subtype': '/Image',
            'Width': pillow_image.width,
            'Height': pillow_image.height,
            'ColorSpace': color_space,
            'BitsPerComponent': 8,
            'Interpolate': interpolate,
        })

        image_file = io.BytesIO()
        if pillow_image.format == 'JPEG':
            # Keep JPEG data as-is with the DCT filter.
            extra['Filter'] = '/DCTDecode'
            pillow_image.save(
                image_file, format='JPEG', optimize=optimize_image)
        else:
            # Everything else is re-encoded as JPEG 2000.
            extra['Filter'] = '/JPXDecode'
            if pillow_image.mode == 'RGBA':
                # The alpha channel becomes a separate grayscale soft mask.
                alpha = pillow_image.getchannel('A')
                pillow_image = pillow_image.convert('RGB')
                alpha_file = io.BytesIO()
                alpha.save(
                    alpha_file, format='JPEG2000', optimize=optimize_image,
                    num_resolutions=1)
                extra['SMask'] = pydyf.Stream([alpha_file.getvalue()], extra={
                    'Filter': '/JPXDecode',
                    'Type': '/XObject',
                    'Subtype': '/Image',
                    'Width': pillow_image.width,
                    'Height': pillow_image.height,
                    'ColorSpace': '/DeviceGray',
                    'BitsPerComponent': 8,
                    'Interpolate': interpolate,
                })
            # Set number of resolutions to 1 because of
            # https://github.com/uclouvain/openjpeg/issues/215
            pillow_image.save(
                image_file, format='JPEG2000', optimize=optimize_image,
                num_resolutions=1)
        stream = [image_file.getvalue()]

        xobject = pydyf.Stream(stream, extra=extra)
        image_name = f'Im{len(self._x_objects)}'
        self._x_objects[image_name] = xobject
        return image_name

    def add_pattern(self, x, y, width, height, repeat_width, repeat_height):
        """Create and register a tiling pattern.

        Return a new ``Context`` whose content is the pattern cell.
        """
        # Each pattern gets its own (initially empty) resource dictionaries.
        alpha_states = pydyf.Dictionary()
        x_objects = pydyf.Dictionary()
        patterns = pydyf.Dictionary()
        shadings = pydyf.Dictionary()
        resources = pydyf.Dictionary({
            'ExtGState': alpha_states,
            'XObject': x_objects,
            'Pattern': patterns,
            'Shading': shadings,
            'Font': None,  # Will be set by _use_references
        })
        # Flip the y axis: CSS coordinates grow downwards, PDF upwards.
        matrix = (1, 0, 0, -1, x, self.page_rectangle[3] - y)
        extra = pydyf.Dictionary({
            'PatternType': 1,
            'BBox': pydyf.Array([0, 0, width, height]),
            'XStep': repeat_width,
            'YStep': repeat_height,
            'TilingType': 1,
            'PaintType': 1,
            # 0.75 converts CSS pixels to PDF points (72 / 96) —
            # NOTE(review): assumed from the standard px/pt ratio, confirm.
            'Matrix': pydyf.Array(0.75 * i for i in matrix),
            'Resources': resources,
        })
        pattern = Context(
            self._document, self.page_rectangle, alpha_states, x_objects,
            patterns, shadings, extra=extra)
        pattern.id = f'p{len(self._patterns)}'
        self._patterns[pattern.id] = pattern
        return pattern

    def add_shading(self):
        """Create, register and return an empty shading dictionary."""
        shading = pydyf.Dictionary()
        shading.id = f's{len(self._shadings)}'
        self._shadings[shading.id] = shading
        return shading
|
|
|
|
|
|
2012-10-02 20:59:02 +04:00
|
|
|
|
|
2020-04-19 17:49:37 +03:00
|
|
|
|
# One node of the bookmark (outline) tree: the displayed label, its
# destination, the nested child subtrees, and the open/closed state.
BookmarkSubtree = collections.namedtuple(
    'BookmarkSubtree', 'label destination children state')
|
2020-04-18 23:12:25 +03:00
|
|
|
|
|
|
|
|
|
|
2020-04-19 17:49:37 +03:00
|
|
|
|
def _write_pdf_attachment(pdf, attachment, url_fetcher):
    """Write an attachment to the PDF stream.

    :param pdf: the ``pydyf`` PDF object the attachment is added to.
    :param attachment: an :class:`Attachment`, a ``(url, description)``
        tuple, or anything :class:`Attachment` accepts as ``guess``.
    :param url_fetcher: fetcher used when the attachment must be loaded
        from a URL.
    :return:
        the attachment PDF dictionary.

    """
    # Attachments from document links like <link> or <a> can only be URLs.
    # They're passed in as tuples
    url = ''
    if isinstance(attachment, tuple):
        url, description = attachment
        attachment = Attachment(
            url=url, url_fetcher=url_fetcher, description=description)
    elif not isinstance(attachment, Attachment):
        attachment = Attachment(guess=attachment, url_fetcher=url_fetcher)

    try:
        with attachment.source as (source_type, source, url, _):
            if isinstance(source, bytes):
                source = io.BytesIO(source)
            # Stream the source in chunks, deflate-compressing on the fly
            # and accumulating the MD5 checksum and uncompressed size that
            # go into the embedded file's Params dictionary.
            uncompressed_length = 0
            stream = b''
            md5 = hashlib.md5()
            compress = zlib.compressobj()
            for data in iter(lambda: source.read(4096), b''):
                uncompressed_length += len(data)
                md5.update(data)
                compressed = compress.compress(data)
                stream += compressed
            compressed = compress.flush(zlib.Z_FINISH)
            stream += compressed
            file_extra = pydyf.Dictionary({
                'Type': '/EmbeddedFile',
                'Filter': '/FlateDecode',
                'Params': pydyf.Dictionary({
                    'CheckSum': f'<{md5.hexdigest()}>',
                    'Size': uncompressed_length,
                })
            })
            file_stream = pydyf.Stream([stream], file_extra)
            pdf.add_object(file_stream)

    except URLFetchingError as exception:
        # Best effort: a missing attachment is logged, not fatal.
        LOGGER.error('Failed to load attachment: %s', exception)
        return

    # TODO: Use the result object from a URL fetch operation to provide more
    # details on the possible filename.
    if url and urlsplit(url).path:
        filename = basename(unquote(urlsplit(url).path))
    else:
        filename = 'attachment.bin'

    # Filespec dictionary referencing the embedded file written above.
    attachment = pydyf.Dictionary({
        'Type': '/Filespec',
        'F': pydyf.String(),
        'UF': pydyf.String(filename),
        'EF': pydyf.Dictionary({'F': file_stream.reference}),
        'Desc': pydyf.String(attachment.description or ''),
    })
    pdf.add_object(attachment)
    return attachment
|
2020-04-19 17:49:37 +03:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def create_bookmarks(bookmarks, pdf, parent=None):
    """Create PDF outline items for a tree of bookmarks, recursively.

    :param bookmarks: list of ``(title, (page, x, y), children, state)``
        tuples, where ``children`` has the same structure.
    :param pdf: the ``pydyf`` PDF object the outline items are added to.
    :param parent: outline dictionary of the enclosing level, or ``None``
        at the top level.
    :return: ``(outlines, count)`` — the sibling outline dictionaries
        created at this level, and the number of visible descendants
        (children of closed items are not counted).

    """
    count = len(bookmarks)
    outlines = []
    for title, (page, x, y), children, state in bookmarks:
        destination = pydyf.Array((
            # Assumes each page contributes three items to the Kids
            # array, so page * 3 is the page reference — NOTE(review):
            # based on the indexing here, confirm against page writing.
            pdf.objects[pdf.pages['Kids'][page * 3]].reference,
            '/XYZ', x, y, 0))
        outline = pydyf.Dictionary({
            'Title': pydyf.String(title), 'Dest': destination})
        pdf.add_object(outline)
        children_outlines, children_count = create_bookmarks(
            children, pdf, parent=outline)
        outline['Count'] = children_count
        if state == 'closed':
            # A negative Count marks a closed outline item in PDF; its
            # descendants don't add to the visible count.
            outline['Count'] *= -1
        else:
            count += children_count
        # Link siblings together as a doubly-linked list.
        if outlines:
            outline['Prev'] = outlines[-1].reference
            outlines[-1]['Next'] = outline.reference
        # Link this item to its first and last direct children.
        if children_outlines:
            outline['First'] = children_outlines[0].reference
            outline['Last'] = children_outlines[-1].reference
        if parent is not None:
            outline['Parent'] = parent.reference
        outlines.append(outline)
    return outlines, count
|
|
|
|
|
|
|
|
|
|
|
2020-04-19 17:49:37 +03:00
|
|
|
|
def add_hyperlinks(links, anchors, matrix, pdf, page, names):
    """Include hyperlinks in current PDF page.

    :param links: ``(link_type, target, rectangle, download_name)``
        tuples for this page.
    :param anchors: ``(anchor_name, x, y)`` tuples for this page.
    :param matrix: :class:`Matrix` mapping CSS pixels to PDF coordinates.
    :param pdf: the ``pydyf`` PDF object annotations are added to.
    :param page: the PDF page dictionary receiving the annotations.
    :param names: flat list of alternating name / destination entries,
        extended in place for the document's name tree.
    """
    for link in links:
        link_type, link_target, rectangle, _ = link
        # Transform both rectangle corners into PDF coordinates.
        x1, y1 = matrix.transform_point(*rectangle[:2])
        x2, y2 = matrix.transform_point(*rectangle[2:])
        if link_type in ('internal', 'external'):
            annot = pydyf.Dictionary({
                'Type': '/Annot',
                'Subtype': '/Link',
                'Rect': pydyf.Array([x1, y1, x2, y2]),
                # Borderless link annotation.
                'BS': pydyf.Dictionary({'W': 0}),
            })
            if link_type == 'internal':
                # Named destination, resolved through the name tree.
                annot['Dest'] = pydyf.String(link_target)
            else:
                # External links use a URI action.
                annot['A'] = pydyf.Dictionary({
                    'Type': '/Action',
                    'S': '/URI',
                    'URI': pydyf.String(link_target),
                })
            pdf.add_object(annot)
            if 'Annots' not in page:
                page['Annots'] = pydyf.Array()
            page['Annots'].append(annot.reference)

    # Register each anchor as a named destination on this page.
    for anchor in anchors:
        anchor_name, x, y = anchor
        x, y = matrix.transform_point(x, y)
        names.append(pydyf.String(anchor_name))
        names.append(pydyf.Array([page.reference, '/XYZ', x, y, 0]))
|
2012-10-06 13:26:55 +04:00
|
|
|
|
|
|
|
|
|
|
2012-10-07 00:09:17 +04:00
|
|
|
|
def rectangle_aabb(matrix, pos_x, pos_y, width, height):
    """Apply a transformation matrix to an axis-aligned rectangle.

    Return its axis-aligned bounding box as ``(x1, y1, x2, y2)``.

    """
    # Transform all four corners, then take the extrema: after an
    # arbitrary affine transform the rectangle may be rotated or skewed,
    # so any corner can end up on any side of the bounding box.
    corners = [
        matrix.transform_point(pos_x, pos_y),
        matrix.transform_point(pos_x + width, pos_y),
        matrix.transform_point(pos_x, pos_y + height),
        matrix.transform_point(pos_x + width, pos_y + height)]
    xs, ys = zip(*corners)
    return min(xs), min(ys), max(xs), max(ys)
|
2012-10-07 00:09:17 +04:00
|
|
|
|
|
|
|
|
|
|
2020-04-19 19:26:49 +03:00
|
|
|
|
def resolve_links(pages):
    """Resolve internal hyperlinks.

    Links to a missing anchor are removed with a warning.

    If multiple anchors have the same name, the first one is used.

    :returns:
        A generator yielding ``(page_links, page_anchors)`` tuples, one
        per page.  ``page_links`` is a list like :attr:`Page.links`,
        with internal links to missing anchors filtered out (internal
        targets keep their anchor name).  ``page_anchors`` is a list of
        ``(anchor_name, x, y)`` tuples keeping only the first occurrence
        of each anchor name, with ``x, y`` in CSS pixels from the
        top-left of the page.

    """
    # First pass: collect every anchor, keeping only the first
    # occurrence of each name.
    anchors = set()
    paged_anchors = []
    for page in pages:
        paged_anchors.append([])
        for anchor_name, (point_x, point_y) in page.anchors.items():
            if anchor_name not in anchors:
                paged_anchors[-1].append((anchor_name, point_x, point_y))
                anchors.add(anchor_name)
    # Second pass: drop internal links whose anchor does not exist.
    for page in pages:
        page_links = []
        for link in page.links:
            link_type, anchor_name, rectangle, _ = link
            if link_type == 'internal':
                if anchor_name not in anchors:
                    LOGGER.error(
                        'No anchor #%s for internal URI reference',
                        anchor_name)
                else:
                    # Internal links never carry a download name.
                    page_links.append(
                        (link_type, anchor_name, rectangle, None))
            else:
                # External link
                page_links.append(link)
        yield page_links, paged_anchors.pop(0)
|
|
|
|
|
|
|
|
|
|
|
2020-04-19 17:49:37 +03:00
|
|
|
|
class Matrix(list):
    """A 3x3 transformation matrix stored as a list of three rows.

    The affine coefficients ``a``–``f`` fill the first two columns;
    the third column is always ``(0, 0, 1)``.  Points are row vectors
    multiplied on the left: ``(x, y, 1) @ matrix``.
    """

    def __init__(self, a=1, b=0, c=0, d=1, e=0, f=0, matrix=None):
        # An explicit nested-list matrix wins over the coefficients.
        rows = matrix if matrix is not None else [
            [a, b, 0], [c, d, 0], [e, f, 1]]
        super().__init__(rows)

    def __matmul__(self, other):
        """Return the matrix product ``self @ other``."""
        assert len(self[0]) == len(other) == len(other[0]) == 3
        product = []
        for row in self:
            product.append([
                row[0] * other[0][column] +
                row[1] * other[1][column] +
                row[2] * other[2][column]
                for column in range(3)])
        return Matrix(matrix=product)

    @property
    def determinant(self):
        """Determinant, expanded along the first column."""
        assert len(self) == len(self[0]) == 3
        row0, row1, row2 = self[0], self[1], self[2]
        return (
            row0[0] * (row1[1] * row2[2] - row1[2] * row2[1]) -
            row1[0] * (row0[1] * row2[2] - row0[2] * row2[1]) +
            row2[0] * (row0[1] * row1[2] - row0[2] * row1[1]))

    def transform_point(self, x, y):
        """Apply the matrix to point ``(x, y)``, returning ``[x', y']``."""
        point = Matrix(matrix=[[x, y, 1]])
        return (point @ self)[0][:2]
|
2018-08-06 18:38:02 +03:00
|
|
|
|
|
|
|
|
|
|
2020-01-02 14:06:58 +03:00
|
|
|
|
class Page:
    """Represents a single rendered page.

    .. versionadded:: 0.15

    Should be obtained from :attr:`Document.pages` but not
    instantiated directly.

    """
    def __init__(self, page_box):
        #: The page width, including margins, in CSS pixels.
        self.width = page_box.margin_width()

        #: The page height, including margins, in CSS pixels.
        self.height = page_box.margin_height()

        #: The page bleed widths as a :obj:`dict` with ``'top'``, ``'right'``,
        #: ``'bottom'`` and ``'left'`` as keys, and values in CSS pixels.
        self.bleed = {
            side: page_box.style[f'bleed_{side}'].value
            for side in ('top', 'right', 'bottom', 'left')}

        #: The :obj:`list` of ``(bookmark_level, bookmark_label, target)``
        #: :obj:`tuples <tuple>`. ``bookmark_level`` and ``bookmark_label``
        #: are respectively an :obj:`int` and a :obj:`string <str>`, based on
        #: the CSS properties of the same names. ``target`` is an ``(x, y)``
        #: point in CSS pixels from the top-left of the page.
        self.bookmarks = []

        #: The :obj:`list` of ``(link_type, target, rectangle)`` :obj:`tuples
        #: <tuple>`. A ``rectangle`` is ``(x, y, width, height)``, in CSS
        #: pixels from the top-left of the page. ``link_type`` is one of three
        #: strings:
        #:
        #: * ``'external'``: ``target`` is an absolute URL
        #: * ``'internal'``: ``target`` is an anchor name (see
        #:   :attr:`Page.anchors`).
        #:   The anchor might be defined in another page,
        #:   in multiple pages (in which case the first occurence is used),
        #:   or not at all.
        #: * ``'attachment'``: ``target`` is an absolute URL and points
        #:   to a resource to attach to the document.
        self.links = []

        #: The :obj:`dict` mapping each anchor name to its target, an
        #: ``(x, y)`` point in CSS pixels from the top-left of the page.
        self.anchors = {}

        self._gather_links_and_bookmarks(page_box)
        self._page_box = page_box

    def _gather_links_and_bookmarks(self, box, parent_matrix=None):
        """Recursively fill :attr:`bookmarks`, :attr:`links` and
        :attr:`anchors` from the box tree, tracking CSS transforms."""
        # Get box transformation matrix.
        # "Transforms apply to block-level and atomic inline-level elements,
        # but do not apply to elements which may be split into
        # multiple inline-level boxes."
        # http://www.w3.org/TR/css3-2d-transforms/#introduction
        if box.style['transform'] and not isinstance(box, boxes.InlineBox):
            border_width = box.border_width()
            border_height = box.border_height()
            # Resolve the transform origin relative to the border box.
            origin_x, origin_y = box.style['transform_origin']
            offset_x = percentage(origin_x, border_width)
            offset_y = percentage(origin_y, border_height)
            origin_x = box.border_box_x() + offset_x
            origin_y = box.border_box_y() + offset_y

            # Compose all transform functions around the origin.
            matrix = Matrix(e=origin_x, f=origin_y)
            for name, args in box.style['transform']:
                a, b, c, d, e, f = 1, 0, 0, 1, 0, 0
                if name == 'scale':
                    a, d = args
                elif name == 'rotate':
                    a = d = math.cos(args)
                    b = math.sin(args)
                    c = -b
                elif name == 'translate':
                    e = percentage(args[0], border_width)
                    f = percentage(args[1], border_height)
                elif name == 'skew':
                    b, c = math.tan(args[1]), math.tan(args[0])
                else:
                    assert name == 'matrix'
                    a, b, c, d, e, f = args
                matrix = Matrix(a, b, c, d, e, f) @ matrix
            # Translate back from the origin, then combine with parents.
            box.transformation_matrix = (
                Matrix(e=-origin_x, f=-origin_y) @ matrix)
            if parent_matrix:
                matrix = box.transformation_matrix @ parent_matrix
            else:
                matrix = box.transformation_matrix
        else:
            matrix = parent_matrix

        bookmark_label = box.bookmark_label
        if box.style['bookmark_level'] == 'none':
            bookmark_level = None
        else:
            bookmark_level = box.style['bookmark_level']
        state = box.style['bookmark_state']
        link = box.style['link']
        anchor_name = box.style['anchor']
        has_bookmark = bookmark_label and bookmark_level
        # 'link' is inherited but redundant on text boxes
        has_link = link and not isinstance(box, (boxes.TextBox, boxes.LineBox))
        # In case of duplicate IDs, only the first is an anchor.
        has_anchor = anchor_name and anchor_name not in self.anchors
        is_attachment = getattr(box, 'is_attachment', False)
        download_name = getattr(box, 'attachment_download', None)

        if has_bookmark or has_link or has_anchor:
            pos_x, pos_y, width, height = box.hit_area()
            if has_link:
                token_type, link = link
                assert token_type == 'url'
                link_type, target = link
                assert isinstance(target, str)
                if link_type == 'external' and is_attachment:
                    link_type = 'attachment'
                # Transformed boxes need their hit area's bounding box.
                if matrix:
                    link = (
                        link_type, target,
                        rectangle_aabb(matrix, pos_x, pos_y, width, height),
                        download_name)
                else:
                    link = (
                        link_type, target,
                        (pos_x, pos_y, pos_x + width, pos_y + height),
                        download_name)
                self.links.append(link)
            if matrix and (has_bookmark or has_anchor):
                pos_x, pos_y = matrix.transform_point(pos_x, pos_y)
            if has_bookmark:
                self.bookmarks.append(
                    (bookmark_level, bookmark_label, (pos_x, pos_y), state))
            if has_anchor:
                self.anchors[anchor_name] = pos_x, pos_y

        for child in box.all_children():
            self._gather_links_and_bookmarks(child, matrix)

    def paint(self, context, left_x=0, top_y=0, scale=1, clip=False):
        """Paint the page into the PDF file.

        :type context: :class:`pdf.Context`
        :param context:
            A context object.
        :type left_x: float
        :param left_x:
            X coordinate of the left of the page, in PDF points.
        :type top_y: float
        :param top_y:
            Y coordinate of the top of the page, in PDF points.
        :type scale: float
        :param scale:
            Zoom scale.
        :type clip: bool
        :param clip:
            Whether to clip/cut content outside the page. If false or
            not provided, content can overflow.

        """
        with stacked(context):
            # Make (0, 0) the top-left corner, and make user units CSS pixels:
            context.transform(scale, 0, 0, scale, left_x, top_y)
            if clip:
                width = self.width
                height = self.height
                context.rectangle(0, 0, width, height)
                context.clip()
            draw_page(self._page_box, context)
|
2012-10-02 20:59:02 +04:00
|
|
|
|
|
|
|
|
|
|
2020-01-02 14:06:58 +03:00
|
|
|
|
class DocumentMetadata:
    """Meta-information belonging to a whole :class:`Document`.

    .. versionadded:: 0.20

    New attributes may be added in future versions of WeasyPrint.

    """
    def __init__(self, title=None, authors=None, description=None,
                 keywords=None, generator=None, created=None, modified=None,
                 attachments=None):
        #: The title of the document, as a string or :obj:`None`.
        #: Extracted from the ``<title>`` element in HTML
        #: and written to the ``/Title`` info field in PDF.
        self.title = title
        #: The authors of the document, as a list of strings.
        #: (Defaults to the empty list.)
        #: Extracted from the ``<meta name=author>`` elements in HTML
        #: and written to the ``/Author`` info field in PDF.
        self.authors = authors or []
        #: The description of the document, as a string or :obj:`None`.
        #: Extracted from the ``<meta name=description>`` element in HTML
        #: and written to the ``/Subject`` info field in PDF.
        self.description = description
        #: Keywords associated with the document, as a list of strings.
        #: (Defaults to the empty list.)
        #: Extracted from ``<meta name=keywords>`` elements in HTML
        #: and written to the ``/Keywords`` info field in PDF.
        self.keywords = keywords or []
        #: The name of one of the software packages
        #: used to generate the document, as a string or :obj:`None`.
        #: Extracted from the ``<meta name=generator>`` element in HTML
        #: and written to the ``/Creator`` info field in PDF.
        self.generator = generator
        #: The creation date of the document, as a string or :obj:`None`.
        #: Dates are in one of the six formats specified in
        #: `W3C’s profile of ISO 8601 <http://www.w3.org/TR/NOTE-datetime>`_.
        #: Extracted from the ``<meta name=dcterms.created>`` element in HTML
        #: and written to the ``/CreationDate`` info field in PDF.
        self.created = created
        #: The modification date of the document, as a string or :obj:`None`.
        #: Dates are in one of the six formats specified in
        #: `W3C’s profile of ISO 8601 <http://www.w3.org/TR/NOTE-datetime>`_.
        #: Extracted from the ``<meta name=dcterms.modified>`` element in HTML
        #: and written to the ``/ModDate`` info field in PDF.
        self.modified = modified
        #: File attachments, as a list of tuples of URL and a description or
        #: :obj:`None`. (Defaults to the empty list.)
        #: Extracted from the ``<link rel=attachment>`` elements in HTML
        #: and written to the ``/EmbeddedFiles`` dictionary in PDF.
        #:
        #: .. versionadded:: 0.22
        self.attachments = attachments or []
2020-01-02 14:06:58 +03:00
|
|
|
|
class Document:
|
2020-12-11 00:47:27 +03:00
|
|
|
|
"""A rendered document ready to be painted in a pydyf stream.
|
2012-10-04 13:35:25 +04:00
|
|
|
|
|
2019-02-22 13:34:46 +03:00
|
|
|
|
Typically obtained from :meth:`HTML.render() <weasyprint.HTML.render>`, but
|
|
|
|
|
can also be instantiated directly with a list of :class:`pages <Page>`, a
|
|
|
|
|
set of :class:`metadata <DocumentMetadata>`, a :func:`url_fetcher
|
|
|
|
|
<weasyprint.default_url_fetcher>` function, and a :class:`font_config
|
|
|
|
|
<weasyprint.fonts.FontConfiguration>`.
|
2012-10-04 13:35:25 +04:00
|
|
|
|
|
|
|
|
|
"""
|
2019-07-23 08:12:08 +03:00
|
|
|
|
|
2012-10-02 20:59:02 +04:00
|
|
|
|
@classmethod
|
2020-04-18 01:19:35 +03:00
|
|
|
|
def _build_layout_context(cls, html, stylesheets,
|
2020-06-22 17:05:14 +03:00
|
|
|
|
presentational_hints=False,
|
|
|
|
|
optimize_images=False, font_config=None,
|
2020-06-22 17:32:12 +03:00
|
|
|
|
counter_style=None, image_cache=None):
|
2017-10-01 16:17:32 +03:00
|
|
|
|
if font_config is None:
|
|
|
|
|
font_config = FontConfiguration()
|
2019-12-24 16:39:40 +03:00
|
|
|
|
if counter_style is None:
|
2019-12-24 17:56:24 +03:00
|
|
|
|
counter_style = CounterStyle()
|
2018-03-28 01:34:34 +03:00
|
|
|
|
target_collector = TargetCollector()
|
2017-06-30 18:54:02 +03:00
|
|
|
|
page_rules = []
|
2018-03-24 01:57:33 +03:00
|
|
|
|
user_stylesheets = []
|
2020-06-22 17:32:12 +03:00
|
|
|
|
image_cache = {} if image_cache is None else image_cache
|
2018-03-24 01:57:33 +03:00
|
|
|
|
for css in stylesheets or []:
|
|
|
|
|
if not hasattr(css, 'matcher'):
|
|
|
|
|
css = CSS(
|
|
|
|
|
guess=css, media_type=html.media_type,
|
2019-12-24 16:39:40 +03:00
|
|
|
|
font_config=font_config, counter_style=counter_style)
|
2018-03-24 01:57:33 +03:00
|
|
|
|
user_stylesheets.append(css)
|
2018-08-17 11:30:51 +03:00
|
|
|
|
style_for = get_all_computed_styles(
|
2018-03-24 01:57:33 +03:00
|
|
|
|
html, user_stylesheets, presentational_hints, font_config,
|
2019-12-24 16:39:40 +03:00
|
|
|
|
counter_style, page_rules, target_collector)
|
2012-10-05 20:50:40 +04:00
|
|
|
|
get_image_from_uri = functools.partial(
|
2020-06-22 17:32:12 +03:00
|
|
|
|
original_get_image_from_uri, image_cache, html.url_fetcher,
|
|
|
|
|
optimize_images)
|
2019-01-04 01:02:44 +03:00
|
|
|
|
PROGRESS_LOGGER.info('Step 4 - Creating formatting structure')
|
2019-07-23 08:12:08 +03:00
|
|
|
|
context = LayoutContext(
|
2020-04-18 01:19:35 +03:00
|
|
|
|
style_for, get_image_from_uri, font_config, counter_style,
|
|
|
|
|
target_collector)
|
2019-07-23 08:12:08 +03:00
|
|
|
|
return context
|
|
|
|
|
|
|
|
|
|
@classmethod
|
2020-04-19 17:49:37 +03:00
|
|
|
|
def _render(cls, html, stylesheets, presentational_hints=False,
|
2020-07-31 15:46:36 +03:00
|
|
|
|
optimize_images=False, font_config=None, counter_style=None,
|
|
|
|
|
image_cache=None):
|
2019-07-23 08:12:08 +03:00
|
|
|
|
if font_config is None:
|
|
|
|
|
font_config = FontConfiguration()
|
|
|
|
|
|
2019-12-24 16:39:40 +03:00
|
|
|
|
if counter_style is None:
|
2019-12-24 17:56:24 +03:00
|
|
|
|
counter_style = CounterStyle()
|
2019-12-24 16:39:40 +03:00
|
|
|
|
|
2019-07-23 08:12:08 +03:00
|
|
|
|
context = cls._build_layout_context(
|
2020-07-31 15:46:36 +03:00
|
|
|
|
html, stylesheets, presentational_hints, optimize_images,
|
|
|
|
|
font_config, counter_style, image_cache)
|
2019-07-23 08:12:08 +03:00
|
|
|
|
|
2018-08-08 18:47:47 +03:00
|
|
|
|
root_box = build_formatting_structure(
|
2019-07-23 19:07:14 +03:00
|
|
|
|
html.etree_element, context.style_for, context.get_image_from_uri,
|
2019-12-24 16:39:40 +03:00
|
|
|
|
html.base_url, context.target_collector, counter_style)
|
2019-07-23 08:12:08 +03:00
|
|
|
|
|
|
|
|
|
page_boxes = layout_document(html, root_box, context)
|
2016-10-27 12:41:34 +03:00
|
|
|
|
rendering = cls(
|
2020-04-18 01:19:35 +03:00
|
|
|
|
[Page(page_box) for page_box in page_boxes],
|
2020-05-30 02:11:30 +03:00
|
|
|
|
DocumentMetadata(**get_html_metadata(html)),
|
2018-01-28 17:45:39 +03:00
|
|
|
|
html.url_fetcher, font_config)
|
2016-10-27 12:41:34 +03:00
|
|
|
|
return rendering
|
2012-10-02 20:59:02 +04:00
|
|
|
|
|
2020-06-07 16:18:00 +03:00
|
|
|
|
def _use_references(self, pdf, resources):
|
|
|
|
|
# XObjects
|
|
|
|
|
for key, x_object in resources.get('XObject', {}).items():
|
|
|
|
|
pdf.add_object(x_object)
|
|
|
|
|
resources['XObject'][key] = x_object.reference
|
2020-12-20 15:57:36 +03:00
|
|
|
|
if 'SMask' in x_object.extra:
|
|
|
|
|
pdf.add_object(x_object.extra['SMask'])
|
|
|
|
|
x_object.extra['SMask'] = x_object.extra['SMask'].reference
|
2020-06-07 16:18:00 +03:00
|
|
|
|
if 'Resources' in x_object.extra:
|
2020-12-19 22:06:16 +03:00
|
|
|
|
self._use_references(pdf, x_object.extra['Resources'])
|
2020-12-20 14:37:17 +03:00
|
|
|
|
if 'Font' in x_object.extra['Resources']:
|
|
|
|
|
x_object.extra['Resources']['Font'] = resources['Font']
|
2020-12-20 15:57:36 +03:00
|
|
|
|
x_object.extra['Resources']['Font'] = resources['Font']
|
2020-12-19 22:06:16 +03:00
|
|
|
|
pdf.add_object(x_object.extra['Resources'])
|
|
|
|
|
x_object.extra['Resources'] = (
|
|
|
|
|
x_object.extra['Resources'].reference)
|
2020-06-07 16:18:00 +03:00
|
|
|
|
# Patterns
|
|
|
|
|
for key, pattern in resources.get('Pattern', {}).items():
|
|
|
|
|
pdf.add_object(pattern)
|
|
|
|
|
resources['Pattern'][key] = pattern.reference
|
|
|
|
|
if 'Resources' in pattern.extra:
|
2020-12-19 22:06:16 +03:00
|
|
|
|
self._use_references(pdf, pattern.extra['Resources'])
|
2020-12-20 14:37:17 +03:00
|
|
|
|
if 'Font' in pattern.extra['Resources']:
|
|
|
|
|
pattern.extra['Resources']['Font'] = resources['Font']
|
2020-12-19 22:06:16 +03:00
|
|
|
|
pdf.add_object(pattern.extra['Resources'])
|
|
|
|
|
pattern.extra['Resources'] = (
|
|
|
|
|
pattern.extra['Resources'].reference)
|
2020-06-07 16:18:00 +03:00
|
|
|
|
# Shadings
|
|
|
|
|
for key, shading in resources.get('Shading', {}).items():
|
|
|
|
|
pdf.add_object(shading)
|
2020-06-08 17:34:28 +03:00
|
|
|
|
resources['Shading'][key] = shading.reference
|
2020-06-07 16:18:00 +03:00
|
|
|
|
|
2020-11-30 21:12:41 +03:00
|
|
|
|
# Alpha states
|
|
|
|
|
for key, alpha in resources.get('ExtGState', {}).items():
|
|
|
|
|
if 'SMask' in alpha and 'G' in alpha['SMask']:
|
|
|
|
|
alpha['SMask']['G'] = alpha['SMask']['G'].reference
|
|
|
|
|
|
2018-01-28 17:45:39 +03:00
|
|
|
|
def __init__(self, pages, metadata, url_fetcher, font_config):
|
2012-10-02 20:59:02 +04:00
|
|
|
|
#: A list of :class:`Page` objects.
|
|
|
|
|
self.pages = pages
|
2013-07-14 15:08:02 +04:00
|
|
|
|
#: A :class:`DocumentMetadata` object.
|
|
|
|
|
#: Contains information that does not belong to a specific page
|
|
|
|
|
#: but to the whole document.
|
|
|
|
|
self.metadata = metadata
|
2019-02-22 13:34:46 +03:00
|
|
|
|
#: A function or other callable with the same signature as
|
|
|
|
|
#: :func:`default_url_fetcher` called to fetch external resources such
|
|
|
|
|
#: as stylesheets and images. (See :ref:`url-fetchers`.)
|
2014-04-18 17:11:45 +04:00
|
|
|
|
self.url_fetcher = url_fetcher
|
2020-05-31 02:20:38 +03:00
|
|
|
|
#: A :obj:`dict` of fonts used by the document. Keys are hashes used to
|
|
|
|
|
#: identify fonts, values are :class:`Font` objects.
|
|
|
|
|
self.fonts = {}
|
2018-01-28 17:45:39 +03:00
|
|
|
|
# Keep a reference to font_config to avoid its garbage collection until
|
|
|
|
|
# rendering is destroyed. This is needed as font_config.__del__ removes
|
|
|
|
|
# fonts that may be used when rendering
|
|
|
|
|
self._font_config = font_config
|
2012-10-02 20:59:02 +04:00
|
|
|
|
|
|
|
|
|
def copy(self, pages='all'):
|
2012-10-05 20:50:40 +04:00
|
|
|
|
"""Take a subset of the pages.
|
|
|
|
|
|
2019-02-22 13:34:46 +03:00
|
|
|
|
.. versionadded:: 0.15
|
|
|
|
|
|
|
|
|
|
:type pages: :term:`iterable`
|
2012-10-05 20:50:40 +04:00
|
|
|
|
:param pages:
|
|
|
|
|
An iterable of :class:`Page` objects from :attr:`pages`.
|
|
|
|
|
:return:
|
|
|
|
|
A new :class:`Document` object.
|
|
|
|
|
|
2013-07-14 12:17:40 +04:00
|
|
|
|
Examples:
|
2012-10-05 20:50:40 +04:00
|
|
|
|
|
2013-07-14 12:17:40 +04:00
|
|
|
|
Write two PDF files for odd-numbered and even-numbered pages::
|
|
|
|
|
|
|
|
|
|
# Python lists count from 0 but pages are numbered from 1.
|
2012-10-05 20:50:40 +04:00
|
|
|
|
# [::2] is a slice of even list indexes but odd-numbered pages.
|
|
|
|
|
document.copy(document.pages[::2]).write_pdf('odd_pages.pdf')
|
|
|
|
|
document.copy(document.pages[1::2]).write_pdf('even_pages.pdf')
|
|
|
|
|
|
2013-07-14 12:17:40 +04:00
|
|
|
|
Combine multiple documents into one PDF file,
|
|
|
|
|
using metadata from the first::
|
|
|
|
|
|
2019-07-09 01:06:19 +03:00
|
|
|
|
all_pages = [p for doc in documents for p in doc.pages]
|
2013-07-14 12:17:40 +04:00
|
|
|
|
documents[0].copy(all_pages).write_pdf('combined.pdf')
|
|
|
|
|
|
2012-10-05 20:50:40 +04:00
|
|
|
|
"""
|
2012-10-02 20:59:02 +04:00
|
|
|
|
if pages == 'all':
|
|
|
|
|
pages = self.pages
|
2012-10-05 20:50:40 +04:00
|
|
|
|
elif not isinstance(pages, list):
|
|
|
|
|
pages = list(pages)
|
2018-01-28 17:45:39 +03:00
|
|
|
|
return type(self)(
|
|
|
|
|
pages, self.metadata, self.url_fetcher, self._font_config)
|
2012-10-02 20:59:02 +04:00
|
|
|
|
|
2020-04-09 02:46:11 +03:00
|
|
|
|
def write_pdf(self, target=None, zoom=1, attachments=None, finisher=None):
|
2020-12-11 00:47:27 +03:00
|
|
|
|
"""Paint the pages in a PDF file, with metadata.
|
2012-10-02 20:59:02 +04:00
|
|
|
|
|
2019-02-22 13:34:46 +03:00
|
|
|
|
:type target: str, pathlib.Path or file object
|
2012-10-05 20:50:40 +04:00
|
|
|
|
:param target:
|
2019-02-22 13:34:46 +03:00
|
|
|
|
A filename where the PDF file is generated, a file object, or
|
|
|
|
|
:obj:`None`.
|
2012-11-23 01:27:34 +04:00
|
|
|
|
:type zoom: float
|
|
|
|
|
:param zoom:
|
2017-04-28 21:36:14 +03:00
|
|
|
|
The zoom factor in PDF units per CSS units. **Warning**:
|
|
|
|
|
All CSS units are affected, including physical units like
|
|
|
|
|
``cm`` and named sizes like ``A4``. For values other than
|
2019-02-22 13:34:46 +03:00
|
|
|
|
1, the physical CSS units will thus be "wrong".
|
|
|
|
|
:type attachments: list
|
2014-04-22 22:40:46 +04:00
|
|
|
|
:param attachments: A list of additional file attachments for the
|
2014-04-26 01:35:43 +04:00
|
|
|
|
generated PDF document or :obj:`None`. The list's elements are
|
2019-02-22 13:34:46 +03:00
|
|
|
|
:class:`Attachment` objects, filenames, URLs or file-like objects.
|
2020-04-19 11:01:27 +03:00
|
|
|
|
:param finisher: A finisher function, that accepts the document and a
|
|
|
|
|
``pydyf.PDF`` object as parameters, can be passed to perform
|
|
|
|
|
post-processing on the PDF right before the trailer is written.
|
2012-10-05 20:50:40 +04:00
|
|
|
|
:returns:
|
2019-02-22 13:34:46 +03:00
|
|
|
|
The PDF as :obj:`bytes` if ``target`` is not provided or
|
|
|
|
|
:obj:`None`, otherwise :obj:`None` (the PDF is written to
|
|
|
|
|
``target``).
|
2012-10-02 20:59:02 +04:00
|
|
|
|
|
|
|
|
|
"""
|
2020-04-18 23:12:25 +03:00
|
|
|
|
# 0.75 = 72 PDF point per inch / 96 CSS pixel per inch
|
2012-11-23 01:27:34 +04:00
|
|
|
|
scale = zoom * 0.75
|
2018-08-06 18:38:02 +03:00
|
|
|
|
|
2020-04-19 17:49:37 +03:00
|
|
|
|
PROGRESS_LOGGER.info('Step 6 - Creating PDF')
|
|
|
|
|
|
|
|
|
|
pdf = pydyf.PDF()
|
|
|
|
|
alpha_states = pydyf.Dictionary()
|
2020-05-17 15:46:41 +03:00
|
|
|
|
x_objects = pydyf.Dictionary()
|
2020-06-07 01:32:47 +03:00
|
|
|
|
patterns = pydyf.Dictionary()
|
|
|
|
|
shadings = pydyf.Dictionary()
|
2020-05-17 15:46:41 +03:00
|
|
|
|
resources = pydyf.Dictionary({
|
2020-05-17 18:04:45 +03:00
|
|
|
|
'ExtGState': alpha_states,
|
2020-06-07 16:18:00 +03:00
|
|
|
|
'XObject': x_objects,
|
2020-06-07 01:32:47 +03:00
|
|
|
|
'Pattern': patterns,
|
|
|
|
|
'Shading': shadings,
|
2020-05-17 15:46:41 +03:00
|
|
|
|
})
|
2020-04-19 17:49:37 +03:00
|
|
|
|
pdf.add_object(resources)
|
|
|
|
|
pdf_names = pydyf.Array()
|
2018-08-06 18:38:02 +03:00
|
|
|
|
|
2020-04-21 23:47:55 +03:00
|
|
|
|
# Links and anchors
|
2020-05-17 17:59:58 +03:00
|
|
|
|
page_links_and_anchors = list(resolve_links(self.pages))
|
2020-04-21 23:30:38 +03:00
|
|
|
|
attachment_links = [
|
|
|
|
|
[link for link in page_links if link[0] == 'attachment']
|
2020-05-17 17:59:58 +03:00
|
|
|
|
for page_links, page_anchors in page_links_and_anchors]
|
2020-04-21 23:30:38 +03:00
|
|
|
|
|
2020-04-21 23:47:55 +03:00
|
|
|
|
# Annotations
|
|
|
|
|
annot_files = {}
|
2020-04-21 23:30:38 +03:00
|
|
|
|
# A single link can be split in multiple regions. We don't want to
|
|
|
|
|
# embed a file multiple times of course, so keep a reference to every
|
|
|
|
|
# embedded URL and reuse the object number.
|
|
|
|
|
for page_links in attachment_links:
|
2020-10-23 14:08:29 +03:00
|
|
|
|
for link_type, annot_target, rectangle, _ in page_links:
|
2020-04-21 23:30:38 +03:00
|
|
|
|
if link_type == 'attachment' and target not in annot_files:
|
2020-04-21 23:47:55 +03:00
|
|
|
|
# TODO: Use the title attribute as description. The comment
|
|
|
|
|
# above about multiple regions won't always be correct,
|
|
|
|
|
# because two links might have the same href, but different
|
|
|
|
|
# titles.
|
2020-04-21 23:30:38 +03:00
|
|
|
|
annot_files[annot_target] = _write_pdf_attachment(
|
|
|
|
|
pdf, (annot_target, None), self.url_fetcher)
|
|
|
|
|
|
2020-04-21 23:47:55 +03:00
|
|
|
|
# Bookmarks
|
|
|
|
|
root = []
|
|
|
|
|
# At one point in the document, for each "output" depth, how much
|
|
|
|
|
# to add to get the source level (CSS values of bookmark-level).
|
|
|
|
|
# E.g. with <h1> then <h3>, level_shifts == [0, 1]
|
|
|
|
|
# 1 means that <h3> has depth 3 - 1 = 2 in the output.
|
|
|
|
|
skipped_levels = []
|
|
|
|
|
last_by_depth = [root]
|
|
|
|
|
previous_level = 0
|
|
|
|
|
|
|
|
|
|
for page_number, (page, links_and_anchors, page_links) in enumerate(
|
2020-05-17 17:59:58 +03:00
|
|
|
|
zip(self.pages, page_links_and_anchors, attachment_links)):
|
2020-04-21 23:47:55 +03:00
|
|
|
|
# Draw from the top-left corner
|
|
|
|
|
matrix = Matrix(scale, 0, 0, -scale, 0, page.height * scale)
|
|
|
|
|
|
|
|
|
|
# Links and anchors
|
2018-09-24 16:27:24 +03:00
|
|
|
|
links, anchors = links_and_anchors
|
2020-04-19 01:47:19 +03:00
|
|
|
|
|
2020-04-19 10:55:39 +03:00
|
|
|
|
page_width = scale * (
|
|
|
|
|
page.width + page.bleed['left'] + page.bleed['right'])
|
|
|
|
|
page_height = scale * (
|
|
|
|
|
page.height + page.bleed['top'] + page.bleed['bottom'])
|
|
|
|
|
left = -scale * page.bleed['left']
|
|
|
|
|
top = -scale * page.bleed['top']
|
2020-04-19 17:49:37 +03:00
|
|
|
|
right = left + page_width
|
|
|
|
|
bottom = top + page_height
|
2020-04-18 01:19:35 +03:00
|
|
|
|
|
2020-06-07 01:32:47 +03:00
|
|
|
|
page_rectangle = (
|
|
|
|
|
left / scale, top / scale, right / scale, bottom / scale)
|
|
|
|
|
stream = Context(
|
|
|
|
|
self, page_rectangle, alpha_states, x_objects, patterns,
|
|
|
|
|
shadings)
|
2020-04-19 15:40:30 +03:00
|
|
|
|
stream.transform(1, 0, 0, -1, 0, page.height * scale)
|
2020-04-18 01:19:35 +03:00
|
|
|
|
page.paint(stream, scale=scale)
|
2020-04-19 17:49:37 +03:00
|
|
|
|
pdf.add_object(stream)
|
2020-04-18 01:19:35 +03:00
|
|
|
|
|
2020-04-19 01:47:19 +03:00
|
|
|
|
pdf_page = pydyf.Dictionary({
|
2020-04-18 01:19:35 +03:00
|
|
|
|
'Type': '/Page',
|
2020-04-19 17:49:37 +03:00
|
|
|
|
'Parent': pdf.pages.reference,
|
|
|
|
|
'MediaBox': pydyf.Array([left, top, right, bottom]),
|
2020-04-18 01:19:35 +03:00
|
|
|
|
'Contents': stream.reference,
|
|
|
|
|
'Resources': resources.reference,
|
2020-04-19 01:47:19 +03:00
|
|
|
|
})
|
2020-04-19 17:49:37 +03:00
|
|
|
|
pdf.add_page(pdf_page)
|
|
|
|
|
|
|
|
|
|
add_hyperlinks(links, anchors, matrix, pdf, pdf_page, pdf_names)
|
|
|
|
|
|
2020-04-21 23:47:55 +03:00
|
|
|
|
# Bleed
|
2020-04-19 17:49:37 +03:00
|
|
|
|
bleed = {key: value * 0.75 for key, value in page.bleed.items()}
|
|
|
|
|
|
|
|
|
|
trim_left = left + bleed['left']
|
|
|
|
|
trim_top = top + bleed['top']
|
|
|
|
|
trim_right = right - bleed['right']
|
|
|
|
|
trim_bottom = bottom - bleed['bottom']
|
2020-04-19 01:47:19 +03:00
|
|
|
|
|
2020-04-19 17:49:37 +03:00
|
|
|
|
# Arbitrarly set PDF BleedBox between CSS bleed box (MediaBox) and
|
|
|
|
|
# CSS page box (TrimBox) at most 10 points from the TrimBox.
|
|
|
|
|
bleed_left = trim_left - min(10, bleed['left'])
|
|
|
|
|
bleed_top = trim_top - min(10, bleed['top'])
|
|
|
|
|
bleed_right = trim_right + min(10, bleed['right'])
|
|
|
|
|
bleed_bottom = trim_bottom + min(10, bleed['bottom'])
|
2020-04-19 01:47:19 +03:00
|
|
|
|
|
2020-04-19 17:49:37 +03:00
|
|
|
|
pdf_page['TrimBox'] = pydyf.Array([
|
|
|
|
|
trim_left, trim_top, trim_right, trim_bottom])
|
|
|
|
|
pdf_page['BleedBox'] = pydyf.Array([
|
|
|
|
|
bleed_left, bleed_top, bleed_right, bleed_bottom])
|
2012-10-02 20:59:02 +04:00
|
|
|
|
|
2020-04-21 23:47:55 +03:00
|
|
|
|
# Annotations
|
2020-04-21 23:30:38 +03:00
|
|
|
|
# TODO: splitting a link into multiple independent rectangular
|
|
|
|
|
# annotations works well for pure links, but rather mediocre for
|
|
|
|
|
# other annotations and fails completely for transformed (CSS) or
|
|
|
|
|
# complex link shapes (area). It would be better to use /AP for all
|
|
|
|
|
# links and coalesce link shapes that originate from the same HTML
|
|
|
|
|
# link. This would give a feeling similiar to what browsers do with
|
|
|
|
|
# links that span multiple lines.
|
2020-10-23 14:08:29 +03:00
|
|
|
|
for link_type, annot_target, rectangle, _ in page_links:
|
2020-04-21 23:30:38 +03:00
|
|
|
|
annot_file = annot_files[annot_target]
|
|
|
|
|
if link_type == 'attachment' and annot_file is not None:
|
|
|
|
|
rectangle = (
|
|
|
|
|
*matrix.transform_point(*rectangle[:2]),
|
|
|
|
|
*matrix.transform_point(*rectangle[2:]))
|
|
|
|
|
annot = pydyf.Dictionary({
|
|
|
|
|
'Type': '/Annot',
|
|
|
|
|
'Rect': pydyf.Array(rectangle),
|
|
|
|
|
'Subtype': '/FileAttachment',
|
|
|
|
|
'T': pydyf.String(),
|
|
|
|
|
'FS': annot_file.reference,
|
|
|
|
|
'AP': pydyf.Dictionary({'N': pydyf.Stream([], {
|
|
|
|
|
'Type': '/XObject',
|
|
|
|
|
'Subtype': '/Form',
|
|
|
|
|
'BBox': pydyf.Array(rectangle),
|
|
|
|
|
'Length': 0,
|
|
|
|
|
})})
|
|
|
|
|
})
|
|
|
|
|
pdf.add_object(annot)
|
2020-05-16 18:05:48 +03:00
|
|
|
|
if 'Annots' not in pdf_page:
|
|
|
|
|
pdf_page['Annots'] = pydyf.Array()
|
2020-04-21 23:30:38 +03:00
|
|
|
|
pdf_page['Annots'].append(annot.reference)
|
|
|
|
|
|
2020-04-21 23:47:55 +03:00
|
|
|
|
# Bookmarks
|
2020-04-19 17:49:37 +03:00
|
|
|
|
for level, label, (point_x, point_y), state in page.bookmarks:
|
|
|
|
|
if level > previous_level:
|
|
|
|
|
# Example: if the previous bookmark is a <h2>, the next
|
|
|
|
|
# depth "should" be for <h3>. If now we get a <h6> we’re
|
|
|
|
|
# skipping two levels: append 6 - 3 - 1 = 2
|
|
|
|
|
skipped_levels.append(level - previous_level - 1)
|
|
|
|
|
else:
|
|
|
|
|
temp = level
|
|
|
|
|
while temp < previous_level:
|
|
|
|
|
temp += 1 + skipped_levels.pop()
|
|
|
|
|
if temp > previous_level:
|
|
|
|
|
# We remove too many "skips", add some back:
|
|
|
|
|
skipped_levels.append(temp - previous_level - 1)
|
|
|
|
|
|
|
|
|
|
previous_level = level
|
|
|
|
|
depth = level - sum(skipped_levels)
|
|
|
|
|
assert depth == len(skipped_levels)
|
|
|
|
|
assert depth >= 1
|
|
|
|
|
|
|
|
|
|
children = []
|
|
|
|
|
point_x, point_y = matrix.transform_point(point_x, point_y)
|
|
|
|
|
subtree = BookmarkSubtree(
|
|
|
|
|
label, (page_number, point_x, point_y), children, state)
|
|
|
|
|
last_by_depth[depth - 1].append(subtree)
|
|
|
|
|
del last_by_depth[depth:]
|
|
|
|
|
last_by_depth.append(children)
|
|
|
|
|
|
2020-05-17 18:12:16 +03:00
|
|
|
|
# Outlines
|
2020-04-19 17:49:37 +03:00
|
|
|
|
outlines, count = create_bookmarks(root, pdf)
|
2020-04-22 00:07:35 +03:00
|
|
|
|
if outlines:
|
2020-05-12 22:53:54 +03:00
|
|
|
|
outlines_dictionary = pydyf.Dictionary({
|
2020-04-22 00:07:35 +03:00
|
|
|
|
'Count': count,
|
|
|
|
|
'First': outlines[0].reference,
|
|
|
|
|
'Last': outlines[-1].reference,
|
|
|
|
|
})
|
2020-05-12 22:53:54 +03:00
|
|
|
|
pdf.add_object(outlines_dictionary)
|
|
|
|
|
for outline in outlines:
|
|
|
|
|
outline['Parent'] = outlines_dictionary.reference
|
2020-05-16 17:19:28 +03:00
|
|
|
|
pdf.catalog['Outlines'] = outlines_dictionary.reference
|
2020-04-19 17:49:37 +03:00
|
|
|
|
|
2020-04-21 23:47:55 +03:00
|
|
|
|
PROGRESS_LOGGER.info('Step 7 - Adding PDF metadata')
|
|
|
|
|
|
2020-05-06 08:42:45 +03:00
|
|
|
|
# PDF information
|
2020-04-21 23:47:55 +03:00
|
|
|
|
if self.metadata.title:
|
|
|
|
|
pdf.info['Title'] = pydyf.String(self.metadata.title)
|
|
|
|
|
if self.metadata.authors:
|
|
|
|
|
pdf.info['Author'] = pydyf.String(
|
|
|
|
|
', '.join(self.metadata.authors))
|
|
|
|
|
if self.metadata.description:
|
|
|
|
|
pdf.info['Subject'] = pydyf.String(self.metadata.description)
|
|
|
|
|
if self.metadata.keywords:
|
|
|
|
|
pdf.info['Keywords'] = pydyf.String(
|
|
|
|
|
', '.join(self.metadata.keywords))
|
|
|
|
|
if self.metadata.generator:
|
|
|
|
|
pdf.info['Creator'] = pydyf.String(self.metadata.generator)
|
|
|
|
|
pdf.info['Producer'] = pydyf.String(f'WeasyPrint {__version__}')
|
|
|
|
|
if self.metadata.created:
|
|
|
|
|
pdf.info['CreationDate'] = pydyf.String(
|
|
|
|
|
_w3c_date_to_pdf(self.metadata.created, 'created'))
|
|
|
|
|
if self.metadata.modified:
|
|
|
|
|
pdf.info['ModDate'] = pydyf.String(
|
|
|
|
|
_w3c_date_to_pdf(self.metadata.modified, 'modified'))
|
|
|
|
|
|
2020-05-06 08:42:45 +03:00
|
|
|
|
# Embedded files
|
2020-04-21 23:30:38 +03:00
|
|
|
|
attachments = self.metadata.attachments + (attachments or [])
|
2020-05-06 08:42:45 +03:00
|
|
|
|
pdf_attachments = []
|
|
|
|
|
for attachment in attachments:
|
|
|
|
|
pdf_attachment = _write_pdf_attachment(
|
|
|
|
|
pdf, attachment, self.url_fetcher)
|
|
|
|
|
if pdf_attachment is not None:
|
|
|
|
|
pdf_attachments.append(pdf_attachment)
|
|
|
|
|
if pdf_attachments:
|
|
|
|
|
content = pydyf.Dictionary({'Names': pydyf.Array()})
|
|
|
|
|
for i, pdf_attachment in enumerate(pdf_attachments):
|
|
|
|
|
content['Names'].append(pydyf.String(f'attachment{i}'))
|
|
|
|
|
content['Names'].append(pdf_attachment.reference)
|
|
|
|
|
pdf.add_object(content)
|
2020-05-16 17:25:06 +03:00
|
|
|
|
if 'Names' not in pdf.catalog:
|
2020-05-16 18:05:48 +03:00
|
|
|
|
pdf.catalog['Names'] = pydyf.Dictionary()
|
2020-05-06 08:42:45 +03:00
|
|
|
|
pdf.catalog['Names']['EmbeddedFiles'] = content.reference
|
|
|
|
|
|
|
|
|
|
# Embeded fonts
|
2020-12-20 14:37:17 +03:00
|
|
|
|
fonts = pydyf.Dictionary()
|
2020-06-01 02:12:32 +03:00
|
|
|
|
for font in self.fonts.values():
|
2020-05-29 20:43:56 +03:00
|
|
|
|
# Optimize font
|
2020-05-30 01:30:13 +03:00
|
|
|
|
try:
|
|
|
|
|
full_font = io.BytesIO(font.file_content)
|
|
|
|
|
optimized_font = io.BytesIO()
|
|
|
|
|
ttfont = TTFont(full_font)
|
|
|
|
|
options = subset.Options(
|
|
|
|
|
retain_gids=True, passthrough_tables=True)
|
|
|
|
|
subsetter = subset.Subsetter(options)
|
|
|
|
|
subsetter.populate(gids=font.cmap)
|
|
|
|
|
subsetter.subset(ttfont)
|
|
|
|
|
ttfont.save(optimized_font)
|
|
|
|
|
content = optimized_font.getvalue()
|
|
|
|
|
except TTLibError:
|
|
|
|
|
content = font.file_content
|
2020-05-29 20:43:56 +03:00
|
|
|
|
|
|
|
|
|
# Include font
|
|
|
|
|
font_type = 'otf' if content[:4] == b'OTTO' else 'ttf'
|
2020-05-10 01:14:56 +03:00
|
|
|
|
if font_type == 'otf':
|
2020-06-01 12:48:17 +03:00
|
|
|
|
font_extra = pydyf.Dictionary({'Subtype': '/OpenType'})
|
2020-05-12 14:11:52 +03:00
|
|
|
|
else:
|
2020-06-01 12:48:17 +03:00
|
|
|
|
font_extra = pydyf.Dictionary({'Length1': len(content)})
|
2020-12-13 00:44:41 +03:00
|
|
|
|
font_stream = pydyf.Stream([content], font_extra, compress=True)
|
2020-05-06 08:42:45 +03:00
|
|
|
|
pdf.add_object(font_stream)
|
|
|
|
|
|
2020-05-12 03:00:07 +03:00
|
|
|
|
widths = pydyf.Array()
|
2020-05-10 19:09:06 +03:00
|
|
|
|
for i in sorted(font.widths):
|
|
|
|
|
if i - 1 not in font.widths:
|
|
|
|
|
widths.append(i)
|
2020-05-12 03:00:07 +03:00
|
|
|
|
current_widths = pydyf.Array()
|
2020-05-10 19:09:06 +03:00
|
|
|
|
widths.append(current_widths)
|
|
|
|
|
current_widths.append(font.widths[i])
|
2020-12-19 21:02:24 +03:00
|
|
|
|
font_descriptor = pydyf.Dictionary({
|
|
|
|
|
'Type': '/FontDescriptor',
|
|
|
|
|
'FontName': font.name,
|
|
|
|
|
'FontFamily': pydyf.String(font.family),
|
|
|
|
|
'Flags': 32,
|
|
|
|
|
'FontBBox': pydyf.Array(font.bbox),
|
|
|
|
|
'ItalicAngle': font.italic_angle,
|
|
|
|
|
'Ascent': font.ascent,
|
|
|
|
|
'Descent': font.descent,
|
|
|
|
|
'CapHeight': font.bbox[3],
|
|
|
|
|
'StemV': font.stemv,
|
|
|
|
|
'StemH': font.stemh,
|
|
|
|
|
(f'FontFile{"3" if font_type == "otf" else "2"}'):
|
|
|
|
|
font_stream.reference,
|
|
|
|
|
})
|
|
|
|
|
if font_type == 'otf':
|
|
|
|
|
font_descriptor['Subtype'] = '/OpenType'
|
|
|
|
|
pdf.add_object(font_descriptor)
|
2020-05-08 18:14:45 +03:00
|
|
|
|
subfont_dictionary = pydyf.Dictionary({
|
2020-05-06 08:42:45 +03:00
|
|
|
|
'Type': '/Font',
|
2020-05-30 02:11:30 +03:00
|
|
|
|
'Subtype': f'/CIDFontType{"0" if font_type == "otf" else "2"}',
|
2020-05-08 02:55:50 +03:00
|
|
|
|
'BaseFont': font.name,
|
2020-05-08 18:14:45 +03:00
|
|
|
|
'CIDSystemInfo': pydyf.Dictionary({
|
|
|
|
|
'Registry': pydyf.String('Adobe'),
|
|
|
|
|
'Ordering': pydyf.String('Identity'),
|
|
|
|
|
'Supplement': 0,
|
|
|
|
|
}),
|
2020-05-12 03:00:07 +03:00
|
|
|
|
'W': widths,
|
2020-12-19 21:02:24 +03:00
|
|
|
|
'FontDescriptor': font_descriptor.reference,
|
2020-05-08 18:14:45 +03:00
|
|
|
|
})
|
|
|
|
|
pdf.add_object(subfont_dictionary)
|
2020-05-12 19:38:12 +03:00
|
|
|
|
to_unicode = pydyf.Stream([
|
2020-06-01 02:12:32 +03:00
|
|
|
|
b'/CIDInit /ProcSet findresource begin',
|
|
|
|
|
b'12 dict begin',
|
|
|
|
|
b'begincmap',
|
|
|
|
|
b'/CIDSystemInfo',
|
|
|
|
|
b'<< /Registry (Adobe)',
|
|
|
|
|
b'/Ordering (UCS)',
|
|
|
|
|
b'/Supplement 0',
|
|
|
|
|
b'>> def',
|
|
|
|
|
b'/CMapName /Adobe-Identity-UCS def',
|
|
|
|
|
b'/CMapType 2 def',
|
|
|
|
|
b'1 begincodespacerange',
|
|
|
|
|
b'<0000> <ffff>',
|
|
|
|
|
b'endcodespacerange',
|
|
|
|
|
f'{len(font.cmap)} beginbfchar'.encode('ascii')])
|
2020-05-12 19:38:12 +03:00
|
|
|
|
for glyph, text in font.cmap.items():
|
|
|
|
|
unicode_codepoints = ''.join(
|
|
|
|
|
f'{letter.encode("utf-16-be").hex()}' for letter in text)
|
|
|
|
|
to_unicode.stream.append(
|
2020-06-01 02:12:32 +03:00
|
|
|
|
f'<{glyph:04x}> <{unicode_codepoints}>'.encode('ascii'))
|
2020-05-12 19:38:12 +03:00
|
|
|
|
to_unicode.stream.extend([
|
2020-06-01 02:12:32 +03:00
|
|
|
|
b'endbfchar',
|
|
|
|
|
b'endcmap',
|
|
|
|
|
b'CMapName currentdict /CMap defineresource pop',
|
|
|
|
|
b'end',
|
|
|
|
|
b'end'])
|
2020-05-13 00:47:22 +03:00
|
|
|
|
pdf.add_object(to_unicode)
|
2020-05-08 18:14:45 +03:00
|
|
|
|
font_dictionary = pydyf.Dictionary({
|
|
|
|
|
'Type': '/Font',
|
|
|
|
|
'Subtype': '/Type0',
|
|
|
|
|
'BaseFont': font.name,
|
|
|
|
|
'Encoding': '/Identity-H',
|
|
|
|
|
'DescendantFonts': pydyf.Array([subfont_dictionary.reference]),
|
2020-05-13 00:47:22 +03:00
|
|
|
|
'ToUnicode': to_unicode.reference,
|
2020-05-06 08:42:45 +03:00
|
|
|
|
})
|
|
|
|
|
pdf.add_object(font_dictionary)
|
2020-12-20 14:37:17 +03:00
|
|
|
|
fonts[font.hash] = font_dictionary.reference
|
2020-04-21 23:30:38 +03:00
|
|
|
|
|
2020-12-20 14:37:17 +03:00
|
|
|
|
pdf.add_object(fonts)
|
|
|
|
|
resources['Font'] = fonts.reference
|
2020-06-07 16:18:00 +03:00
|
|
|
|
self._use_references(pdf, resources)
|
2020-05-17 15:46:41 +03:00
|
|
|
|
|
2020-05-17 18:12:23 +03:00
|
|
|
|
# Anchors
|
|
|
|
|
if pdf_names:
|
|
|
|
|
pdf.catalog['Names'] = pydyf.Dictionary(
|
|
|
|
|
{'Dests': pydyf.Dictionary({'Names': pdf_names})})
|
|
|
|
|
|
2020-04-19 11:01:27 +03:00
|
|
|
|
if finisher:
|
2020-04-19 17:49:37 +03:00
|
|
|
|
finisher(self, pdf)
|
2020-04-19 11:01:27 +03:00
|
|
|
|
|
2020-04-18 01:19:35 +03:00
|
|
|
|
file_obj = io.BytesIO()
|
2020-04-19 17:49:37 +03:00
|
|
|
|
pdf.write(file_obj)
|
2012-10-02 20:59:02 +04:00
|
|
|
|
|
|
|
|
|
if target is None:
|
|
|
|
|
return file_obj.getvalue()
|
|
|
|
|
else:
|
|
|
|
|
file_obj.seek(0)
|
|
|
|
|
if hasattr(target, 'write'):
|
|
|
|
|
shutil.copyfileobj(file_obj, target)
|
|
|
|
|
else:
|
|
|
|
|
with open(target, 'wb') as fd:
|
|
|
|
|
shutil.copyfileobj(file_obj, fd)
|