2012-10-02 20:59:02 +04:00
|
|
|
|
"""
|
|
|
|
|
weasyprint.document
|
|
|
|
|
-------------------
|
|
|
|
|
|
2019-03-04 13:04:06 +03:00
|
|
|
|
:copyright: Copyright 2011-2019 Simon Sapin and contributors, see AUTHORS.
|
2012-10-02 20:59:02 +04:00
|
|
|
|
:license: BSD, see LICENSE for details.
|
|
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
2019-05-24 00:55:56 +03:00
|
|
|
|
import collections
|
2017-03-25 02:33:36 +03:00
|
|
|
|
import functools
|
2012-10-02 20:59:02 +04:00
|
|
|
|
import io
|
|
|
|
|
import math
|
|
|
|
|
import shutil
|
2018-04-13 11:44:19 +03:00
|
|
|
|
import warnings
|
2012-10-02 20:59:02 +04:00
|
|
|
|
|
2012-12-29 04:00:30 +04:00
|
|
|
|
import cairocffi as cairo
|
2019-07-23 08:12:08 +03:00
|
|
|
|
from weasyprint.layout import LayoutContext
|
2012-10-02 20:59:02 +04:00
|
|
|
|
|
2019-06-02 19:06:25 +03:00
|
|
|
|
from . import CSS
|
2012-10-02 20:59:02 +04:00
|
|
|
|
from .css import get_all_computed_styles
|
2018-03-28 01:34:34 +03:00
|
|
|
|
from .css.targets import TargetCollector
|
2017-03-25 02:33:36 +03:00
|
|
|
|
from .draw import draw_page, stacked
|
2016-10-27 18:36:24 +03:00
|
|
|
|
from .fonts import FontConfiguration
|
2012-10-04 13:35:25 +04:00
|
|
|
|
from .formatting_structure import boxes
|
2012-10-02 20:59:02 +04:00
|
|
|
|
from .formatting_structure.build import build_formatting_structure
|
2018-08-06 18:38:02 +03:00
|
|
|
|
from .html import W3C_DATE_RE
|
2018-01-07 03:46:39 +03:00
|
|
|
|
from .images import get_image_from_uri as original_get_image_from_uri
|
2012-10-02 20:59:02 +04:00
|
|
|
|
from .layout import layout_document
|
2019-06-02 19:06:25 +03:00
|
|
|
|
from .layout.percentages import percentage
|
2019-01-04 01:02:44 +03:00
|
|
|
|
from .logger import LOGGER, PROGRESS_LOGGER
|
2012-10-02 20:59:02 +04:00
|
|
|
|
from .pdf import write_pdf_metadata
|
|
|
|
|
|
2018-04-13 11:44:19 +03:00
|
|
|
|
# 11504 is the integer form of cairo 1.15.4 (the version named in the
# message below). Older versions only trigger a warning at import time.
if cairo.cairo_version() < 11504:
    warnings.warn(
        'There are known rendering problems and missing features with cairo '
        '< 1.15.4. WeasyPrint may work with older versions, but please read '
        'the note about the needed cairo version on the "Install" page of '
        'the documentation before reporting bugs. '
        'http://weasyprint.readthedocs.io/en/latest/install.html')
|
2018-04-13 11:44:19 +03:00
|
|
|
|
|
2012-10-02 20:59:02 +04:00
|
|
|
|
|
2012-10-06 13:26:55 +04:00
|
|
|
|
def _get_matrix(box):
    """Build the matrix for the CSS transforms applied to ``box``.

    When a matrix is built it is also stored on the box as
    ``box.transformation_matrix``.

    :returns:
        A :class:`cairocffi.Matrix` object, or :obj:`None` when the box has
        no ``transform`` style or is an inline box.

    """
    # "Transforms apply to block-level and atomic inline-level elements,
    #  but do not apply to elements which may be split into
    #  multiple inline-level boxes."
    # http://www.w3.org/TR/css3-2d-transforms/#introduction
    if not box.style['transform'] or isinstance(box, boxes.InlineBox):
        return None

    border_width = box.border_width()
    border_height = box.border_height()

    # The transform origin is resolved against the border box.
    origin_x, origin_y = box.style['transform_origin']
    offset_x = percentage(origin_x, border_width)
    offset_y = percentage(origin_y, border_height)
    origin_x = box.border_box_x() + offset_x
    origin_y = box.border_box_y() + offset_y

    # Move to the origin, apply every transform function in order, then move
    # back so that the transforms are applied around the origin.
    matrix = cairo.Matrix()
    matrix.translate(origin_x, origin_y)
    for name, args in box.style['transform']:
        if name == 'scale':
            matrix.scale(*args)
        elif name == 'rotate':
            matrix.rotate(args)
        elif name == 'translate':
            translate_x, translate_y = args
            matrix.translate(
                percentage(translate_x, border_width),
                percentage(translate_y, border_height),
            )
        elif name == 'skewx':
            matrix = cairo.Matrix(1, 0, math.tan(args), 1, 0, 0) * matrix
        elif name == 'skewy':
            matrix = cairo.Matrix(1, math.tan(args), 0, 1, 0, 0) * matrix
        else:
            assert name == 'matrix'
            matrix = cairo.Matrix(*args) * matrix
    matrix.translate(-origin_x, -origin_y)

    box.transformation_matrix = matrix
    return matrix
|
|
|
|
|
|
|
|
|
|
|
2012-10-07 00:09:17 +04:00
|
|
|
|
def rectangle_aabb(matrix, pos_x, pos_y, width, height):
    """Return the bounding box of a transformed axis-aligned rectangle.

    The four corners of the rectangle are transformed with
    ``matrix.transform_point`` and the axis-aligned box enclosing them is
    returned as ``(x, y, width, height)``.

    """
    transform_point = matrix.transform_point
    right = pos_x + width
    bottom = pos_y + height

    xs = []
    ys = []
    for corner_x, corner_y in (
            (pos_x, pos_y), (right, pos_y), (pos_x, bottom), (right, bottom)):
        new_x, new_y = transform_point(corner_x, corner_y)
        xs.append(new_x)
        ys.append(new_y)

    left, top = min(xs), min(ys)
    return left, top, max(xs) - left, max(ys) - top
|
|
|
|
|
|
|
|
|
|
|
2013-07-14 15:08:02 +04:00
|
|
|
|
def _gather_links_and_bookmarks(box, bookmarks, links, anchors, matrix):
    """Collect links, bookmarks and anchors from ``box`` and its children.

    ``bookmarks`` and ``links`` are lists and ``anchors`` is a dict; all
    three are filled in place. ``matrix`` is the transformation inherited
    from the ancestors of ``box``, or :obj:`None`.

    """
    own_matrix = _get_matrix(box)
    if own_matrix:
        matrix = own_matrix * matrix if matrix else own_matrix

    label = box.bookmark_label
    level = box.style['bookmark_level']
    if level == 'none':
        level = None
    state = box.style['bookmark_state']
    link = box.style['link']
    anchor_name = box.style['anchor']

    has_bookmark = label and level
    # 'link' is inherited but redundant on text boxes
    has_link = link and not isinstance(box, boxes.TextBox)
    # In case of duplicate IDs, only the first is an anchor.
    has_anchor = anchor_name and anchor_name not in anchors
    is_attachment = getattr(box, 'is_attachment', False)

    if has_bookmark or has_link or has_anchor:
        pos_x, pos_y, width, height = box.hit_area()

        if has_link:
            token_type, link = link
            assert token_type == 'url'
            link_type, target = link
            assert isinstance(target, str)
            if link_type == 'external' and is_attachment:
                link_type = 'attachment'
            # Links keep a rectangle: the bounding box of the transformed
            # hit area when a transformation applies.
            rectangle = (pos_x, pos_y, width, height)
            if matrix:
                rectangle = rectangle_aabb(matrix, *rectangle)
            links.append((link_type, target, rectangle))

        # Bookmarks and anchors only keep a point: the transformed corner.
        if matrix and (has_bookmark or has_anchor):
            pos_x, pos_y = matrix.transform_point(pos_x, pos_y)
        if has_bookmark:
            bookmarks.append((level, label, (pos_x, pos_y), state))
        if has_anchor:
            anchors[anchor_name] = pos_x, pos_y

    for child in box.all_children():
        _gather_links_and_bookmarks(child, bookmarks, links, anchors, matrix)
|
2012-10-04 13:35:25 +04:00
|
|
|
|
|
|
|
|
|
|
2018-08-06 18:38:02 +03:00
|
|
|
|
def _w3c_date_to_iso(string, attr_name):
    """Transform W3C date to ISO-8601 format.

    :param string:
        The date to convert, or :obj:`None`.
    :param attr_name:
        Name of the attribute the date comes from. It is only used in the
        warning logged when the date is invalid.
    :returns:
        The date as a ``YYYY-MM-DDThh:mm:ss`` string followed by a
        ``+hh:mm`` or ``-hh:mm`` offset, or :obj:`None` when ``string`` is
        :obj:`None` or does not match ``W3C_DATE_RE``. Missing month and day
        default to 1, missing time fields to 0 and a missing offset to
        ``+00:00``.

    """
    if string is None:
        return None
    match = W3C_DATE_RE.match(string)
    if match is None:
        LOGGER.warning('Invalid %s date: %r', attr_name, string)
        return None
    groups = match.groupdict()
    iso_date = '%04i-%02i-%02iT%02i:%02i:%02i' % (
        int(groups['year']),
        int(groups['month'] or 1),
        int(groups['day'] or 1),
        int(groups['hour'] or 0),
        int(groups['minute'] or 0),
        int(groups['second'] or 0))
    if groups['hour']:
        assert groups['minute']
    if groups['tz_hour']:
        tz_hour = groups['tz_hour']
        assert tz_hour.startswith(('+', '-'))
        assert groups['tz_minute']
        # Take the sign from the text rather than from the parsed integer:
        # int('-00') is 0, so formatting it with '%+03i' would turn an
        # offset such as '-00:30' into '+00:30'.
        sign = '-' if tz_hour.startswith('-') else '+'
        iso_date += '%s%02i:%02i' % (
            sign, abs(int(tz_hour)), int(groups['tz_minute']))
    else:
        iso_date += '+00:00'
    return iso_date
|
|
|
|
|
|
|
|
|
|
|
2012-10-02 20:59:02 +04:00
|
|
|
|
class Page(object):
    """Represents a single rendered page.

    .. versionadded:: 0.15

    Should be obtained from :attr:`Document.pages` but not
    instantiated directly.

    """
    def __init__(self, page_box, enable_hinting=False):
        #: The page width, including margins, in CSS pixels.
        self.width = page_box.margin_width()

        #: The page height, including margins, in CSS pixels.
        self.height = page_box.margin_height()

        #: The page bleed widths as a :obj:`dict` with ``'top'``, ``'right'``,
        #: ``'bottom'`` and ``'left'`` as keys, and values in CSS pixels.
        self.bleed = {
            side: page_box.style['bleed_%s' % side].value
            for side in ('top', 'right', 'bottom', 'left')}

        #: The :obj:`list` of ``(bookmark_level, bookmark_label, target,
        #: bookmark_state)`` :obj:`tuples <tuple>`. ``bookmark_level`` and
        #: ``bookmark_label`` are respectively an :obj:`int` and a
        #: :obj:`string <str>`, based on the CSS properties of the same
        #: names. ``target`` is an ``(x, y)`` point in CSS pixels from the
        #: top-left of the page. ``bookmark_state`` is the computed
        #: ``bookmark_state`` style value of the box.
        self.bookmarks = []

        #: The :obj:`list` of ``(link_type, target, rectangle)`` :obj:`tuples
        #: <tuple>`. A ``rectangle`` is ``(x, y, width, height)``, in CSS
        #: pixels from the top-left of the page. ``link_type`` is one of three
        #: strings:
        #:
        #: * ``'external'``: ``target`` is an absolute URL
        #: * ``'internal'``: ``target`` is an anchor name (see
        #:   :attr:`Page.anchors`).
        #:   The anchor might be defined in another page,
        #:   in multiple pages (in which case the first occurrence is used),
        #:   or not at all.
        #: * ``'attachment'``: ``target`` is an absolute URL and points
        #:   to a resource to attach to the document.
        self.links = []

        #: The :obj:`dict` mapping each anchor name to its target, an
        #: ``(x, y)`` point in CSS pixels from the top-left of the page.
        self.anchors = {}

        # Fill the three containers above in place by walking the box tree.
        _gather_links_and_bookmarks(
            page_box, self.bookmarks, self.links, self.anchors, matrix=None)
        self._page_box = page_box
        self._enable_hinting = enable_hinting

    def paint(self, cairo_context, left_x=0, top_y=0, scale=1, clip=False):
        """Paint the page in cairo, on any type of surface.

        :type cairo_context: :class:`cairocffi.Context`
        :param cairo_context:
            Any cairo context object.
        :type left_x: float
        :param left_x:
            X coordinate of the left of the page, in cairo user units.
        :type top_y: float
        :param top_y:
            Y coordinate of the top of the page, in cairo user units.
        :type scale: float
        :param scale:
            Zoom scale in cairo user units per CSS pixel.
        :type clip: bool
        :param clip:
            Whether to clip/cut content outside the page. If false or
            not provided, content can overflow.

        """
        with stacked(cairo_context):
            if self._enable_hinting:
                left_x, top_y = cairo_context.user_to_device(left_x, top_y)
                # Hint in device space
                left_x = int(left_x)
                top_y = int(top_y)
                left_x, top_y = cairo_context.device_to_user(left_x, top_y)
            # Make (0, 0) the top-left corner:
            cairo_context.translate(left_x, top_y)
            # Make user units CSS pixels:
            cairo_context.scale(scale, scale)
            if clip:
                width = self.width
                height = self.height
                if self._enable_hinting:
                    width, height = (
                        cairo_context.user_to_device_distance(width, height))
                    # Hint in device space
                    width = int(math.ceil(width))
                    height = int(math.ceil(height))
                    width, height = (
                        cairo_context.device_to_user_distance(width, height))
                cairo_context.rectangle(0, 0, width, height)
                cairo_context.clip()
            draw_page(self._page_box, cairo_context, self._enable_hinting)
|
|
|
|
|
|
|
|
|
|
|
2013-07-14 15:08:02 +04:00
|
|
|
|
class DocumentMetadata(object):
    """Meta-information belonging to a whole :class:`Document`.

    .. versionadded:: 0.20

    New attributes may be added in future versions of WeasyPrint.

    """
    def __init__(self, title=None, authors=None, description=None,
                 keywords=None, generator=None, created=None, modified=None,
                 attachments=None):
        #: Document title: a string or :obj:`None`.
        #: It comes from the ``<title>`` element in HTML
        #: and goes to the ``/Title`` info field in PDF.
        self.title = title

        #: Document authors: a list of strings, empty by default.
        #: They come from the ``<meta name=author>`` elements in HTML
        #: and go to the ``/Author`` info field in PDF.
        self.authors = authors if authors else []

        #: Document description: a string or :obj:`None`.
        #: It comes from the ``<meta name=description>`` element in HTML
        #: and goes to the ``/Subject`` info field in PDF.
        self.description = description

        #: Document keywords: a list of strings, empty by default.
        #: They come from the ``<meta name=keywords>`` elements in HTML
        #: and go to the ``/Keywords`` info field in PDF.
        self.keywords = keywords if keywords else []

        #: Name of one of the software packages used to generate the
        #: document: a string or :obj:`None`.
        #: It comes from the ``<meta name=generator>`` element in HTML
        #: and goes to the ``/Creator`` info field in PDF.
        self.generator = generator

        #: Creation date of the document: a string or :obj:`None`,
        #: in one of the six formats specified in
        #: `W3C’s profile of ISO 8601 <http://www.w3.org/TR/NOTE-datetime>`_.
        #: It comes from the ``<meta name=dcterms.created>`` element in HTML
        #: and goes to the ``/CreationDate`` info field in PDF.
        self.created = created

        #: Modification date of the document: a string or :obj:`None`,
        #: in one of the six formats specified in
        #: `W3C’s profile of ISO 8601 <http://www.w3.org/TR/NOTE-datetime>`_.
        #: It comes from the ``<meta name=dcterms.modified>`` element in HTML
        #: and goes to the ``/ModDate`` info field in PDF.
        self.modified = modified

        #: File attachments: a list of tuples of URL and a description or
        #: :obj:`None`, empty by default.
        #: They come from the ``<link rel=attachment>`` elements in HTML
        #: and go to the ``/EmbeddedFiles`` dictionary in PDF.
        #:
        #: .. versionadded:: 0.22
        self.attachments = attachments if attachments else []
|
2013-07-14 15:08:02 +04:00
|
|
|
|
|
|
|
|
|
|
2019-05-24 00:55:56 +03:00
|
|
|
|
# One node of the bookmark tree: a label, its destination, the list of child
# nodes and the bookmark state.
BookmarkSubtree = collections.namedtuple(
    'BookmarkSubtree', 'label destination children state')
|
2019-05-18 01:00:37 +03:00
|
|
|
|
|
|
|
|
|
|
2012-10-02 20:59:02 +04:00
|
|
|
|
class Document(object):
|
2019-02-22 13:34:46 +03:00
|
|
|
|
"""A rendered document ready to be painted on a cairo surface.
|
2012-10-04 13:35:25 +04:00
|
|
|
|
|
2019-02-22 13:34:46 +03:00
|
|
|
|
Typically obtained from :meth:`HTML.render() <weasyprint.HTML.render>`, but
|
|
|
|
|
can also be instantiated directly with a list of :class:`pages <Page>`, a
|
|
|
|
|
set of :class:`metadata <DocumentMetadata>`, a :func:`url_fetcher
|
|
|
|
|
<weasyprint.default_url_fetcher>` function, and a :class:`font_config
|
|
|
|
|
<weasyprint.fonts.FontConfiguration>`.
|
2012-10-04 13:35:25 +04:00
|
|
|
|
|
|
|
|
|
"""
|
2019-07-23 08:12:08 +03:00
|
|
|
|
|
2012-10-02 20:59:02 +04:00
|
|
|
|
@classmethod
def _build_layout_context(cls, html, stylesheets, enable_hinting,
                          presentational_hints=False, font_config=None):
    """Compute the styles of ``html`` and wrap them in a layout context.

    ``stylesheets`` may be :obj:`None` or an iterable. Items without a
    ``matcher`` attribute are turned into :class:`CSS` objects first.

    :returns: a :class:`LayoutContext` object.

    """
    if font_config is None:
        font_config = FontConfiguration()
    target_collector = TargetCollector()
    page_rules = []

    user_stylesheets = [
        css if hasattr(css, 'matcher') else CSS(
            guess=css, media_type=html.media_type, font_config=font_config)
        for css in stylesheets or []]

    style_for = get_all_computed_styles(
        html, user_stylesheets, presentational_hints, font_config,
        page_rules, target_collector)
    # The fresh dict is bound as the first argument of the image getter,
    # presumably as its cache.
    get_image_from_uri = functools.partial(
        original_get_image_from_uri, {}, html.url_fetcher)

    PROGRESS_LOGGER.info('Step 4 - Creating formatting structure')
    return LayoutContext(
        enable_hinting, style_for, get_image_from_uri, font_config,
        target_collector)
|
|
|
|
|
|
|
|
|
|
@classmethod
def _render(cls, html, stylesheets, enable_hinting,
            presentational_hints=False, font_config=None):
    """Lay out ``html`` and return the rendered document.

    :returns:
        A new instance of ``cls`` holding one :class:`Page` per laid-out
        page box and the metadata of ``html``.

    """
    if font_config is None:
        font_config = FontConfiguration()

    context = cls._build_layout_context(
        html, stylesheets, enable_hinting, presentational_hints,
        font_config)

    root_box = build_formatting_structure(
        html.etree_element, context.style_for, context.get_image_from_uri,
        html.base_url, context.target_collector)

    pages = [
        Page(page_box, enable_hinting)
        for page_box in layout_document(html, root_box, context)]
    metadata = DocumentMetadata(**html._get_metadata())
    return cls(pages, metadata, html.url_fetcher, font_config)
|
2012-10-02 20:59:02 +04:00
|
|
|
|
|
2018-01-28 17:45:39 +03:00
|
|
|
|
def __init__(self, pages, metadata, url_fetcher, font_config):
|
2012-10-02 20:59:02 +04:00
|
|
|
|
#: A list of :class:`Page` objects.
|
|
|
|
|
self.pages = pages
|
2013-07-14 15:08:02 +04:00
|
|
|
|
#: A :class:`DocumentMetadata` object.
|
|
|
|
|
#: Contains information that does not belong to a specific page
|
|
|
|
|
#: but to the whole document.
|
|
|
|
|
self.metadata = metadata
|
2019-02-22 13:34:46 +03:00
|
|
|
|
#: A function or other callable with the same signature as
|
|
|
|
|
#: :func:`default_url_fetcher` called to fetch external resources such
|
|
|
|
|
#: as stylesheets and images. (See :ref:`url-fetchers`.)
|
2014-04-18 17:11:45 +04:00
|
|
|
|
self.url_fetcher = url_fetcher
|
2018-01-28 17:45:39 +03:00
|
|
|
|
# Keep a reference to font_config to avoid its garbage collection until
|
|
|
|
|
# rendering is destroyed. This is needed as font_config.__del__ removes
|
|
|
|
|
# fonts that may be used when rendering
|
|
|
|
|
self._font_config = font_config
|
2012-10-02 20:59:02 +04:00
|
|
|
|
|
|
|
|
|
def copy(self, pages='all'):
    """Take a subset of the pages.

    .. versionadded:: 0.15

    :type pages: :term:`iterable`
    :param pages:
        An iterable of :class:`Page` objects from :attr:`pages`, or the
        string ``'all'`` (the default) to keep every page.
    :return:
        A new :class:`Document` object. It shares the metadata, the URL
        fetcher and the font configuration of this document.

    Examples:

    Write two PDF files for odd-numbered and even-numbered pages::

        # Python lists count from 0 but pages are numbered from 1.
        # [::2] is a slice of even list indexes but odd-numbered pages.
        document.copy(document.pages[::2]).write_pdf('odd_pages.pdf')
        document.copy(document.pages[1::2]).write_pdf('even_pages.pdf')

    Write each page to a numbered PNG file::

        for i, page in enumerate(document.pages):
            document.copy([page]).write_png('page_%s.png' % i)

    Combine multiple documents into one PDF file,
    using metadata from the first::

        all_pages = [p for doc in documents for p in doc.pages]
        documents[0].copy(all_pages).write_pdf('combined.pdf')

    """
    if pages == 'all':
        pages = self.pages
    elif not isinstance(pages, list):
        # Accept any iterable, but always store a list.
        pages = list(pages)
    return type(self)(
        pages, self.metadata, self.url_fetcher, self._font_config)
|
2012-10-02 20:59:02 +04:00
|
|
|
|
|
2012-10-04 13:35:25 +04:00
|
|
|
|
def resolve_links(self):
    """Resolve internal hyperlinks.

    .. versionadded:: 0.15

    Links to a missing anchor are removed and an error is logged.

    If multiple anchors have the same name, the first one is used.

    :returns:
        A generator yielding one ``(links, anchors)`` tuple per page.
        ``links`` is a list like :attr:`Page.links`, without the internal
        links whose anchor is not defined anywhere in the document.
        ``anchors`` is the list of ``(anchor_name, x, y)`` tuples for the
        anchors whose first definition is on that page, with ``x, y``
        taken from :attr:`Page.anchors`.

    """
    known_anchors = set()
    paged_anchors = []
    for page in self.pages:
        page_anchors = []
        for anchor_name, (point_x, point_y) in page.anchors.items():
            # Only the first page defining a name keeps the anchor.
            if anchor_name not in known_anchors:
                page_anchors.append((anchor_name, point_x, point_y))
                known_anchors.add(anchor_name)
        paged_anchors.append(page_anchors)

    # All anchors must be known before the first page is yielded, as a link
    # may target an anchor defined on a later page.
    for page, page_anchors in zip(self.pages, paged_anchors):
        page_links = []
        for link in page.links:
            link_type, anchor_name, rectangle = link
            if link_type == 'internal':
                if anchor_name not in known_anchors:
                    LOGGER.error(
                        'No anchor #%s for internal URI reference',
                        anchor_name)
                else:
                    page_links.append((link_type, anchor_name, rectangle))
            else:
                # External link
                page_links.append(link)
        yield page_links, page_anchors
|
2012-10-04 13:35:25 +04:00
|
|
|
|
|
|
|
|
|
def make_bookmark_tree(self):
    """Make a tree of all bookmarks in the document.

    .. versionadded:: 0.15

    :return: A list of bookmark subtrees.
        A subtree is a ``BookmarkSubtree`` named tuple built from
        ``(label, target, children, state)``. ``label`` is
        a string, ``target`` is ``(page_number, x, y)`` where
        ``page_number`` is a 0-based index into the :attr:`pages` list and
        ``x, y`` is the point stored in :attr:`Page.bookmarks`,
        ``children`` is a list of child subtrees, and ``state`` is the
        bookmark state stored in :attr:`Page.bookmarks`.

    """
    root = []
    # At one point in the document, for each "output" depth, how much
    # to add to get the source level (CSS values of bookmark-level).
    # E.g. with <h1> then <h3>, skipped_levels == [0, 1]
    # 1 means that <h3> has depth 3 - 1 = 2 in the output.
    skipped_levels = []
    # last_by_depth[d] is the list receiving the next bookmark of output
    # depth d + 1: the root list, then the children of the latest bookmark
    # at each depth.
    last_by_depth = [root]
    previous_level = 0
    for page_number, page in enumerate(self.pages):
        for level, label, (point_x, point_y), state in page.bookmarks:
            if level > previous_level:
                # Example: if the previous bookmark is a <h2>, the next
                # depth "should" be for <h3>. If now we get a <h6> we’re
                # skipping two levels: append 6 - 3 - 1 = 2
                skipped_levels.append(level - previous_level - 1)
            else:
                temp = level
                while temp < previous_level:
                    temp += 1 + skipped_levels.pop()
                if temp > previous_level:
                    # We remove too many "skips", add some back:
                    skipped_levels.append(temp - previous_level - 1)

            previous_level = level
            depth = level - sum(skipped_levels)
            assert depth == len(skipped_levels)
            assert depth >= 1

            children = []
            subtree = BookmarkSubtree(
                label, (page_number, point_x, point_y), children, state)
            last_by_depth[depth - 1].append(subtree)
            # Deeper lists belong to earlier siblings: forget them and
            # make this bookmark the parent of what follows.
            del last_by_depth[depth:]
            last_by_depth.append(children)
    return root
|
|
|
|
|
|
2018-09-24 16:27:24 +03:00
|
|
|
|
def add_hyperlinks(self, links, anchors, context, scale):
    """Include hyperlinks in current PDF page.

    Each item of ``links`` is a ``(link_type, target, rectangle)`` tuple
    and each item of ``anchors`` is an ``(anchor_name, x, y)`` tuple. They
    are written as link and destination tags on ``context``, with their
    coordinates multiplied by ``scale`` and rounded. Nothing is done when
    ``cairo.cairo_version()`` is lower than 11504.

    .. versionadded:: 43

    """
    if cairo.cairo_version() < 11504:
        return

    # We round floats to avoid locale problems, see
    # https://github.com/Kozea/WeasyPrint/issues/742

    # TODO: Instead of using rects, we could use the drawing rectangles
    # defined by cairo when drawing targets. This would give a feeling
    # similiar to what browsers do with links that span multiple lines.
    for link_type, link_target, rectangle in links:
        if link_type == 'attachment':
            # Attachments are handled in write_pdf_metadata
            continue
        if link_type == 'external':
            scaled = [int(round(i * scale)) for i in rectangle]
            attributes = "rect=[{} {} {} {}] uri='{}'".format(
                *scaled, link_target.replace("'", '%27'))
        elif link_type == 'internal':
            scaled = [int(round(i * scale)) for i in rectangle]
            attributes = "rect=[{} {} {} {}] dest='{}'".format(
                *scaled, link_target.replace("'", '%27'))
        context.tag_begin(cairo.TAG_LINK, attributes)
        context.tag_end(cairo.TAG_LINK)

    for anchor_name, x, y in anchors:
        attributes = "name='{}' x={} y={}".format(
            anchor_name.replace("'", '%27'),
            int(round(x * scale)),
            int(round(y * scale)))
        context.tag_begin(cairo.TAG_DEST, attributes)
        context.tag_end(cairo.TAG_DEST)
|
2018-08-06 18:38:02 +03:00
|
|
|
|
|
2014-04-22 22:40:46 +04:00
|
|
|
|
def write_pdf(self, target=None, zoom=1, attachments=None):
    """Paint the pages in a PDF file, with meta-data.

    PDF files written directly by cairo do not have meta-data such as
    bookmarks/outlines and hyperlinks.

    :type target: str, pathlib.Path or file object
    :param target:
        A filename where the PDF file is generated, a file object, or
        :obj:`None`.
    :type zoom: float
    :param zoom:
        The zoom factor in PDF units per CSS units. **Warning**:
        All CSS units are affected, including physical units like
        ``cm`` and named sizes like ``A4``. For values other than
        1, the physical CSS units will thus be "wrong".
    :type attachments: list
    :param attachments: A list of additional file attachments for the
        generated PDF document or :obj:`None`. The list's elements are
        :class:`Attachment` objects, filenames, URLs or file-like objects.
    :returns:
        The PDF as :obj:`bytes` if ``target`` is not provided or
        :obj:`None`, otherwise :obj:`None` (the PDF is written to
        ``target``).

    """
    # 0.75 = 72 PDF point (cairo units) per inch / 96 CSS pixel per inch
    scale = zoom * 0.75

    # Everything is rendered into memory first because the metadata step
    # needs to seek in the generated PDF. Writing straight to the target
    # when possible doesn't significantly save time and memory use.
    pdf_buffer = io.BytesIO()
    # The (1, 1) size is a placeholder: each page calls .set_size() below.
    surface = cairo.PDFSurface(pdf_buffer, 1, 1)
    context = cairo.Context(surface)

    PROGRESS_LOGGER.info('Step 6 - Drawing')

    paged_links_and_anchors = list(self.resolve_links())
    for page, (links, anchors) in zip(self.pages, paged_links_and_anchors):
        bleed = page.bleed
        # The PDF page covers the page box plus the bleed area on each side.
        full_width = page.width + bleed['left'] + bleed['right']
        full_height = page.height + bleed['top'] + bleed['bottom']
        surface.set_size(
            math.floor(scale * full_width), math.floor(scale * full_height))
        with stacked(context):
            # Shift the origin so that the page box starts after the bleed.
            context.translate(bleed['left'] * scale, bleed['top'] * scale)
            page.paint(context, scale=scale)
            self.add_hyperlinks(links, anchors, context, scale)
            surface.show_page()

    PROGRESS_LOGGER.info('Step 7 - Adding PDF metadata')

    # TODO: overwrite producer when possible in cairo
    if cairo.cairo_version() >= 11504:
        # Document information. Each group pairs a converter with the
        # (metadata attribute, cairo key) couples it applies to: plain
        # strings are stored unchanged, lists are comma-joined and dates
        # are converted by _w3c_date_to_iso.
        information_groups = (
            (lambda value, attr: value, (
                ('title', cairo.PDF_METADATA_TITLE),
                ('description', cairo.PDF_METADATA_SUBJECT),
                ('generator', cairo.PDF_METADATA_CREATOR))),
            (lambda value, attr: ', '.join(value), (
                ('authors', cairo.PDF_METADATA_AUTHOR),
                ('keywords', cairo.PDF_METADATA_KEYWORDS))),
            (lambda value, attr: _w3c_date_to_iso(value, attr), (
                ('created', cairo.PDF_METADATA_CREATE_DATE),
                ('modified', cairo.PDF_METADATA_MOD_DATE))),
        )
        for convert, fields in information_groups:
            for attr, key in fields:
                value = getattr(self.metadata, attr)
                if value is not None:
                    surface.set_metadata(key, convert(value, attr))

        # Bookmarks. ``pending`` holds the bookmarks still to emit, in
        # document order; ``parents`` is a stack holding the parent outline
        # of every pending bookmark (the next bookmark's parent is on top).
        pending = self.make_bookmark_tree()
        parents = [cairo.PDF_OUTLINE_ROOT] * len(pending)
        while pending:
            bookmark = pending.pop(0)
            children = bookmark.children
            page_index, x, y = bookmark.destination

            # We round floats to avoid locale problems, see
            # https://github.com/Kozea/WeasyPrint/issues/742
            position = 'page={} pos=[{} {}]'.format(
                page_index + 1, int(round(x * scale)), int(round(y * scale)))

            if bookmark.state == 'open':
                flags = cairo.PDF_OUTLINE_FLAG_OPEN
            else:
                flags = 0
            outline = surface.add_outline(
                parents.pop(), bookmark.label, position, flags)
            # Children are handled right after their parent, before the
            # parent's remaining siblings.
            parents.extend([outline] * len(children))
            pending = children + pending

    surface.finish()

    # Add extra PDF metadata: attachments, embedded files
    attachment_links = [
        [link for link in page_links if link[0] == 'attachment']
        for page_links, _ in paged_links_and_anchors]
    # Write extra PDF metadata only when there is at least one from:
    # - attachments in metadata
    # - attachments as function parameters
    # - attachments as PDF links
    # - bleed boxes
    needs_extra_metadata = (
        self.metadata.attachments or
        attachments or
        any(attachment_links) or
        any(any(page.bleed.values()) for page in self.pages))
    if needs_extra_metadata:
        write_pdf_metadata(
            pdf_buffer, scale, self.url_fetcher,
            self.metadata.attachments + (attachments or []),
            attachment_links, self.pages)

    if target is None:
        return pdf_buffer.getvalue()

    # Copy the finished PDF to the file object or filename given.
    pdf_buffer.seek(0)
    if hasattr(target, 'write'):
        shutil.copyfileobj(pdf_buffer, target)
    else:
        with open(target, 'wb') as fd:
            shutil.copyfileobj(pdf_buffer, fd)
|
|
|
|
|
|
2012-12-29 04:00:30 +04:00
|
|
|
|
def write_image_surface(self, resolution=96):
    """Render pages on a cairo image surface.

    .. versionadded:: 0.17

    There is no decoration around pages other than those specified in CSS
    with ``@page`` rules. The final image is as wide as the widest page.
    Each page is below the previous one, centered horizontally.

    :type resolution: float
    :param resolution:
        The output resolution in PNG pixels per CSS inch. At 96 dpi
        (the default), PNG pixels match the CSS ``px`` unit.
    :returns:
        A ``(surface, png_width, png_height)`` tuple. ``surface`` is a
        cairo :class:`ImageSurface <cairocffi.ImageSurface>`. ``png_width``
        and ``png_height`` are the size of the final image, in PNG pixels.

    """
    # Number of PNG pixels per CSS pixel.
    dppx = resolution / 96

    # This duplicates the hinting logic in Page.paint. There is a
    # dependency cycle otherwise:
    #   this → hinting logic → context → surface → this
    # But since we do no transform here, cairo_context.user_to_device and
    # friends are identity functions.
    pixel_sizes = [
        (int(math.ceil(page.width * dppx)),
         int(math.ceil(page.height * dppx)))
        for page in self.pages]

    # Pages are stacked vertically: the image is as wide as the widest page
    # and as tall as all the pages together.
    image_width = max([width for width, _ in pixel_sizes])
    image_height = sum([height for _, height in pixel_sizes])
    surface = cairo.ImageSurface(
        cairo.FORMAT_ARGB32, image_width, image_height)
    context = cairo.Context(surface)

    PROGRESS_LOGGER.info('Step 6 - Drawing')
    top = 0
    for page, (width, height) in zip(self.pages, pixel_sizes):
        # Center the page horizontally in the image.
        left = (image_width - width) / 2
        page.paint(context, left, top, scale=dppx, clip=True)
        top += height
    return surface, image_width, image_height
|
2012-10-02 20:59:02 +04:00
|
|
|
|
|
2012-12-29 04:00:30 +04:00
|
|
|
|
def write_png(self, target=None, resolution=96):
    """Paint the pages vertically to a single PNG image.

    There is no decoration around pages other than those specified in CSS
    with ``@page`` rules. The final image is as wide as the widest page.
    Each page is below the previous one, centered horizontally.

    :param target:
        A filename, file-like object, or :obj:`None`.
    :type resolution: float
    :param resolution:
        The output resolution in PNG pixels per CSS inch. At 96 dpi
        (the default), PNG pixels match the CSS ``px`` unit.
    :returns:
        A ``(png_bytes, png_width, png_height)`` tuple. ``png_bytes`` is a
        byte string if ``target`` is :obj:`None`, otherwise :obj:`None`
        (the image is written to ``target``). ``png_width`` and
        ``png_height`` are the size of the final image, in PNG pixels.

    """
    surface, png_width, png_height = self.write_image_surface(resolution)

    if target is not None:
        # The caller provided a destination: nothing to return as bytes.
        surface.write_to_png(target)
        return None, png_width, png_height

    # No destination given: encode in memory and hand the bytes back.
    buffer = io.BytesIO()
    surface.write_to_png(buffer)
    return buffer.getvalue(), png_width, png_height
|