WeasyPrint/weasyprint/document.py

"""
    weasyprint.document
    -------------------

"""

import collections
import functools
import hashlib
import io
import math
import shutil
import zlib
from os.path import basename
from subprocess import run
from urllib.parse import unquote, urlsplit

import pydyf
from fontTools import subset
from fontTools.ttLib import TTFont, TTLibError
from PIL import Image
from weasyprint.layout import LayoutContext

from . import CSS, Attachment, __version__
from .css import get_all_computed_styles
from .css.counters import CounterStyle
from .css.targets import TargetCollector
from .draw import draw_page, stacked
from .fonts import FontConfiguration
from .formatting_structure import boxes
from .formatting_structure.build import build_formatting_structure
from .html import W3C_DATE_RE, get_html_metadata
from .images import get_image_from_uri as original_get_image_from_uri
from .layout import layout_document
from .layout.percentages import percentage
from .logger import LOGGER, PROGRESS_LOGGER
from .text import ffi, pango
from .urls import URLFetchingError


def _w3c_date_to_pdf(string, attr_name):
    """Transform a W3C date into the PDF date format.

    :param string:
        Date matching ``W3C_DATE_RE``, or :obj:`None`.
    :param attr_name:
        Name of the attribute the date comes from, only used in the warning
        logged when the date is invalid.
    :returns:
        The date components concatenated from the year down to the most
        precise component given (seconds are always included when a time is
        given), followed by ``Z`` or by a ``+HH'mm`` / ``-HH'mm`` offset
        when a time is given. :obj:`None` if ``string`` is :obj:`None` or
        invalid.

    """
    if string is None:
        return None
    match = W3C_DATE_RE.match(string)
    if match is None:
        LOGGER.warning(f'Invalid {attr_name} date: {string!r}')
        return None
    groups = match.groupdict()
    pdf_date = ''
    # When a time is given, missing seconds have to be filled in too.
    found = groups['hour']
    # Go from the most to the least precise component: once a component is
    # found, each missing coarser component gets its smallest valid value
    # (01 for days and months, 00 otherwise).
    for key in ('second', 'minute', 'hour', 'day', 'month', 'year'):
        if groups[key]:
            found = True
            pdf_date = groups[key] + pdf_date
        elif found:
            pdf_date = f'{(key in ("day", "month")):02d}{pdf_date}'
    if groups['hour']:
        assert groups['minute']
        if groups['tz_hour']:
            assert groups['tz_hour'].startswith(('+', '-'))
            assert groups['tz_minute']
            # Keep the sign as written: int('-00') is 0, so formatting the
            # integer would turn an offset like -00:30 into +00'30.
            tz_sign = groups['tz_hour'][0]
            tz_hour = abs(int(groups['tz_hour']))
            tz_minute = int(groups['tz_minute'])
            pdf_date += f"{tz_sign}{tz_hour:02d}'{tz_minute:02d}"
        else:
            pdf_date += 'Z'
    return pdf_date


class Font:
    """Font file content with the values describing it in the document."""

    def __init__(self, file_content, pango_font):
        metrics = pango.pango_font_get_metrics(pango_font, ffi.NULL)
        description = pango.pango_font_describe(pango_font)
        family = ffi.string(
            pango.pango_font_description_get_family(description))
        size = pango.pango_font_description_get_size(description)
        digest = hashlib.sha256(file_content).digest()

        self.file_content = file_content
        # Tag of 6 uppercase letters, derived from the first 6 bytes of the
        # SHA-256 digest of the font file.
        self.hash = ''.join(chr(65 + byte % 26) for byte in digest[:6])
        # Name made of the tag, a "+" and the family name without spaces.
        self.name = b''.join((
            b'/', self.hash.encode('ascii'), b'+', family.replace(b' ', b'')))
        self.family = family
        self.flags = 4
        self.italic_angle = 0
        # Ascent and descent are divided by the font size and multiplied by
        # 1000; the descent is stored as a negated value.
        ascent = pango.pango_font_metrics_get_ascent(metrics)
        self.ascent = int(ascent / size * 1000)
        descent = pango.pango_font_metrics_get_descent(metrics)
        self.descent = -int(descent / size * 1000)
        self.stemv = 80
        self.stemh = 80
        self.bbox = [0, 0, 0, 0]
        # Both dictionaries start empty and are filled outside this class.
        self.widths = {}
        self.cmap = {}


class Context(pydyf.Stream):
    """PDF stream object with context storing alpha states.

    On top of the stream operators, the context remembers the last color,
    alpha and font it has written, so that setting the same value twice in a
    row does not write the operator again. It also holds the resource
    dictionaries (alpha states, x-objects, patterns and shadings) filled by
    its drawing methods.

    """
    def __init__(self, document, page_rectangle, alpha_states, x_objects,
                 patterns, shadings, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.compress = True
        self.page_rectangle = page_rectangle
        self._document = document
        self._alpha_states = alpha_states
        self._x_objects = x_objects
        self._patterns = patterns
        self._shadings = shadings
        # Last values written in the stream, None when unknown.
        self._current_color = self._current_color_stroke = None
        self._current_alpha = self._current_alpha_stroke = None
        self._current_font = self._current_font_size = None
        self._old_font = self._old_font_size = None

        # These objects are used in text.show_first_line
        self.length = ffi.new('unsigned int *')
        self.ink_rect = ffi.new('PangoRectangle *')
        self.logical_rect = ffi.new('PangoRectangle *')

    def pop_state(self):
        """Restore the previous graphics state.

        The remembered color, alpha and font are forgotten, as the values
        of the restored state are not tracked.

        """
        super().pop_state()
        self._current_color = self._current_color_stroke = None
        self._current_alpha = self._current_alpha_stroke = None
        self._current_font = None

    def begin_text(self):
        """Begin a text object, reusing the previous one when possible.

        When the last item of the stream ends a text object, this item is
        removed instead of beginning a new text object, and the font of the
        previous text object is considered as set again.

        """
        # The stream may be empty when text is the first thing drawn.
        if self.stream and self.stream[-1] == b'ET':
            self._current_font = self._old_font
            self.stream.pop()
        else:
            super().begin_text()

    def end_text(self):
        """End the text object, remembering its font for ``begin_text``."""
        self._old_font, self._current_font = self._current_font, None
        super().end_text()

    def set_color_rgb(self, r, g, b, stroke=False):
        """Set the RGB color, unless it is already the current one.

        :param stroke:
            Whether the stroking color is set instead of the filling color.

        """
        if stroke:
            if (r, g, b) == self._current_color_stroke:
                return
            self._current_color_stroke = (r, g, b)
        else:
            if (r, g, b) == self._current_color:
                return
            self._current_color = (r, g, b)

        super().set_color_rgb(r, g, b, stroke)

    def set_font_size(self, font, size):
        """Set the font and its size, unless they are already set."""
        if (font, size) == self._current_font:
            return
        self._current_font = (font, size)
        super().set_font_size(font, size)

    def set_alpha(self, alpha, stroke=False):
        """Set the alpha value, unless it is already the current one.

        :param alpha:
            Alpha value stored in the graphics state dictionary.
        :param stroke:
            Set the stroking alpha (``CA``) if true, the non-stroking alpha
            (``ca``) if false, and both if :obj:`None`.

        """
        # Alpha states are stored under a key depending on both the value
        # and the target: a state created for filling only must not be
        # reused when the same value is later requested for stroking.
        if stroke is None:
            if alpha == self._current_alpha == self._current_alpha_stroke:
                return
            self._current_alpha = self._current_alpha_stroke = alpha
            key, entries = f'aA{alpha}', {'ca': alpha, 'CA': alpha}
        elif stroke:
            if alpha == self._current_alpha_stroke:
                return
            self._current_alpha_stroke = alpha
            key, entries = f'A{alpha}', {'CA': alpha}
        else:
            if alpha == self._current_alpha:
                return
            self._current_alpha = alpha
            key, entries = f'a{alpha}', {'ca': alpha}

        if key not in self._alpha_states:
            self._alpha_states[key] = pydyf.Dictionary(entries)
        self.set_state(key)

    def add_font(self, font_hash, font_content, pango_font):
        """Store a new :class:`Font` in the document and return it."""
        self._document.fonts[font_hash] = Font(font_content, pango_font)
        return self._document.fonts[font_hash]

    def get_fonts(self):
        """Return the fonts stored in the document."""
        return self._document.fonts

    def sub_context(self, *args, **kwargs):
        """Return a new context sharing the resources of this context."""
        return Context(
            self._document, self.page_rectangle, self._alpha_states,
            self._x_objects, self._patterns, self._shadings, *args, **kwargs)

    def push_group(self, bounding_box):
        """Create a transparency group and return its context.

        The group is a form x-object with its own resources, stored in the
        x-objects of this context. This context is returned by the
        ``pop_group`` method of the group.

        """
        alpha_states = pydyf.Dictionary()
        x_objects = pydyf.Dictionary()
        patterns = pydyf.Dictionary()
        shadings = pydyf.Dictionary()
        resources = pydyf.Dictionary({
            'ExtGState': alpha_states,
            'XObject': x_objects,
            'Pattern': patterns,
            'Shading': shadings,
        })
        extra = pydyf.Dictionary({
            'Type': '/XObject',
            'Subtype': '/Form',
            'BBox': pydyf.Array(bounding_box),
            'Resources': resources,
            'Group': pydyf.Dictionary({
                'Type': '/Group',
                'S': '/Transparency',
                'I': 'true',
                'CS': '/DeviceRGB',
            }),
        })
        group = Context(
            self._document, self.page_rectangle, alpha_states, x_objects,
            patterns, shadings, extra=extra)
        group.id = f'x{len(self._x_objects)}'
        group._parent = self
        self._x_objects[group.id] = group
        return group

    def pop_group(self):
        """Return the context this group has been pushed from."""
        return self._parent

    def add_image(self, pillow_image, image_rendering, optimize_image):
        """Store an image in the x-objects and return its name.

        :param pillow_image:
            Pillow image to store.
        :param image_rendering:
            The image is interpolated only if this value is ``'auto'``.
        :param optimize_image:
            Value of the ``optimize`` parameter used to save the image.

        """
        # The format is lost when the image is converted, keep it.
        image_format = pillow_image.format
        image_mode = pillow_image.mode

        color_spaces = {
            'RGB': '/DeviceRGB',
            'RGBA': '/DeviceRGB',
            'L': '/DeviceGray',
            'CMYK': '/DeviceCMYK',
        }
        if image_mode not in color_spaces:
            # Bilevel ("1"), palette ("P") and other modes are converted,
            # so that the saved data always matches the color space and the
            # 8 bits per component declared below. Alpha is kept if any.
            has_alpha = 'A' in pillow_image.getbands()
            pillow_image = pillow_image.convert(
                'RGBA' if has_alpha else 'RGB')
            image_mode = pillow_image.mode
        color_space = color_spaces[image_mode]

        interpolate = 'true' if image_rendering == 'auto' else 'false'
        extra = pydyf.Dictionary({
            'Type': '/XObject',
            'Subtype': '/Image',
            'Width': pillow_image.width,
            'Height': pillow_image.height,
            'ColorSpace': color_space,
            'BitsPerComponent': 8,
            'Interpolate': interpolate,
        })

        image_file = io.BytesIO()
        if image_format == 'JPEG':
            extra['Filter'] = '/DCTDecode'
            pillow_image.save(
                image_file, format='JPEG', optimize=optimize_image)
        else:
            extra['Filter'] = '/JPXDecode'
            if image_mode == 'RGBA':
                # The alpha channel is stored in a separate soft mask.
                alpha = pillow_image.getchannel('A')
                pillow_image = pillow_image.convert('RGB')
                alpha_file = io.BytesIO()
                alpha.save(
                    alpha_file, format='JPEG2000', optimize=optimize_image,
                    num_resolutions=1)
                extra['SMask'] = pydyf.Stream([alpha_file.getvalue()], extra={
                    'Filter': '/JPXDecode',
                    'Type': '/XObject',
                    'Subtype': '/Image',
                    'Width': pillow_image.width,
                    'Height': pillow_image.height,
                    'ColorSpace': '/DeviceGray',
                    'BitsPerComponent': 8,
                    'Interpolate': interpolate,
                })
            # Set number of resolutions to 1 because of
            # https://github.com/uclouvain/openjpeg/issues/215
            pillow_image.save(
                image_file, format='JPEG2000', optimize=optimize_image,
                num_resolutions=1)
        stream = [image_file.getvalue()]

        xobject = pydyf.Stream(stream, extra=extra)
        image_name = f'Im{len(self._x_objects)}'
        self._x_objects[image_name] = xobject
        return image_name

    def add_pattern(self, x, y, width, height, repeat_width, repeat_height):
        """Create a tiling pattern and return its context.

        The pattern has its own resources and is stored in the patterns of
        this context.

        """
        alpha_states = pydyf.Dictionary()
        x_objects = pydyf.Dictionary()
        patterns = pydyf.Dictionary()
        shadings = pydyf.Dictionary()
        resources = pydyf.Dictionary({
            'ExtGState': alpha_states,
            'XObject': x_objects,
            'Pattern': patterns,
            'Shading': shadings,
        })
        matrix = (1, 0, 0, -1, x, self.page_rectangle[3] - y)
        extra = pydyf.Dictionary({
            'PatternType': 1,
            'BBox': pydyf.Array([0, 0, width, height]),
            'XStep': repeat_width,
            'YStep': repeat_height,
            'TilingType': 1,
            'PaintType': 1,
            # NOTE(review): 0.75 presumably converts CSS pixels to PDF
            # points (72 / 96), to be confirmed.
            'Matrix': pydyf.Array(0.75 * i for i in matrix),
            'Resources': resources,
        })
        pattern = Context(
            self._document, self.page_rectangle, alpha_states, x_objects,
            patterns, shadings, extra=extra)
        pattern.id = f'p{len(self._patterns)}'
        self._patterns[pattern.id] = pattern
        return pattern

    def add_shading(self):
        """Create an empty shading dictionary, store it and return it."""
        shading = pydyf.Dictionary()
        shading.id = f's{len(self._shadings)}'
        self._shadings[shading.id] = shading
        return shading


# Node of a bookmark tree: its label, its destination, its children and its
# state.
BookmarkSubtree = collections.namedtuple(
    typename='BookmarkSubtree',
    field_names=('label', 'destination', 'children', 'state'),
)


def _write_pdf_attachment(pdf, attachment, url_fetcher):
    """Write an attachment to the PDF stream.

    :param pdf:
        PDF document the embedded file and its description are added to.
    :param attachment:
        An :class:`Attachment`, a ``(url, description)`` tuple, or any
        other value used as ``guess`` to create an :class:`Attachment`.
    :param url_fetcher:
        URL fetcher given to the attachments created by this function.
    :return:
        the attachment PDF dictionary, or :obj:`None` if the attachment
        could not be fetched.

    """
    # Attachments from document links like <link> or <a> can only be URLs.
    # They're passed in as tuples
    url = ''
    if isinstance(attachment, tuple):
        url, description = attachment
        attachment = Attachment(
            url=url, url_fetcher=url_fetcher, description=description)
    elif not isinstance(attachment, Attachment):
        attachment = Attachment(guess=attachment, url_fetcher=url_fetcher)

    try:
        with attachment.source as (source_type, source, url, _):
            if isinstance(source, bytes):
                source = io.BytesIO(source)
            uncompressed_length = 0
            # Compressed chunks are joined once at the end, concatenating
            # bytes in the loop would copy the whole content each time.
            compressed_chunks = []
            md5 = hashlib.md5()
            compress = zlib.compressobj()
            for data in iter(lambda: source.read(4096), b''):
                uncompressed_length += len(data)
                md5.update(data)
                compressed_chunks.append(compress.compress(data))
            compressed_chunks.append(compress.flush(zlib.Z_FINISH))
            file_extra = pydyf.Dictionary({
                'Type': '/EmbeddedFile',
                'Filter': '/FlateDecode',
                'Params': pydyf.Dictionary({
                    'CheckSum': f'<{md5.hexdigest()}>',
                    'Size': uncompressed_length,
                })
            })
            file_stream = pydyf.Stream(
                [b''.join(compressed_chunks)], file_extra)
            pdf.add_object(file_stream)

    except URLFetchingError as exception:
        LOGGER.error('Failed to load attachment: %s', exception)
        return None

    # TODO: Use the result object from a URL fetch operation to provide more
    # details on the possible filename.
    url_path = urlsplit(url).path if url else ''
    filename = basename(unquote(url_path)) if url_path else 'attachment.bin'

    file_specification = pydyf.Dictionary({
        'Type': '/Filespec',
        'F': pydyf.String(),
        'UF': pydyf.String(filename),
        'EF': pydyf.Dictionary({'F': file_stream.reference}),
        'Desc': pydyf.String(attachment.description or ''),
    })
    pdf.add_object(file_specification)
    return file_specification


def create_bookmarks(bookmarks, pdf, parent=None):
    """Add the outline objects of a list of bookmarks to the PDF.

    :param bookmarks:
        Sequence of ``(title, (page, x, y), children, state)`` items, where
        ``children`` is a sequence of the same kind.
    :param pdf:
        PDF document the outlines are added to.
    :param parent:
        Outline referenced as parent by the created outlines, if any.
    :returns:
        A tuple of the list of outlines created for ``bookmarks`` (without
        their descendants) and the number of bookmarks counted: the given
        bookmarks, plus the counted descendants of those that are not
        ``'closed'``.

    """
    total = len(bookmarks)
    siblings = []
    for label, target, subtree, state in bookmarks:
        page_index, x, y = target
        # The page is looked up at index "page * 3" of the kids array;
        # presumably each kid takes three items in this array.
        page_number = pdf.pages['Kids'][page_index * 3]
        page_reference = pdf.objects[page_number].reference
        outline = pydyf.Dictionary({
            'Title': pydyf.String(label),
            'Dest': pydyf.Array((page_reference, '/XYZ', x, y, 0)),
        })
        pdf.add_object(outline)

        descendants, descendants_count = create_bookmarks(
            subtree, pdf, parent=outline)
        if state == 'closed':
            # Closed outlines store a negated count, and their descendants
            # are not counted in the parent.
            outline['Count'] = -descendants_count
        else:
            outline['Count'] = descendants_count
            total += descendants_count

        if siblings:
            previous = siblings[-1]
            outline['Prev'] = previous.reference
            previous['Next'] = outline.reference
        if descendants:
            outline['First'] = descendants[0].reference
            outline['Last'] = descendants[-1].reference
        if parent is not None:
            outline['Parent'] = parent.reference
        siblings.append(outline)
    return siblings, total


def add_hyperlinks(links, anchors, matrix, pdf, page, names):
    """Include hyperlinks in current PDF page.

    Internal and external links are added as link annotations of ``page``,
    links of other types are ignored. The name and the destination of each
    anchor are appended to ``names``. Coordinates are transformed with
    ``matrix``.

    """
    for link_type, link_target, rectangle in links:
        x1, y1 = matrix.transform_point(*rectangle[:2])
        x2, y2 = matrix.transform_point(*rectangle[2:])
        if link_type not in ('internal', 'external'):
            continue

        annotation = pydyf.Dictionary({
            'Type': '/Annot',
            'Subtype': '/Link',
            'Rect': pydyf.Array([x1, y1, x2, y2]),
            'BS': pydyf.Dictionary({'W': 0}),
        })
        if link_type == 'external':
            annotation['A'] = pydyf.Dictionary({
                'Type': '/Action',
                'S': '/URI',
                'URI': pydyf.String(link_target),
            })
        else:
            # Internal links are named destinations.
            annotation['Dest'] = pydyf.String(link_target)
        pdf.add_object(annotation)

        if 'Annots' not in page:
            page['Annots'] = pydyf.Array()
        page['Annots'].append(annotation.reference)

    for anchor_name, anchor_x, anchor_y in anchors:
        target_x, target_y = matrix.transform_point(anchor_x, anchor_y)
        names.append(pydyf.String(anchor_name))
        names.append(
            pydyf.Array([page.reference, '/XYZ', target_x, target_y, 0]))


def rectangle_aabb(matrix, pos_x, pos_y, width, height):
    """Apply a transformation matrix to an axis-aligned rectangle.

    The four corners of the rectangle are transformed, and the smallest
    axis-aligned rectangle including them is returned as
    ``(x1, y1, x2, y2)``.

    """
    corners = (
        (pos_x, pos_y),
        (pos_x + width, pos_y),
        (pos_x, pos_y + height),
        (pos_x + width, pos_y + height),
    )
    transform_point = matrix.transform_point
    xs, ys = [], []
    for corner_x, corner_y in corners:
        x, y = transform_point(corner_x, corner_y)
        xs.append(x)
        ys.append(y)
    return min(xs), min(ys), max(xs), max(ys)


def resolve_links(pages):
    """Resolve internal hyperlinks.

    Links to a missing anchor are removed and an error is logged.

    If multiple anchors have the same name, the first one is used.

    :param pages:
        Pages with ``links`` and ``anchors`` attributes. They are iterated
        twice: once to collect all the anchors, once to filter the links.
    :returns:
        A generator yielding one ``(links, anchors)`` tuple per page.
        ``links`` is a list like :attr:`Page.links`, without the internal
        links whose anchor is defined in no page. ``anchors`` is the list
        of ``(anchor_name, x, y)`` tuples for the anchors of the page whose
        name is not used in a previous page.

    """
    known_anchors = set()
    paged_anchors = []
    for page in pages:
        page_anchors = []
        for anchor_name, (point_x, point_y) in page.anchors.items():
            if anchor_name not in known_anchors:
                page_anchors.append((anchor_name, point_x, point_y))
                known_anchors.add(anchor_name)
        paged_anchors.append(page_anchors)

    for page, page_anchors in zip(pages, paged_anchors):
        page_links = []
        for link in page.links:
            link_type, anchor_name, rectangle = link
            if link_type != 'internal':
                # External link
                page_links.append(link)
            elif anchor_name in known_anchors:
                page_links.append((link_type, anchor_name, rectangle))
            else:
                LOGGER.error(
                    'No anchor #%s for internal URI reference',
                    anchor_name)
        yield page_links, page_anchors


class Matrix(list):
    """Matrix stored as a list of rows.

    Without ``matrix``, the rows are ``[a, b, 0]``, ``[c, d, 0]`` and
    ``[e, f, 1]``, the identity by default. Points are row vectors
    multiplied on the left of the matrix.

    """
    def __init__(self, a=1, b=0, c=0, d=1, e=0, f=0, matrix=None):
        rows = matrix
        if rows is None:
            rows = [[a, b, 0], [c, d, 0], [e, f, 1]]
        super().__init__(rows)

    def __matmul__(self, other):
        """Return the product of this matrix by a 3×3 matrix."""
        assert len(self[0]) == len(other) == len(other[0]) == 3
        product = []
        for row in self:
            product.append([
                sum(row[k] * other[k][column] for k in range(3))
                for column in range(3)])
        return Matrix(matrix=product)

    @property
    def determinant(self):
        """Determinant of the 3×3 matrix."""
        assert len(self) == len(self[0]) == 3
        top, middle, bottom = self[0], self[1], self[2]
        # Expansion along the first column.
        return (
            top[0] * (middle[1] * bottom[2] - middle[2] * bottom[1]) -
            middle[0] * (top[1] * bottom[2] - top[2] * bottom[1]) +
            bottom[0] * (top[1] * middle[2] - top[2] * middle[1]))

    def transform_point(self, x, y):
        """Return the list of coordinates of the transformed point."""
        point = Matrix(matrix=[[x, y, 1]])
        transformed = point @ self
        return transformed[0][:2]


class Page:
    """Represents a single rendered page.

    .. versionadded:: 0.15

    Should be obtained from :attr:`Document.pages` but not
    instantiated directly.

    """
    def __init__(self, page_box):
        #: The page width, including margins, in CSS pixels.
        self.width = page_box.margin_width()

        #: The page height, including margins, in CSS pixels.
        self.height = page_box.margin_height()

        #: The page bleed widths as a :obj:`dict` with ``'top'``, ``'right'``,
        #: ``'bottom'`` and ``'left'`` as keys, and values in CSS pixels.
        self.bleed = {
            side: page_box.style[f'bleed_{side}'].value
            for side in ('top', 'right', 'bottom', 'left')}

        #: The :obj:`list` of ``(bookmark_level, bookmark_label, target,
        #: bookmark_state)`` :obj:`tuples <tuple>`. ``bookmark_level`` and
        #: ``bookmark_label`` are respectively an :obj:`int` and a
        #: :obj:`string <str>`, based on the CSS properties of the same
        #: names. ``target`` is an ``(x, y)`` point in CSS pixels from the
        #: top-left of the page. ``bookmark_state`` is the value of the
        #: ``bookmark_state`` style of the box.
        self.bookmarks = []

        #: The :obj:`list` of ``(link_type, target, rectangle)`` :obj:`tuples
        #: <tuple>`. A ``rectangle`` is ``(x1, y1, x2, y2)``, the coordinates
        #: of two opposite corners in CSS pixels from the top-left of the
        #: page. ``link_type`` is one of three strings:
        #:
        #: * ``'external'``: ``target`` is an absolute URL
        #: * ``'internal'``: ``target`` is an anchor name (see
        #:   :attr:`Page.anchors`).
        #:   The anchor might be defined in another page,
        #:   in multiple pages (in which case the first occurrence is used),
        #:   or not at all.
        #: * ``'attachment'``: ``target`` is an absolute URL and points
        #:   to a resource to attach to the document.
        self.links = []

        #: The :obj:`dict` mapping each anchor name to its target, an
        #: ``(x, y)`` point in CSS pixels from the top-left of the page.
        self.anchors = {}

        self._gather_links_and_bookmarks(page_box)
        self._page_box = page_box

    @staticmethod
    def _get_transformation_matrix(box):
        """Return the matrix of the transformations set on ``box``.

        The transformations are applied relatively to the transformation
        origin of the box.

        """
        border_width = box.border_width()
        border_height = box.border_height()
        origin_x, origin_y = box.style['transform_origin']
        offset_x = percentage(origin_x, border_width)
        offset_y = percentage(origin_y, border_height)
        origin_x = box.border_box_x() + offset_x
        origin_y = box.border_box_y() + offset_y

        matrix = Matrix(e=origin_x, f=origin_y)
        for name, args in box.style['transform']:
            a, b, c, d, e, f = 1, 0, 0, 1, 0, 0
            if name == 'scale':
                a, d = args
            elif name == 'rotate':
                a = d = math.cos(args)
                b = math.sin(args)
                c = -b
            elif name == 'translate':
                e = percentage(args[0], border_width)
                f = percentage(args[1], border_height)
            elif name == 'skew':
                b, c = math.tan(args[1]), math.tan(args[0])
            else:
                assert name == 'matrix'
                a, b, c, d, e, f = args
            matrix = Matrix(a, b, c, d, e, f) @ matrix
        return Matrix(e=-origin_x, f=-origin_y) @ matrix

    def _gather_links_and_bookmarks(self, box, parent_matrix=None):
        """Store the links, bookmarks and anchors of a box and its children.

        :param box:
            Box whose links, bookmarks and anchors are stored.
        :param parent_matrix:
            Transformation matrix applied to the parent of the box, if any.

        """
        # Get box transformation matrix.
        # "Transforms apply to block-level and atomic inline-level elements,
        #  but do not apply to elements which may be split into
        #  multiple inline-level boxes."
        # http://www.w3.org/TR/css3-2d-transforms/#introduction
        if box.style['transform'] and not isinstance(box, boxes.InlineBox):
            box.transformation_matrix = self._get_transformation_matrix(box)
            if parent_matrix:
                matrix = box.transformation_matrix @ parent_matrix
            else:
                matrix = box.transformation_matrix
        else:
            matrix = parent_matrix

        bookmark_label = box.bookmark_label
        if box.style['bookmark_level'] == 'none':
            bookmark_level = None
        else:
            bookmark_level = box.style['bookmark_level']
        state = box.style['bookmark_state']
        link = box.style['link']
        anchor_name = box.style['anchor']
        has_bookmark = bookmark_label and bookmark_level
        # 'link' is inherited but redundant on text boxes
        has_link = link and not isinstance(box, (boxes.TextBox, boxes.LineBox))
        # In case of duplicate IDs, only the first is an anchor.
        has_anchor = anchor_name and anchor_name not in self.anchors
        is_attachment = getattr(box, 'is_attachment', False)

        if has_bookmark or has_link or has_anchor:
            pos_x, pos_y, width, height = box.hit_area()
            if has_link:
                token_type, link = link
                assert token_type == 'url'
                link_type, target = link
                assert isinstance(target, str)
                if link_type == 'external' and is_attachment:
                    link_type = 'attachment'
                # Links store the two opposite corners of their rectangle,
                # or of its bounding box when the box is transformed.
                if matrix:
                    link = (link_type, target, rectangle_aabb(
                        matrix, pos_x, pos_y, width, height))
                else:
                    link = (link_type, target, (
                        pos_x, pos_y, pos_x + width, pos_y + height))
                self.links.append(link)
            if matrix and (has_bookmark or has_anchor):
                pos_x, pos_y = matrix.transform_point(pos_x, pos_y)
            if has_bookmark:
                self.bookmarks.append(
                    (bookmark_level, bookmark_label, (pos_x, pos_y), state))
            if has_anchor:
                self.anchors[anchor_name] = pos_x, pos_y

        for child in box.all_children():
            self._gather_links_and_bookmarks(child, matrix)

    def paint(self, context, left_x=0, top_y=0, scale=1, clip=False):
        """Paint the page into the PDF file.

        :type context: :class:`Context`
        :param context:
            A context object.
        :type left_x: float
        :param left_x:
            X coordinate of the left of the page, in PDF points.
        :type top_y: float
        :param top_y:
            Y coordinate of the top of the page, in PDF points.
        :type scale: float
        :param scale:
            Zoom scale.
        :type clip: bool
        :param clip:
            Whether to clip/cut content outside the page. If false or
            not provided, content can overflow.

        """
        with stacked(context):
            # Make (0, 0) the top-left corner, and make user units CSS pixels:
            context.transform(scale, 0, 0, scale, left_x, top_y)
            if clip:
                width = self.width
                height = self.height
                context.rectangle(0, 0, width, height)
                context.clip()
            draw_page(self._page_box, context)


class DocumentMetadata:
    """Meta-information belonging to a whole :class:`Document`.

    .. versionadded:: 0.20

    New attributes may be added in future versions of WeasyPrint.

    """
    def __init__(self, title=None, authors=None, description=None,
                 keywords=None, generator=None, created=None, modified=None,
                 attachments=None):
        #: Document title: a string or :obj:`None`.
        #: Taken from the HTML ``<title>`` element and written to the
        #: ``/Title`` info field in PDF.
        self.title = title
        #: Document authors: a list of strings, empty by default.
        #: Taken from the HTML ``<meta name=author>`` elements and written
        #: to the ``/Author`` info field in PDF.
        self.authors = authors if authors else []
        #: Document description: a string or :obj:`None`.
        #: Taken from the HTML ``<meta name=description>`` element and
        #: written to the ``/Subject`` info field in PDF.
        self.description = description
        #: Document keywords: a list of strings, empty by default.
        #: Taken from the HTML ``<meta name=keywords>`` elements and written
        #: to the ``/Keywords`` info field in PDF.
        self.keywords = keywords if keywords else []
        #: Name of one of the software packages used to generate the
        #: document: a string or :obj:`None`.
        #: Taken from the HTML ``<meta name=generator>`` element and written
        #: to the ``/Creator`` info field in PDF.
        self.generator = generator
        #: Creation date of the document: a string or :obj:`None`.
        #: The date follows one of the six formats specified in
        #: `W3C’s profile of ISO 8601 <http://www.w3.org/TR/NOTE-datetime>`_.
        #: Taken from the HTML ``<meta name=dcterms.created>`` element and
        #: written to the ``/CreationDate`` info field in PDF.
        self.created = created
        #: Modification date of the document: a string or :obj:`None`.
        #: The date follows one of the six formats specified in
        #: `W3C’s profile of ISO 8601 <http://www.w3.org/TR/NOTE-datetime>`_.
        #: Taken from the HTML ``<meta name=dcterms.modified>`` element and
        #: written to the ``/ModDate`` info field in PDF.
        self.modified = modified
        #: File attachments: a list of tuples of URL and a description or
        #: :obj:`None`, empty by default.
        #: Taken from the HTML ``<link rel=attachment>`` elements and
        #: written to the ``/EmbeddedFiles`` dictionary in PDF.
        #:
        #: .. versionadded:: 0.22
        self.attachments = attachments if attachments else []


class Document:
    """A rendered document ready to be painted on a cairo surface.

    Typically obtained from :meth:`HTML.render() <weasyprint.HTML.render>`, but
    can also be instantiated directly with a list of :class:`pages <Page>`, a
    set of :class:`metadata <DocumentMetadata>`, a :func:`url_fetcher
    <weasyprint.default_url_fetcher>` function, and a :class:`font_config
    <weasyprint.fonts.FontConfiguration>`.

    """

    @classmethod
    def _build_layout_context(cls, html, stylesheets,
                              presentational_hints=False,
                              optimize_images=False, font_config=None,
                              counter_style=None, image_cache=None):
        """Create the :class:`LayoutContext` used to lay out ``html``.

        :param html: The HTML document object; its ``media_type`` and
            ``url_fetcher`` attributes are read here.
        :param stylesheets: An iterable of user stylesheets or :obj:`None`.
            Items without a ``matcher`` attribute are wrapped in
            :class:`CSS` (passed as ``guess``).
        :param presentational_hints: Forwarded to the style computation.
        :param optimize_images: Forwarded to the image loader.
        :param font_config: A font configuration, created when :obj:`None`.
        :param counter_style: A counter style, created when :obj:`None`.
        :param image_cache: A mapping shared by the image loader, a new
            :obj:`dict` when :obj:`None`.
        :returns: The new :class:`LayoutContext`.

        """
        font_config = (
            FontConfiguration() if font_config is None else font_config)
        counter_style = (
            CounterStyle() if counter_style is None else counter_style)
        collector = TargetCollector()
        if image_cache is None:
            image_cache = {}

        def as_css(stylesheet):
            # Objects exposing ``matcher`` are used as they are, anything
            # else is handed to CSS() to be loaded.
            if hasattr(stylesheet, 'matcher'):
                return stylesheet
            return CSS(
                guess=stylesheet, media_type=html.media_type,
                font_config=font_config, counter_style=counter_style)

        user_stylesheets = [
            as_css(stylesheet) for stylesheet in stylesheets or []]

        # ``page_rules`` is an empty list handed to the style computation;
        # it is not read again in this method.
        page_rules = []
        style_for = get_all_computed_styles(
            html, user_stylesheets, presentational_hints, font_config,
            counter_style, page_rules, collector)

        # Bind the cache, fetcher and optimization flag once so that layout
        # code only has to provide the image-specific arguments.
        image_loader = functools.partial(
            original_get_image_from_uri, image_cache, html.url_fetcher,
            optimize_images)

        PROGRESS_LOGGER.info('Step 4 - Creating formatting structure')
        return LayoutContext(
            style_for, image_loader, font_config, counter_style, collector)

    @classmethod
    def _render(cls, html, stylesheets, presentational_hints=False,
                optimize_images=False, font_config=None, counter_style=None,
                image_cache=None):
        """Lay out ``html`` and return the resulting document.

        Builds the layout context and the formatting structure, runs the
        layout, then wraps every page box in a :class:`Page`. Metadata is
        read from ``html`` with ``get_html_metadata``.

        :returns: A new instance of ``cls`` holding the pages, the metadata,
            the URL fetcher of ``html`` and the font configuration.

        """
        font_config = (
            FontConfiguration() if font_config is None else font_config)
        counter_style = (
            CounterStyle() if counter_style is None else counter_style)

        layout_context = cls._build_layout_context(
            html, stylesheets, presentational_hints, optimize_images,
            font_config, counter_style, image_cache)

        formatting_root = build_formatting_structure(
            html.etree_element, layout_context.style_for,
            layout_context.get_image_from_uri, html.base_url,
            layout_context.target_collector, counter_style)

        laid_out_boxes = layout_document(
            html, formatting_root, layout_context)
        pages = [Page(page_box) for page_box in laid_out_boxes]
        metadata = DocumentMetadata(**get_html_metadata(html))
        return cls(pages, metadata, html.url_fetcher, font_config)

    def _use_references(self, pdf, resources):
        # XObjects
        for key, x_object in resources.get('XObject', {}).items():
            pdf.add_object(x_object)
            resources['XObject'][key] = x_object.reference
            if 'Resources' in x_object.extra:
                self._use_references(pdf, x_object.extra['Resources'])
                pdf.add_object(x_object.extra['Resources'])
                x_object.extra['Resources'] = (
                    x_object.extra['Resources'].reference)
        # Patterns
        for key, pattern in resources.get('Pattern', {}).items():
            pdf.add_object(pattern)
            resources['Pattern'][key] = pattern.reference
            if 'Resources' in pattern.extra:
                self._use_references(pdf, pattern.extra['Resources'])
                pdf.add_object(pattern.extra['Resources'])
                pattern.extra['Resources'] = (
                    pattern.extra['Resources'].reference)
        # Shadings
        for key, shading in resources.get('Shading', {}).items():
            pdf.add_object(shading)
            resources['Shading'][key] = shading.reference

    def __init__(self, pages, metadata, url_fetcher, font_config):
        #: A list of :class:`Page` objects.
        self.pages = pages
        #: A :class:`DocumentMetadata` object.
        #: Contains information that does not belong to a specific page
        #: but to the whole document.
        self.metadata = metadata
        #: A function or other callable with the same signature as
        #: :func:`default_url_fetcher` called to fetch external resources such
        #: as stylesheets and images.  (See :ref:`url-fetchers`.)
        self.url_fetcher = url_fetcher
        #: A :obj:`dict` of fonts used by the document. Keys are hashes used to
        #: identify fonts, values are :class:`Font` objects.
        self.fonts = {}
        # Keep a reference to font_config to avoid its garbage collection until
        # rendering is destroyed. This is needed as font_config.__del__ removes
        # fonts that may be used when rendering
        self._font_config = font_config

    def copy(self, pages='all'):
        """Take a subset of the pages.

        .. versionadded:: 0.15

        :type pages: :term:`iterable`
        :param pages:
            An iterable of :class:`Page` objects from :attr:`pages`.
        :return:
            A new :class:`Document` object.

        Examples:

        Write two PDF files for odd-numbered and even-numbered pages::

            # Python lists count from 0 but pages are numbered from 1.
            # [::2] is a slice of even list indexes but odd-numbered pages.
            document.copy(document.pages[::2]).write_pdf('odd_pages.pdf')
            document.copy(document.pages[1::2]).write_pdf('even_pages.pdf')

        Write each page to a numbred PNG file::

            for i, page in enumerate(document.pages):
                document.copy(page).write_png(f'page_{i}.png')

        Combine multiple documents into one PDF file,
        using metadata from the first::

            all_pages = [p for doc in documents for p in doc.pages]
            documents[0].copy(all_pages).write_pdf('combined.pdf')

        """
        if pages == 'all':
            pages = self.pages
        elif not isinstance(pages, list):
            pages = list(pages)
        return type(self)(
            pages, self.metadata, self.url_fetcher, self._font_config)

    def write_pdf(self, target=None, zoom=1, attachments=None, finisher=None):
        """Paint the pages in a PDF file, with meta-data.

        Each page is painted into its own content stream. Hyperlinks, file
        attachment annotations, bookmarks (outlines), document information,
        embedded files and embedded fonts are then added before the PDF is
        serialized.

        :type target: str, pathlib.Path or file object
        :param target:
            A filename where the PDF file is generated, a file object, or
            :obj:`None`.
        :type zoom: float
        :param zoom:
            The zoom factor in PDF units per CSS units.  **Warning**:
            All CSS units are affected, including physical units like
            ``cm`` and named sizes like ``A4``.  For values other than
            1, the physical CSS units will thus be "wrong".
        :type attachments: list
        :param attachments: A list of additional file attachments for the
            generated PDF document or :obj:`None`. The list's elements are
            :class:`Attachment` objects, filenames, URLs or file-like objects.
        :param finisher: A finisher function, that accepts the document and a
            ``pydyf.PDF`` object as parameters, can be passed to perform
            post-processing on the PDF right before the trailer is written.
        :returns:
            The PDF as :obj:`bytes` if ``target`` is not provided or
            :obj:`None`, otherwise :obj:`None` (the PDF is written to
            ``target``).

        """
        # 0.75 = 72 PDF point per inch / 96 CSS pixel per inch
        scale = zoom * 0.75

        PROGRESS_LOGGER.info('Step 6 - Creating PDF')

        pdf = pydyf.PDF()
        alpha_states = pydyf.Dictionary()
        x_objects = pydyf.Dictionary()
        patterns = pydyf.Dictionary()
        shadings = pydyf.Dictionary()
        # A single resources dictionary is shared by all the pages.
        resources = pydyf.Dictionary({
            'ExtGState': alpha_states,
            'XObject': x_objects,
            'Pattern': patterns,
            'Shading': shadings,
        })
        pdf.add_object(resources)
        pdf_names = pydyf.Array()

        # Links and anchors
        page_links_and_anchors = list(resolve_links(self.pages))
        attachment_links = [
            [link for link in page_links if link[0] == 'attachment']
            for page_links, page_anchors in page_links_and_anchors]

        # Annotations
        annot_files = {}
        # A single link can be split in multiple regions. We don't want to
        # embed a file multiple times of course, so keep a reference to every
        # embedded URL and reuse the object number.
        for page_links in attachment_links:
            for link_type, annot_target, rectangle in page_links:
                # The lookup key is the link target, not the ``target``
                # parameter of this method (the output file).
                if link_type == 'attachment' and (
                        annot_target not in annot_files):
                    # TODO: Use the title attribute as description. The comment
                    # above about multiple regions won't always be correct,
                    # because two links might have the same href, but different
                    # titles.
                    annot_files[annot_target] = _write_pdf_attachment(
                        pdf, (annot_target, None), self.url_fetcher)

        # Bookmarks
        root = []
        # At one point in the document, for each "output" depth, how much
        # to add to get the source level (CSS values of bookmark-level).
        # E.g. with <h1> then <h3>, skipped_levels == [0, 1]
        # 1 means that <h3> has depth 3 - 1 = 2 in the output.
        skipped_levels = []
        last_by_depth = [root]
        previous_level = 0

        for page_number, (page, links_and_anchors, page_links) in enumerate(
                zip(self.pages, page_links_and_anchors, attachment_links)):
            # Draw from the top-left corner
            matrix = Matrix(scale, 0, 0, -scale, 0, page.height * scale)

            # Links and anchors
            links, anchors = links_and_anchors

            page_width = scale * (
                page.width + page.bleed['left'] + page.bleed['right'])
            page_height = scale * (
                page.height + page.bleed['top'] + page.bleed['bottom'])
            left = -scale * page.bleed['left']
            top = -scale * page.bleed['top']
            right = left + page_width
            bottom = top + page_height

            page_rectangle = (
                left / scale, top / scale, right / scale, bottom / scale)
            stream = Context(
                self, page_rectangle, alpha_states, x_objects, patterns,
                shadings)
            stream.transform(1, 0, 0, -1, 0, page.height * scale)
            page.paint(stream, scale=scale)
            pdf.add_object(stream)

            pdf_page = pydyf.Dictionary({
                'Type': '/Page',
                'Parent': pdf.pages.reference,
                'MediaBox': pydyf.Array([left, top, right, bottom]),
                'Contents': stream.reference,
                'Resources': resources.reference,
            })
            pdf.add_page(pdf_page)

            add_hyperlinks(links, anchors, matrix, pdf, pdf_page, pdf_names)

            # Bleed, in PDF units. Use the same factor as the MediaBox
            # (``scale``, which includes ``zoom``) so that the TrimBox
            # matches the page box for any zoom value.
            bleed = {key: value * scale for key, value in page.bleed.items()}

            trim_left = left + bleed['left']
            trim_top = top + bleed['top']
            trim_right = right - bleed['right']
            trim_bottom = bottom - bleed['bottom']

            # Arbitrarly set PDF BleedBox between CSS bleed box (MediaBox) and
            # CSS page box (TrimBox) at most 10 points from the TrimBox.
            bleed_left = trim_left - min(10, bleed['left'])
            bleed_top = trim_top - min(10, bleed['top'])
            bleed_right = trim_right + min(10, bleed['right'])
            bleed_bottom = trim_bottom + min(10, bleed['bottom'])

            pdf_page['TrimBox'] = pydyf.Array([
                trim_left, trim_top, trim_right, trim_bottom])
            pdf_page['BleedBox'] = pydyf.Array([
                bleed_left, bleed_top, bleed_right, bleed_bottom])

            # Annotations
            # TODO: splitting a link into multiple independent rectangular
            # annotations works well for pure links, but rather mediocre for
            # other annotations and fails completely for transformed (CSS) or
            # complex link shapes (area). It would be better to use /AP for all
            # links and coalesce link shapes that originate from the same HTML
            # link. This would give a feeling similiar to what browsers do with
            # links that span multiple lines.
            for link_type, annot_target, rectangle in page_links:
                annot_file = annot_files[annot_target]
                if link_type == 'attachment' and annot_file is not None:
                    rectangle = (
                        *matrix.transform_point(*rectangle[:2]),
                        *matrix.transform_point(*rectangle[2:]))
                    annot = pydyf.Dictionary({
                        'Type': '/Annot',
                        'Rect': pydyf.Array(rectangle),
                        'Subtype': '/FileAttachment',
                        'T': pydyf.String(),
                        'FS': annot_file.reference,
                        'AP': pydyf.Dictionary({'N': pydyf.Stream([], {
                            'Type': '/XObject',
                            'Subtype': '/Form',
                            'BBox': pydyf.Array(rectangle),
                            'Length': 0,
                        })})
                    })
                    pdf.add_object(annot)
                    if 'Annots' not in pdf_page:
                        pdf_page['Annots'] = pydyf.Array()
                    pdf_page['Annots'].append(annot.reference)

            # Bookmarks
            for level, label, (point_x, point_y), state in page.bookmarks:
                if level > previous_level:
                    # Example: if the previous bookmark is a <h2>, the next
                    # depth "should" be for <h3>. If now we get a <h6> we’re
                    # skipping two levels: append 6 - 3 - 1 = 2
                    skipped_levels.append(level - previous_level - 1)
                else:
                    temp = level
                    while temp < previous_level:
                        temp += 1 + skipped_levels.pop()
                    if temp > previous_level:
                        # We remove too many "skips", add some back:
                        skipped_levels.append(temp - previous_level - 1)

                previous_level = level
                depth = level - sum(skipped_levels)
                assert depth == len(skipped_levels)
                assert depth >= 1

                children = []
                point_x, point_y = matrix.transform_point(point_x, point_y)
                subtree = BookmarkSubtree(
                    label, (page_number, point_x, point_y), children, state)
                last_by_depth[depth - 1].append(subtree)
                del last_by_depth[depth:]
                last_by_depth.append(children)

        # Outlines
        outlines, count = create_bookmarks(root, pdf)
        if outlines:
            outlines_dictionary = pydyf.Dictionary({
                'Count': count,
                'First': outlines[0].reference,
                'Last': outlines[-1].reference,
            })
            pdf.add_object(outlines_dictionary)
            for outline in outlines:
                outline['Parent'] = outlines_dictionary.reference
            pdf.catalog['Outlines'] = outlines_dictionary.reference

        PROGRESS_LOGGER.info('Step 7 - Adding PDF metadata')

        # PDF information
        if self.metadata.title:
            pdf.info['Title'] = pydyf.String(self.metadata.title)
        if self.metadata.authors:
            pdf.info['Author'] = pydyf.String(
                ', '.join(self.metadata.authors))
        if self.metadata.description:
            pdf.info['Subject'] = pydyf.String(self.metadata.description)
        if self.metadata.keywords:
            pdf.info['Keywords'] = pydyf.String(
                ', '.join(self.metadata.keywords))
        if self.metadata.generator:
            pdf.info['Creator'] = pydyf.String(self.metadata.generator)
        pdf.info['Producer'] = pydyf.String(f'WeasyPrint {__version__}')
        # _w3c_date_to_pdf() logs a warning and returns None for dates it
        # cannot parse: skip the field instead of writing an invalid string.
        if self.metadata.created:
            created = _w3c_date_to_pdf(self.metadata.created, 'created')
            if created is not None:
                pdf.info['CreationDate'] = pydyf.String(created)
        if self.metadata.modified:
            modified = _w3c_date_to_pdf(self.metadata.modified, 'modified')
            if modified is not None:
                pdf.info['ModDate'] = pydyf.String(modified)

        # Embedded files
        attachments = self.metadata.attachments + (attachments or [])
        pdf_attachments = []
        for attachment in attachments:
            pdf_attachment = _write_pdf_attachment(
                pdf, attachment, self.url_fetcher)
            if pdf_attachment is not None:
                pdf_attachments.append(pdf_attachment)
        if pdf_attachments:
            content = pydyf.Dictionary({'Names': pydyf.Array()})
            for i, pdf_attachment in enumerate(pdf_attachments):
                content['Names'].append(pydyf.String(f'attachment{i}'))
                content['Names'].append(pdf_attachment.reference)
            pdf.add_object(content)
            if 'Names' not in pdf.catalog:
                pdf.catalog['Names'] = pydyf.Dictionary()
            pdf.catalog['Names']['EmbeddedFiles'] = content.reference

        # Embeded fonts
        resources['Font'] = pydyf.Dictionary()
        for font in self.fonts.values():
            # Optimize font
            try:
                full_font = io.BytesIO(font.file_content)
                optimized_font = io.BytesIO()
                ttfont = TTFont(full_font)
                options = subset.Options(
                    retain_gids=True, passthrough_tables=True)
                subsetter = subset.Subsetter(options)
                subsetter.populate(gids=font.cmap)
                subsetter.subset(ttfont)
                ttfont.save(optimized_font)
                content = optimized_font.getvalue()
            except TTLibError:
                # Fall back to the full font when it can't be subsetted.
                content = font.file_content

            # Include font
            font_type = 'otf' if content[:4] == b'OTTO' else 'ttf'
            if font_type == 'otf':
                font_extra = pydyf.Dictionary({'Subtype': '/OpenType'})
            else:
                font_extra = pydyf.Dictionary({'Length1': len(content)})
            font_stream = pydyf.Stream([content], font_extra, compress=True)
            pdf.add_object(font_stream)

            # Widths are grouped by runs of consecutive glyph ids: each run
            # is stored as its first id followed by the array of widths.
            widths = pydyf.Array()
            for i in sorted(font.widths):
                if i - 1 not in font.widths:
                    widths.append(i)
                    current_widths = pydyf.Array()
                    widths.append(current_widths)
                current_widths.append(font.widths[i])
            subfont_dictionary = pydyf.Dictionary({
                'Type': '/Font',
                'Subtype': f'/CIDFontType{"0" if font_type == "otf" else "2"}',
                'BaseFont': font.name,
                'CIDSystemInfo': pydyf.Dictionary({
                    'Registry': pydyf.String('Adobe'),
                    'Ordering': pydyf.String('Identity'),
                    'Supplement': 0,
                }),
                'W': widths,
                'FontDescriptor': pydyf.Dictionary({
                    'Type': '/FontDescriptor',
                    'FontName': font.name,
                    'FontFamily': pydyf.String(font.family),
                    'Flags': 32,
                    'FontBBox': pydyf.Array(font.bbox),
                    'ItalicAngle': font.italic_angle,
                    'Ascent': font.ascent,
                    'Descent': font.descent,
                    'CapHeight': font.bbox[3],
                    'StemV': font.stemv,
                    'StemH': font.stemh,
                    (f'FontFile{"3" if font_type == "otf" else "2"}'):
                        font_stream.reference,
                }),
            })
            if font_type == 'otf':
                subfont_dictionary['FontDescriptor']['Subtype'] = '/OpenType'
            pdf.add_object(subfont_dictionary)
            # CMap mapping glyph ids back to text.
            to_unicode = pydyf.Stream([
                b'/CIDInit /ProcSet findresource begin',
                b'12 dict begin',
                b'begincmap',
                b'/CIDSystemInfo',
                b'<< /Registry (Adobe)',
                b'/Ordering (UCS)',
                b'/Supplement 0',
                b'>> def',
                b'/CMapName /Adobe-Identity-UCS def',
                b'/CMapType 2 def',
                b'1 begincodespacerange',
                b'<0000> <ffff>',
                b'endcodespacerange',
                f'{len(font.cmap)} beginbfchar'.encode('ascii')])
            for glyph, text in font.cmap.items():
                unicode_codepoints = ''.join(
                    f'{letter.encode("utf-16-be").hex()}' for letter in text)
                to_unicode.stream.append(
                    f'<{glyph:04x}> <{unicode_codepoints}>'.encode('ascii'))
            to_unicode.stream.extend([
                b'endbfchar',
                b'endcmap',
                b'CMapName currentdict /CMap defineresource pop',
                b'end',
                b'end'])
            pdf.add_object(to_unicode)
            font_dictionary = pydyf.Dictionary({
                'Type': '/Font',
                'Subtype': '/Type0',
                'BaseFont': font.name,
                'Encoding': '/Identity-H',
                'DescendantFonts': pydyf.Array([subfont_dictionary.reference]),
                'ToUnicode': to_unicode.reference,
            })
            pdf.add_object(font_dictionary)
            resources['Font'][font.hash] = font_dictionary.reference

        self._use_references(pdf, resources)

        # Anchors
        if pdf_names:
            # Extend the existing name dictionary instead of replacing it,
            # otherwise the EmbeddedFiles entry set above would be lost.
            if 'Names' not in pdf.catalog:
                pdf.catalog['Names'] = pydyf.Dictionary()
            pdf.catalog['Names']['Dests'] = pydyf.Dictionary(
                {'Names': pdf_names})

        if finisher:
            finisher(self, pdf)

        file_obj = io.BytesIO()
        pdf.write(file_obj)

        if target is None:
            return file_obj.getvalue()
        else:
            file_obj.seek(0)
            if hasattr(target, 'write'):
                shutil.copyfileobj(file_obj, target)
            else:
                with open(target, 'wb') as fd:
                    shutil.copyfileobj(file_obj, fd)

    def write_png(self, target=None, resolution=96, antialiasing=1):
        """Paint the pages vertically to a single PNG image.

        The document is written as PDF and piped to the ``gs`` command, whose
        standard output is read as a stream of PNG images, one per page.

        There is no decoration around pages other than those specified in CSS
        with ``@page`` rules. The final image is as wide as the widest page.
        Each page is below the previous one, centered horizontally.

        :param target:
            A filename, file-like object, or :obj:`None`.
        :type resolution: float
        :param resolution:
            The output resolution in PNG pixels per CSS inch. At 96 dpi
            (the default), PNG pixels match the CSS ``px`` unit.
        :type antialiasing: int
        :param antialiasing:
            The antialiasing subsampling box size. Default is 1 (disabled), can
            be set to 4 for optimal (but slow) antialiasing.
        :returns:
            The image as :obj:`bytes` if ``target`` is :obj:`None`, otherwise
            :obj:`None` (the image is written to ``target``).

        """
        # TODO: don’t crash if GhostScript can’t be found
        # TODO: fix that for Windows
        command = [
            'gs', '-q', '-sstdout=%stderr', '-dNOPAUSE', '-dSAFER',
            f'-dTextAlphaBits={antialiasing}',
            f'-dGraphicsAlphaBits={antialiasing}', '-sDEVICE=png16m',
            f'-r{resolution}', '-sOutputFile=-', '-']
        process = run(command, input=self.write_pdf(), capture_output=True)
        pngs = process.stdout
        magic_number = b'\x89\x50\x4e\x47\x0d\x0a\x1a\x0a'

        # TODO: use a different way to find PNG files in stream
        if pngs.count(magic_number) == 1:
            # Single page: the output already is the final image.
            if target is None:
                return pngs
            png = io.BytesIO(pngs)
        else:
            # Skip the first signature, split on the following ones, and put
            # the signature back in front of each chunk.
            images = [
                Image.open(io.BytesIO(magic_number + chunk))
                for chunk in pngs[8:].split(magic_number)]

            width = max(image.width for image in images)
            height = sum(image.height for image in images)
            output_image = Image.new('RGBA', (width, height))
            top = 0
            for image in images:
                # Center each page horizontally, below the previous one.
                output_image.paste(
                    image, (int((width - image.width) / 2), top))
                top += image.height
            png = io.BytesIO()
            output_image.save(png, format='png')

        png.seek(0)

        if target is None:
            return png.read()

        if hasattr(target, 'write'):
            shutil.copyfileobj(png, target)
        else:
            with open(target, 'wb') as fd:
                shutil.copyfileobj(png, fd)
-												The Document is back! (But different.)

											
										
										
											2012-10-02 20:59:02 +04:00
+								"""
 								    weasyprint.document
 								    -------------------
 								"""
-												Small code and style improvements around bookmark-state

Related to #870.

											
										
										
											2019-05-24 00:55:56 +03:00
+								import collections
-												Use isort in tests

											
										
										
											2017-03-25 02:33:36 +03:00
+								import functools
-												Merge document and pdf modules

											
										
										
											2020-04-19 17:49:37 +03:00
+								import hashlib
-												The Document is back! (But different.)

											
										
										
											2012-10-02 20:59:02 +04:00
+								import io
 								import math
 								import shutil
-												Merge document and pdf modules

											
										
										
											2020-04-19 17:49:37 +03:00
+								import zlib
 								from os.path import basename
-												Use subprocess.run

It’s just more simple than Popen.

											
										
										
											2020-05-19 00:53:19 +03:00
+								from subprocess import run
-												Merge document and pdf modules

											
										
										
											2020-04-19 17:49:37 +03:00
+								from urllib.parse import unquote, urlsplit
-												The Document is back! (But different.)

											
										
										
											2012-10-02 20:59:02 +04:00
-												First steps to replace cairo with pydyf

											
										
										
											2020-04-18 01:19:35 +03:00
+								import pydyf
-												Optimize fonts

											
										
										
											2020-05-29 20:43:56 +03:00
+								from fontTools import subset
-												Don’t crash when font can’t be optimized

											
										
										
											2020-05-30 01:30:13 +03:00
+								from fontTools.ttLib import TTFont, TTLibError
-												Fix many tests

											
										
										
											2020-05-13 02:02:43 +03:00
+								from PIL import Image
-												Refactor LayoutContext building

 this will provide a simpler API for testing context-using functions inside tests

											
										
										
											2019-07-23 08:12:08 +03:00
+								from weasyprint.layout import LayoutContext
-												The Document is back! (But different.)

											
										
										
											2012-10-02 20:59:02 +04:00
-												Lint

											
										
										
											2020-05-13 00:54:42 +03:00
+								from . import CSS, Attachment, __version__
-												The Document is back! (But different.)

											
										
										
											2012-10-02 20:59:02 +04:00
+								from .css import get_all_computed_styles
-												Add default counter class

											
										
										
											2019-12-24 17:56:24 +03:00
+								from .css.counters import CounterStyle
-												Don't use a global target collector

											
										
										
											2018-03-28 01:34:34 +03:00
+								from .css.targets import TargetCollector
-												Use isort in tests

											
										
										
											2017-03-25 02:33:36 +03:00
+								from .draw import draw_page, stacked
-												Add the font configuration in LayoutContext

The font configuration is available (almost) everywhere it's needed,
@font-face doesn't rely on a global state anymore.

											
										
										
											2016-10-27 18:36:24 +03:00
+								from .fonts import FontConfiguration
-												Add metadata in the low-level API.

											
										
										
											2012-10-04 13:35:25 +04:00
+								from .formatting_structure import boxes
-												The Document is back! (But different.)

											
										
										
											2012-10-02 20:59:02 +04:00
+								from .formatting_structure.build import build_formatting_structure
-												Clean document.py

											
										
										
											2020-05-30 02:11:30 +03:00
+								from .html import W3C_DATE_RE, get_html_metadata
-												Fix circular imports

											
										
										
											2018-01-07 03:46:39 +03:00
+								from .images import get_image_from_uri as original_get_image_from_uri
-												The Document is back! (But different.)

											
										
										
											2012-10-02 20:59:02 +04:00
+								from .layout import layout_document
-												Move percentage function into percentages module

											
										
										
											2019-06-02 19:06:25 +03:00
+								from .layout.percentages import percentage
-												Use a separate logger for generation progress

											
										
										
											2019-01-04 01:02:44 +03:00
+								from .logger import LOGGER, PROGRESS_LOGGER
-												Get FontBBox and CapHeight

											
										
										
											2020-05-08 01:11:19 +03:00
+								from .text import ffi, pango
-												Merge document and pdf modules

											
										
										
											2020-04-19 17:49:37 +03:00
+								from .urls import URLFetchingError
 								def _w3c_date_to_pdf(string, attr_name):
 								    """Tranform W3C date to PDF format."""
 								    if string is None:
 								        return None
 								    match = W3C_DATE_RE.match(string)
 								    if match is None:
-												Clean formatted strings

											
										
										
											2020-05-30 16:48:24 +03:00
+								        LOGGER.warning(f'Invalid {attr_name} date: {string!r}')
-												Merge document and pdf modules

											
										
										
											2020-04-19 17:49:37 +03:00
+								        return None
 								    groups = match.groupdict()
 								    pdf_date = ''
-												Don’t remove seconds when timezone is set

											
										
										
											2020-05-16 16:23:20 +03:00
+								    found = groups['hour']
-												Merge document and pdf modules

											
										
										
											2020-04-19 17:49:37 +03:00
+								    for key in ('second', 'minute', 'hour', 'day', 'month', 'year'):
 								        if groups[key]:
 								            found = True
 								            pdf_date = groups[key] + pdf_date
 								        elif found:
-												Clean document.py

											
										
										
											2020-05-30 02:11:30 +03:00
+								            pdf_date = f'{(key in ("day", "month")):02d}{pdf_date}'
-												Merge document and pdf modules

											
										
										
											2020-04-19 17:49:37 +03:00
+								    if groups['hour']:
 								        assert groups['minute']
 								        if groups['tz_hour']:
 								            assert groups['tz_hour'].startswith(('+', '-'))
 								            assert groups['tz_minute']
-												Clean document.py

											
										
										
											2020-05-30 02:11:30 +03:00
+								            tz_hour = int(groups['tz_hour'])
 								            tz_minute = int(groups['tz_minute'])
 								            pdf_date += f"{tz_hour:+03d}'{tz_minute:02d}"
-												Merge document and pdf modules

											
										
										
											2020-04-19 17:49:37 +03:00
+								        else:
 								            pdf_date += 'Z'
 								    return pdf_date
-												The Document is back! (But different.)

											
										
										
											2012-10-02 20:59:02 +04:00
-												First steps to replace cairo with pydyf

											
										
										
											2020-04-18 01:19:35 +03:00
-												Remove useless parenthesis

											
										
										
											2020-05-08 01:31:50 +03:00
class Font:
    """Font file content with the names and metrics read from Pango."""

    def __init__(self, file_content, pango_font):
        """Store the font file and read its description and metrics.

        :param file_content:
            bytes of the font file, stored as is and hashed to build the
            font name.
        :param pango_font:
            Pango font the family, size, ascent and descent are read from.

        """
        pango_metrics = pango.pango_font_get_metrics(pango_font, ffi.NULL)
        font_description = pango.pango_font_describe(pango_font)
        font_family = ffi.string(pango.pango_font_description_get_family(
            font_description))
        font_size = pango.pango_font_description_get_size(font_description)
        digest = hashlib.sha256(file_content).digest()

        self.file_content = file_content
        # Six uppercase ASCII letters derived from the first bytes of the
        # SHA-256 digest of the font file.
        self.hash = ''.join(chr(65 + letter % 26) for letter in digest[:6])
        # Name is b'/' + hash + b'+' + family with its spaces removed.
        self.name = (
            b'/' + self.hash.encode('ascii') + b'+' +
            font_family.replace(b' ', b''))
        self.family = font_family
        # NOTE(review): constant value, commit history describes it as
        # "symbolic"; presumably the PDF font descriptor flags, to confirm.
        self.flags = 4
        self.italic_angle = 0
        # Ascent and descent are scaled by the font size to 1000 units, the
        # descent being stored as a negative value.
        self.ascent = int(
            pango.pango_font_metrics_get_ascent(pango_metrics) /
            font_size * 1000)
        self.descent = -int(
            pango.pango_font_metrics_get_descent(pango_metrics) /
            font_size * 1000)
        self.stemv = 80
        self.stemh = 80
        # Initial empty values, not filled by this class.
        self.bbox = [0, 0, 0, 0]
        self.widths = {}
        self.cmap = {}
-												Move getting font informations from text to document

											
										
										
											2020-05-07 20:33:54 +03:00
-												First steps to replace cairo with pydyf

											
										
										
											2020-04-18 01:19:35 +03:00
class Context(pydyf.Stream):
    """PDF stream object with context storing alpha states.

    The context also keeps references to the resource dictionaries (alpha
    states, x-objects, patterns and shadings) it registers new objects into,
    and caches the last color, alpha and font set in order to avoid writing
    redundant operators.

    """

    def __init__(self, document, page_rectangle, alpha_states, x_objects,
                 patterns, shadings, *args, **kwargs):
        """Create a compressed stream bound to a document and its resources.

        Extra positional and keyword arguments are given to ``pydyf.Stream``.

        """
        super().__init__(*args, **kwargs)
        self.compress = True
        self.page_rectangle = page_rectangle
        self._document = document
        self._alpha_states = alpha_states
        self._x_objects = x_objects
        self._patterns = patterns
        self._shadings = shadings
        self._current_color = self._current_color_stroke = None
        self._current_alpha = self._current_alpha_stroke = None
        self._current_font = self._current_font_size = None
        self._old_font = self._old_font_size = None

        # These objects are used in text.show_first_line
        self.length = ffi.new('unsigned int *')
        self.ink_rect = ffi.new('PangoRectangle *')
        self.logical_rect = ffi.new('PangoRectangle *')

    def pop_state(self):
        """Pop the graphics state and forget cached color, alpha and font."""
        super().pop_state()
        self._current_color = self._current_color_stroke = None
        self._current_alpha = self._current_alpha_stroke = None
        self._current_font = None

    def begin_text(self):
        """Begin a text object, reusing the previous one when possible.

        When the last item of the stream is ``b'ET'``, this item is removed
        and the font saved by ``end_text`` is restored, instead of beginning
        a new text object.

        """
        # The stream may be empty when text is the first thing drawn.
        if self.stream and self.stream[-1] == b'ET':
            self._current_font = self._old_font
            self.stream.pop()
        else:
            super().begin_text()

    def end_text(self):
        """End the text object, saving the current font for ``begin_text``."""
        self._old_font, self._current_font = self._current_font, None
        super().end_text()

    def set_color_rgb(self, r, g, b, stroke=False):
        """Set the fill or stroke color, unless it is already the current."""
        color = (r, g, b)
        if stroke:
            if color == self._current_color_stroke:
                return
            self._current_color_stroke = color
        else:
            if color == self._current_color:
                return
            self._current_color = color
        super().set_color_rgb(r, g, b, stroke)

    def set_font_size(self, font, size):
        """Set the font and its size, unless they are already the current."""
        if (font, size) == self._current_font:
            return
        self._current_font = (font, size)
        super().set_font_size(font, size)

    def set_alpha(self, alpha, stroke=False):
        """Set the fill or stroke alpha, unless it is already the current.

        The alpha state is created in the alpha states dictionary if missing.
        With ``stroke=None`` the created state sets both fill and stroke
        alpha values.

        """
        if stroke:
            if alpha == self._current_alpha_stroke:
                return
            self._current_alpha_stroke = alpha
        else:
            if alpha == self._current_alpha:
                return
            self._current_alpha = alpha

        # NOTE(review): states are keyed by alpha only, so a state created
        # for fill is reused as is when the same alpha is asked for stroke
        # (and vice versa). Looks unintended, confirm before changing keys.
        if alpha not in self._alpha_states:
            self._alpha_states[alpha] = pydyf.Dictionary()
            if stroke in (None, False):
                self._alpha_states[alpha]['ca'] = alpha
            if stroke in (None, True):
                self._alpha_states[alpha]['CA'] = alpha
        self.set_state(alpha)

    def add_font(self, font_hash, font_content, pango_font):
        """Create a ``Font``, store it in the document fonts and return it."""
        self._document.fonts[font_hash] = Font(font_content, pango_font)
        return self._document.fonts[font_hash]

    def get_fonts(self):
        """Return the fonts dictionary of the document."""
        return self._document.fonts

    def sub_context(self, *args, **kwargs):
        """Return a new context sharing this context's resources."""
        return Context(
            self._document, self.page_rectangle, self._alpha_states,
            self._x_objects, self._patterns, self._shadings, *args, **kwargs)

    def push_group(self, bounding_box):
        """Create and return a transparency group with its own resources.

        The group is a form x-object context registered in this context's
        x-objects. ``pop_group`` called on the group gives this context back.

        """
        alpha_states = pydyf.Dictionary()
        x_objects = pydyf.Dictionary()
        patterns = pydyf.Dictionary()
        shadings = pydyf.Dictionary()
        resources = pydyf.Dictionary({
            'ExtGState': alpha_states,
            'XObject': x_objects,
            'Pattern': patterns,
            'Shading': shadings,
        })
        extra = pydyf.Dictionary({
            'Type': '/XObject',
            'Subtype': '/Form',
            'BBox': pydyf.Array(bounding_box),
            'Resources': resources,
            'Group': pydyf.Dictionary({
                'Type': '/Group',
                'S': '/Transparency',
                'I': 'true',
                'CS': '/DeviceRGB',
            }),
        })
        group = Context(
            self._document, self.page_rectangle, alpha_states, x_objects,
            patterns, shadings, extra=extra)
        group.id = f'x{len(self._x_objects)}'
        group._parent = self
        self._x_objects[group.id] = group
        return group

    def pop_group(self):
        """Return the context this group has been pushed from.

        Only contexts created by ``push_group`` have a parent.

        """
        return self._parent

    def add_image(self, pillow_image, image_rendering, optimize_image):
        """Store an image in the x-objects and return its name.

        JPEG images are stored as JPEG, other images are stored as JPEG 2000
        with the alpha channel of RGBA images stored in a separate soft mask.

        :param pillow_image:
            Pillow image to store.
        :param image_rendering:
            interpolation is enabled only when this value is ``'auto'``.
        :param optimize_image:
            value given as ``optimize`` when saving the image.
        :return:
            the name of the image in the x-objects dictionary.

        """
        # Read the format before converting: only the original image is
        # known to carry it.
        image_format = pillow_image.format
        if pillow_image.mode in ('1', 'P'):
            # Bilevel and palette images are stored as RGB.
            pillow_image = pillow_image.convert('RGB')
        elif pillow_image.mode not in ('RGB', 'RGBA', 'L', 'CMYK'):
            # Convert instead of failing later with no color space defined.
            LOGGER.warning('Unknown image mode: %s', pillow_image.mode)
            pillow_image = pillow_image.convert('RGB')
        # The color space is chosen after conversion, so that it always
        # matches the data that is actually stored.
        image_mode = pillow_image.mode
        if image_mode in ('RGB', 'RGBA'):
            color_space = '/DeviceRGB'
        elif image_mode == 'L':
            color_space = '/DeviceGray'
        else:
            color_space = '/DeviceCMYK'

        interpolate = 'true' if image_rendering == 'auto' else 'false'
        extra = pydyf.Dictionary({
            'Type': '/XObject',
            'Subtype': '/Image',
            'Width': pillow_image.width,
            'Height': pillow_image.height,
            'ColorSpace': color_space,
            'BitsPerComponent': 8,
            'Interpolate': interpolate,
        })

        image_file = io.BytesIO()
        if image_format == 'JPEG':
            extra['Filter'] = '/DCTDecode'
            pillow_image.save(
                image_file, format='JPEG', optimize=optimize_image)
        else:
            extra['Filter'] = '/JPXDecode'
            if image_mode == 'RGBA':
                # Store the alpha channel as a gray soft mask image.
                alpha = pillow_image.getchannel('A')
                pillow_image = pillow_image.convert('RGB')
                alpha_file = io.BytesIO()
                alpha.save(
                    alpha_file, format='JPEG2000', optimize=optimize_image,
                    num_resolutions=1)
                extra['SMask'] = pydyf.Stream([alpha_file.getvalue()], extra={
                    'Filter': '/JPXDecode',
                    'Type': '/XObject',
                    'Subtype': '/Image',
                    'Width': pillow_image.width,
                    'Height': pillow_image.height,
                    'ColorSpace': '/DeviceGray',
                    'BitsPerComponent': 8,
                    'Interpolate': interpolate,
                })

            # Set number of resolutions to 1 because of
            # https://github.com/uclouvain/openjpeg/issues/215
            pillow_image.save(
                image_file, format='JPEG2000', optimize=optimize_image,
                num_resolutions=1)

        stream = [image_file.getvalue()]
        xobject = pydyf.Stream(stream, extra=extra)
        image_name = f'Im{len(self._x_objects)}'
        self._x_objects[image_name] = xobject
        return image_name

    def add_pattern(self, x, y, width, height, repeat_width, repeat_height):
        """Create and return a tiling pattern with its own resources.

        The pattern is a context registered in this context's patterns.

        """
        alpha_states = pydyf.Dictionary()
        x_objects = pydyf.Dictionary()
        patterns = pydyf.Dictionary()
        shadings = pydyf.Dictionary()
        resources = pydyf.Dictionary({
            'ExtGState': alpha_states,
            'XObject': x_objects,
            'Pattern': patterns,
            'Shading': shadings,
        })
        # Flip the vertical axis and move the origin to (x, y), y being
        # counted from the fourth value of the page rectangle.
        matrix = (1, 0, 0, -1, x, self.page_rectangle[3] - y)
        extra = pydyf.Dictionary({
            'PatternType': 1,
            'BBox': pydyf.Array([0, 0, width, height]),
            'XStep': repeat_width,
            'YStep': repeat_height,
            'TilingType': 1,
            'PaintType': 1,
            # NOTE(review): 0.75 is presumably the CSS pixels to PDF points
            # ratio, to confirm.
            'Matrix': pydyf.Array(0.75 * i for i in matrix),
            'Resources': resources,
        })
        pattern = Context(
            self._document, self.page_rectangle, alpha_states, x_objects,
            patterns, shadings, extra=extra)
        pattern.id = f'p{len(self._patterns)}'
        self._patterns[pattern.id] = pattern
        return pattern

    def add_shading(self):
        """Create an empty shading dictionary, register it and return it."""
        shading = pydyf.Dictionary()
        shading.id = f's{len(self._shadings)}'
        self._shadings[shading.id] = shading
        return shading
-												The Document is back! (But different.)

											
										
										
											2012-10-02 20:59:02 +04:00
-												Merge document and pdf modules

											
										
										
											2020-04-19 17:49:37 +03:00
# Node of a bookmark tree: a label, a destination, a collection of child
# nodes and a state.
BookmarkSubtree = collections.namedtuple(
    'BookmarkSubtree', ('label', 'destination', 'children', 'state'))
-												Handle transforms and bookmarks

											
										
										
											2020-04-18 23:12:25 +03:00
-												Merge document and pdf modules

											
										
										
											2020-04-19 17:49:37 +03:00
def _write_pdf_attachment(pdf, attachment, url_fetcher):
    """Write an attachment to the PDF stream.

    :param pdf:
        PDF object the embedded file stream and the file specification are
        added to.
    :param attachment:
        an ``Attachment``, a ``(url, description)`` tuple, or any other
        value given as ``guess`` to build an ``Attachment``.
    :param url_fetcher:
        URL fetcher given to the ``Attachment`` objects built here.
    :return:
        the attachment PDF dictionary, or ``None`` when the attachment
        could not be fetched.

    """
    # Attachments from document links like <link> or <a> can only be URLs.
    # They're passed in as tuples
    url = ''
    if isinstance(attachment, tuple):
        url, description = attachment
        attachment = Attachment(
            url=url, url_fetcher=url_fetcher, description=description)
    elif not isinstance(attachment, Attachment):
        attachment = Attachment(guess=attachment, url_fetcher=url_fetcher)

    try:
        with attachment.source as (source_type, source, url, _):
            if isinstance(source, bytes):
                source = io.BytesIO(source)
            uncompressed_length = 0
            # Compressed chunks are joined once at the end, instead of
            # growing a bytes object at each iteration.
            chunks = []
            md5 = hashlib.md5()
            compress = zlib.compressobj()
            for data in iter(lambda: source.read(4096), b''):
                uncompressed_length += len(data)
                md5.update(data)
                chunks.append(compress.compress(data))
            chunks.append(compress.flush(zlib.Z_FINISH))
            stream = b''.join(chunks)
            file_extra = pydyf.Dictionary({
                'Type': '/EmbeddedFile',
                'Filter': '/FlateDecode',
                'Params': pydyf.Dictionary({
                    'CheckSum': f'<{md5.hexdigest()}>',
                    'Size': uncompressed_length,
                })
            })
            file_stream = pydyf.Stream([stream], file_extra)
            pdf.add_object(file_stream)
    except URLFetchingError as exception:
        LOGGER.error('Failed to load attachment: %s', exception)
        return

    # TODO: Use the result object from a URL fetch operation to provide more
    # details on the possible filename.
    if url and urlsplit(url).path:
        filename = basename(unquote(urlsplit(url).path))
    else:
        filename = 'attachment.bin'

    file_specification = pydyf.Dictionary({
        'Type': '/Filespec',
        'F': pydyf.String(),
        'UF': pydyf.String(filename),
        'EF': pydyf.Dictionary({'F': file_stream.reference}),
        'Desc': pydyf.String(attachment.description or ''),
    })
    pdf.add_object(file_specification)
    return file_specification
-												Merge document and pdf modules

											
										
										
											2020-04-19 17:49:37 +03:00
 								def create_bookmarks(bookmarks, pdf, parent=None):
 								    """Build the PDF outline entries for one level of the bookmark tree.

 								    Each bookmark gets an outline dictionary registered in ``pdf``; its
 								    children are handled recursively with that dictionary as parent.

 								    :param bookmarks:
 								        Sized iterable of ``(title, (page, x, y), children, state)``
 								        tuples, ``children`` having the same structure.
 								    :param pdf:
 								        Object exposing ``objects``, ``pages`` and ``add_object``.
 								    :param parent:
 								        Outline dictionary owning this level, :obj:`None` at top level.
 								    :returns:
 								        ``(outlines, count)``, the outline dictionaries of this level and
 								        the number of entries of this level plus the counts of the
 								        children of every entry whose state is not ``'closed'``.

 								    """
 								    siblings = []
 								    visible = len(bookmarks)
 								    for title, (page, x, y), children, state in bookmarks:
 								        # NOTE(review): the ``page * 3`` stride presumably means that
 								        # ``Kids`` stores three items per page reference; confirm
 								        # against pydyf.
 								        page_object = pdf.objects[pdf.pages['Kids'][page * 3]]
 								        entry = pydyf.Dictionary({
 								            'Title': pydyf.String(title),
 								            'Dest': pydyf.Array(
 								                (page_object.reference, '/XYZ', x, y, 0)),
 								        })
 								        # Register the entry before recursing, children need its
 								        # reference as their parent.
 								        pdf.add_object(entry)
 								        descendants, descendants_count = create_bookmarks(
 								            children, pdf, parent=entry)

 								        # A closed entry stores a negated count and hides its
 								        # descendants from the count of the level above.
 								        if state == 'closed':
 								            entry['Count'] = -descendants_count
 								        else:
 								            entry['Count'] = descendants_count
 								            visible += descendants_count

 								        if siblings:
 								            previous = siblings[-1]
 								            entry['Prev'] = previous.reference
 								            previous['Next'] = entry.reference
 								        if descendants:
 								            entry['First'] = descendants[0].reference
 								            entry['Last'] = descendants[-1].reference
 								        if parent is not None:
 								            entry['Parent'] = parent.reference
 								        siblings.append(entry)
 								    return siblings, visible
-												Merge document and pdf modules

											
										
										
											2020-04-19 17:49:37 +03:00
def add_hyperlinks(links, anchors, matrix, pdf, page, names):
    """Add link annotations and named destinations for one PDF page.

    :param links:
        Iterable of ``(link_type, link_target, rectangle)`` tuples, where
        ``rectangle`` is ``(x1, y1, x2, y2)``. Only ``'internal'`` and
        ``'external'`` links produce an annotation here.
    :param anchors:
        Iterable of ``(anchor_name, x, y)`` tuples.
    :param matrix:
        Object whose ``transform_point(x, y)`` maps the coordinates.
    :param pdf:
        Object whose ``add_object`` registers the annotations.
    :param page:
        Page dictionary, its ``'Annots'`` array is created when needed.
    :param names:
        List extended with alternating anchor names and destinations.

    """
    for link_type, link_target, rectangle in links:
        left, bottom = matrix.transform_point(*rectangle[:2])
        right, top = matrix.transform_point(*rectangle[2:])
        if link_type not in ('internal', 'external'):
            continue
        annotation = pydyf.Dictionary({
            'Type': '/Annot',
            'Subtype': '/Link',
            'Rect': pydyf.Array([left, bottom, right, top]),
            'BS': pydyf.Dictionary({'W': 0}),
        })
        if link_type == 'internal':
            # Internal links point to a named destination.
            annotation['Dest'] = pydyf.String(link_target)
        else:
            annotation['A'] = pydyf.Dictionary({
                'Type': '/Action',
                'S': '/URI',
                'URI': pydyf.String(link_target),
            })
        pdf.add_object(annotation)
        if 'Annots' not in page:
            page['Annots'] = pydyf.Array()
        page['Annots'].append(annotation.reference)

    for anchor_name, anchor_x, anchor_y in anchors:
        target_x, target_y = matrix.transform_point(anchor_x, anchor_y)
        destination = pydyf.Array(
            [page.reference, '/XYZ', target_x, target_y, 0])
        names.append(pydyf.String(anchor_name))
        names.append(destination)
-												Pre-compute transformation matricies.

… so that they are available when getting meta-data.

											
										
										
											2012-10-06 13:26:55 +04:00
-												Have metadata account for CSS transforms.

											
										
										
											2012-10-07 00:09:17 +04:00
def rectangle_aabb(matrix, pos_x, pos_y, width, height):
    """Apply a transformation matrix to an axis-aligned rectangle.

    The four corners are transformed with ``matrix.transform_point`` and the
    extremes of the results are kept.

    Return its axis-aligned bounding box as ``(x1, y1, x2, y2)``.

    """
    right = pos_x + width
    bottom = pos_y + height
    corners = (
        (pos_x, pos_y), (right, pos_y), (pos_x, bottom), (right, bottom))
    xs = []
    ys = []
    for corner_x, corner_y in corners:
        new_x, new_y = matrix.transform_point(corner_x, corner_y)
        xs.append(new_x)
        ys.append(new_y)
    return min(xs), min(ys), max(xs), max(ys)
-												Have metadata account for CSS transforms.

											
										
										
											2012-10-07 00:09:17 +04:00
-												Remove resolve_links from Document class

											
										
										
											2020-04-19 19:26:49 +03:00
def resolve_links(pages):
    """Resolve internal hyperlinks.

    Links to a missing anchor are removed and an error is logged.

    If multiple anchors have the same name, the first one is used.

    :param pages:
        Re-iterable sequence of objects with ``anchors`` (a mapping of
        anchor names to ``(x, y)`` points) and ``links`` (an iterable of
        ``(link_type, target, rectangle)`` tuples) attributes.
    :returns:
        A generator yielding one ``(page_links, page_anchors)`` tuple per
        page. ``page_links`` is a list like :attr:`Page.links`, without the
        internal links whose anchor is not defined in any page.
        ``page_anchors`` is a list of ``(anchor_name, x, y)`` tuples for
        the anchors whose first definition is in this page.

    """
    known_anchors = set()
    paged_anchors = []
    for page in pages:
        page_anchors = []
        for anchor_name, (point_x, point_y) in page.anchors.items():
            # Only the first definition of a name is kept.
            if anchor_name not in known_anchors:
                page_anchors.append((anchor_name, point_x, point_y))
                known_anchors.add(anchor_name)
        paged_anchors.append(page_anchors)

    # All anchors must be known before filtering links, as a link may point
    # to an anchor defined in a later page.
    for page, page_anchors in zip(pages, paged_anchors):
        page_links = []
        for link in page.links:
            link_type, anchor_name, rectangle = link
            if link_type != 'internal':
                # External and attachment links are kept untouched.
                page_links.append(link)
            elif anchor_name in known_anchors:
                page_links.append((link_type, anchor_name, rectangle))
            else:
                LOGGER.error(
                    'No anchor #%s for internal URI reference',
                    anchor_name)
        yield page_links, page_anchors
-												Merge document and pdf modules

											
										
										
											2020-04-19 17:49:37 +03:00
class Matrix(list):
    """Transformation matrix stored as a list of rows.

    Built from six coefficients, the rows are
    ``[[a, b, 0], [c, d, 0], [e, f, 1]]``: points are row vectors multiplied
    on the left, and ``(e, f)`` is the translation.

    """
    def __init__(self, a=1, b=0, c=0, d=1, e=0, f=0, matrix=None):
        """Create a matrix from coefficients, or from rows if provided.

        The coefficients are ignored when ``matrix`` is given. The default
        values give the identity matrix.

        """
        rows = [[a, b, 0], [c, d, 0], [e, f, 1]] if matrix is None else matrix
        super().__init__(rows)

    def __matmul__(self, other):
        """Return the product ``self @ other`` as a new :class:`Matrix`.

        ``self`` may have any number of 3-item rows, ``other`` must be 3x3.

        """
        assert len(self[0]) == len(other) == len(other[0]) == 3
        product = []
        for row in self:
            product.append([
                sum(row[k] * other[k][column] for k in range(3))
                for column in range(3)])
        return Matrix(matrix=product)

    @property
    def determinant(self):
        """Determinant of a 3x3 matrix."""
        assert len(self) == len(self[0]) == 3
        top, middle, bottom = self
        return (
            top[0] * (middle[1] * bottom[2] - middle[2] * bottom[1]) -
            middle[0] * (top[1] * bottom[2] - top[2] * bottom[1]) +
            bottom[0] * (top[1] * middle[2] - top[2] * middle[1]))

    def transform_point(self, x, y):
        """Return the transformed point as a 2-item ``[x, y]`` list."""
        transformed = Matrix(matrix=[[x, y, 1]]) @ self
        return transformed[0][:2]
-												Don't use pdfrw anymore

pdfrw is a great piece of software, but we don't know PDF enough to debug the
problems we've met. It's safer to use the new cairo API and get back to manual
edition for attachments and bleed boxes.

We only have two regressions for now:
- some internal links are broken,
- PDF producer is not overwritten.

A mail has been sent to cairo's mailing-list about that:
https://lists.cairographics.org/archives/cairo/2018-August/028694.html

Fix #639, #615, fix #596, fix #565.

											
										
										
											2018-08-06 18:38:02 +03:00
-												Remove useless explicit object inheritance

											
										
										
											2020-01-02 14:06:58 +03:00
class Page:
    """Represents a single rendered page.

    .. versionadded:: 0.15

    Should be obtained from :attr:`Document.pages` but not
    instantiated directly.

    """
    def __init__(self, page_box):
        """Store the page geometry and collect its links, bookmarks, anchors.

        :param page_box:
            The laid-out page box, kept in ``_page_box`` for :meth:`paint`.

        """
        #: The page width, including margins, in CSS pixels.
        self.width = page_box.margin_width()

        #: The page height, including margins, in CSS pixels.
        self.height = page_box.margin_height()

        #: The page bleed widths as a :obj:`dict` with ``'top'``, ``'right'``,
        #: ``'bottom'`` and ``'left'`` as keys, and values in CSS pixels.
        self.bleed = {
            side: page_box.style[f'bleed_{side}'].value
            for side in ('top', 'right', 'bottom', 'left')}

        #: The :obj:`list` of ``(bookmark_level, bookmark_label, target,
        #: state)`` :obj:`tuples <tuple>`. ``bookmark_level`` and
        #: ``bookmark_label`` are respectively an :obj:`int` and a
        #: :obj:`string <str>`, based on the CSS properties of the same
        #: names. ``target`` is an ``(x, y)`` point in CSS pixels from the
        #: top-left of the page. ``state`` is the value of the
        #: ``bookmark_state`` style property of the box.
        self.bookmarks = []

        #: The :obj:`list` of ``(link_type, target, rectangle)`` :obj:`tuples
        #: <tuple>`. A ``rectangle`` is ``(x1, y1, x2, y2)``, in CSS
        #: pixels from the top-left of the page; for a transformed box it is
        #: the axis-aligned bounding box of the transformed hit area.
        #: ``link_type`` is one of three strings:
        #:
        #: * ``'external'``: ``target`` is an absolute URL
        #: * ``'internal'``: ``target`` is an anchor name (see
        #:   :attr:`Page.anchors`).
        #:   The anchor might be defined in another page,
        #:   in multiple pages (in which case the first occurrence is used),
        #:   or not at all.
        #: * ``'attachment'``: ``target`` is an absolute URL and points
        #:   to a resource to attach to the document.
        self.links = []

        #: The :obj:`dict` mapping each anchor name to its target, an
        #: ``(x, y)`` point in CSS pixels from the top-left of the page.
        self.anchors = {}

        self._gather_links_and_bookmarks(page_box)
        self._page_box = page_box

    def _gather_links_and_bookmarks(self, box, parent_matrix=None):
        """Fill links, bookmarks and anchors from ``box`` and its descendants.

        Also sets ``box.transformation_matrix`` on boxes that have a
        transform applied.

        :param box:
            The box to inspect, its children are visited recursively.
        :param parent_matrix:
            Transformation accumulated from the ancestors, or :obj:`None`
            when no ancestor is transformed.

        """
        # Get box transformation matrix.
        # "Transforms apply to block-level and atomic inline-level elements,
        #  but do not apply to elements which may be split into
        #  multiple inline-level boxes."
        # http://www.w3.org/TR/css3-2d-transforms/#introduction
        if box.style['transform'] and not isinstance(box, boxes.InlineBox):
            border_width = box.border_width()
            border_height = box.border_height()
            origin_x, origin_y = box.style['transform_origin']
            offset_x = percentage(origin_x, border_width)
            offset_y = percentage(origin_y, border_height)
            origin_x = box.border_box_x() + offset_x
            origin_y = box.border_box_y() + offset_y

            # Points are row vectors multiplied on the left (see
            # Matrix.transform_point), so the leftmost factor of the final
            # product is applied first: move the origin to (0, 0), apply the
            # functions from the last listed to the first, move back.
            matrix = Matrix(e=origin_x, f=origin_y)
            for name, args in box.style['transform']:
                # Start from the identity coefficients.
                a, b, c, d, e, f = 1, 0, 0, 1, 0, 0
                if name == 'scale':
                    a, d = args
                elif name == 'rotate':
                    a = d = math.cos(args)
                    b = math.sin(args)
                    c = -b
                elif name == 'translate':
                    # Percentages refer to the border box size.
                    e = percentage(args[0], border_width)
                    f = percentage(args[1], border_height)
                elif name == 'skew':
                    b, c = math.tan(args[1]), math.tan(args[0])
                else:
                    assert name == 'matrix'
                    a, b, c, d, e, f = args
                matrix = Matrix(a, b, c, d, e, f) @ matrix
            box.transformation_matrix = (
                Matrix(e=-origin_x, f=-origin_y) @ matrix)
            # The box transformation is applied before the ancestors' one.
            if parent_matrix:
                matrix = box.transformation_matrix @ parent_matrix
            else:
                matrix = box.transformation_matrix
        else:
            matrix = parent_matrix

        bookmark_label = box.bookmark_label
        if box.style['bookmark_level'] == 'none':
            bookmark_level = None
        else:
            bookmark_level = box.style['bookmark_level']
        state = box.style['bookmark_state']
        link = box.style['link']
        anchor_name = box.style['anchor']
        has_bookmark = bookmark_label and bookmark_level
        # 'link' is inherited but redundant on text boxes
        has_link = link and not isinstance(box, (boxes.TextBox, boxes.LineBox))
        # In case of duplicate IDs, only the first is an anchor.
        has_anchor = anchor_name and anchor_name not in self.anchors
        is_attachment = hasattr(box, 'is_attachment') and box.is_attachment

        if has_bookmark or has_link or has_anchor:
            pos_x, pos_y, width, height = box.hit_area()
            if has_link:
                token_type, link = link
                assert token_type == 'url'
                link_type, target = link
                assert isinstance(target, str)
                if link_type == 'external' and is_attachment:
                    link_type = 'attachment'
                # The rectangle is stored as two corners, not as a size.
                if matrix:
                    link = (link_type, target, rectangle_aabb(
                        matrix, pos_x, pos_y, width, height))
                else:
                    link = (link_type, target, (
                        pos_x, pos_y, pos_x + width, pos_y + height))
                self.links.append(link)
            # Bookmarks and anchors target the transformed top-left corner
            # of the hit area.
            if matrix and (has_bookmark or has_anchor):
                pos_x, pos_y = matrix.transform_point(pos_x, pos_y)
            if has_bookmark:
                self.bookmarks.append(
                    (bookmark_level, bookmark_label, (pos_x, pos_y), state))
            if has_anchor:
                self.anchors[anchor_name] = pos_x, pos_y

        for child in box.all_children():
            self._gather_links_and_bookmarks(child, matrix)

    def paint(self, context, left_x=0, top_y=0, scale=1, clip=False):
        """Paint the page into the PDF file.

        :type context: :class:`pdf.Context`
        :param context:
            A context object.
        :type left_x: float
        :param left_x:
            X coordinate of the left of the page, in PDF points.
        :type top_y: float
        :param top_y:
            Y coordinate of the top of the page, in PDF points.
        :type scale: float
        :param scale:
            Zoom scale.
        :type clip: bool
        :param clip:
            Whether to clip/cut content outside the page. If false or
            not provided, content can overflow.

        """
        with stacked(context):
            # Make (0, 0) the top-left corner, and make user units CSS pixels:
            context.transform(scale, 0, 0, scale, left_x, top_y)
            if clip:
                # Restrict painting to the page area, margins included.
                width = self.width
                height = self.height
                context.rectangle(0, 0, width, height)
                context.clip()
            draw_page(self._page_box, context)
-												The Document is back! (But different.)

											
										
										
											2012-10-02 20:59:02 +04:00
-												Remove useless explicit object inheritance

											
										
										
											2020-01-02 14:06:58 +03:00
+								class DocumentMetadata:
-												Clean a lot of things in API documentation

											
										
										
											2019-02-22 13:34:46 +03:00
+								    """Meta-information belonging to a whole :class:`Document`.
-												Add PDF metadata parsed from HTML. Fix #77.

<title> → /Title
<meta name=author> → /Author
<meta name=description> → /Subject
<meta name=keywords> → /Keywords
<meta name=generator> → /Creator
<meta name=dcterms.created> → /CreationDate
<meta name=dcterms.modified> → /ModDate
"WeasyPrint vX.Y" → /Producer

											
										
										
											2013-07-14 15:08:02 +04:00
-												Clean a lot of things in API documentation

											
										
										
											2019-02-22 13:34:46 +03:00
+								    .. versionadded:: 0.20
-												Add PDF metadata parsed from HTML. Fix #77.

<title> → /Title
<meta name=author> → /Author
<meta name=description> → /Subject
<meta name=keywords> → /Keywords
<meta name=generator> → /Creator
<meta name=dcterms.created> → /CreationDate
<meta name=dcterms.modified> → /ModDate
"WeasyPrint vX.Y" → /Producer

											
										
										
											2013-07-14 15:08:02 +04:00
-												Clean a lot of things in API documentation

											
										
										
											2019-02-22 13:34:46 +03:00
+								    New attributes may be added in future versions of WeasyPrint.
-												Add PDF metadata parsed from HTML. Fix #77.

<title> → /Title
<meta name=author> → /Author
<meta name=description> → /Subject
<meta name=keywords> → /Keywords
<meta name=generator> → /Creator
<meta name=dcterms.created> → /CreationDate
<meta name=dcterms.modified> → /ModDate
"WeasyPrint vX.Y" → /Producer

											
										
										
											2013-07-14 15:08:02 +04:00
 								    """
 								    def __init__(self, title=None, authors=None, description=None,
-												Added support for PDF attachments (v2)

											
										
										
											2014-04-04 14:32:21 +04:00
+								                 keywords=None, generator=None, created=None, modified=None,
 								                 attachments=None):
-												Add PDF metadata parsed from HTML. Fix #77.

<title> → /Title
<meta name=author> → /Author
<meta name=description> → /Subject
<meta name=keywords> → /Keywords
<meta name=generator> → /Creator
<meta name=dcterms.created> → /CreationDate
<meta name=dcterms.modified> → /ModDate
"WeasyPrint vX.Y" → /Producer

											
										
										
											2013-07-14 15:08:02 +04:00
+								        #: The title of the document, as a string or :obj:`None`.
 								        #: Extracted from the ``<title>`` element in HTML
 								        #: and written to the ``/Title`` info field in PDF.
 								        self.title = title
-												Clean a lot of things in API documentation

											
										
										
											2019-02-22 13:34:46 +03:00
+								        #: The authors of the document, as a list of strings.
 								        #: (Defaults to the empty list.)
-												Add PDF metadata parsed from HTML. Fix #77.

<title> → /Title
<meta name=author> → /Author
<meta name=description> → /Subject
<meta name=keywords> → /Keywords
<meta name=generator> → /Creator
<meta name=dcterms.created> → /CreationDate
<meta name=dcterms.modified> → /ModDate
"WeasyPrint vX.Y" → /Producer

											
										
										
											2013-07-14 15:08:02 +04:00
+								        #: Extracted from the ``<meta name=author>`` elements in HTML
 								        #: and written to the ``/Author`` info field in PDF.
 								        self.authors = authors or []
 								        #: The description of the document, as a string or :obj:`None`.
 								        #: Extracted from the ``<meta name=description>`` element in HTML
 								        #: and written to the ``/Subject`` info field in PDF.
 								        self.description = description
 								        #: Keywords associated with the document, as a list of strings.
 								        #: (Defaults to the empty list.)
 								        #: Extracted from ``<meta name=keywords>`` elements in HTML
 								        #: and written to the ``/Keywords`` info field in PDF.
 								        self.keywords = keywords or []
 								        #: The name of one of the software packages
 								        #: used to generate the document, as a string or :obj:`None`.
 								        #: Extracted from the ``<meta name=generator>`` element in HTML
 								        #: and written to the ``/Creator`` info field in PDF.
 								        self.generator = generator
 								        #: The creation date of the document, as a string or :obj:`None`.
 								        #: Dates are in one of the six formats specified in
-												Clean a lot of things in API documentation

											
										
										
											2019-02-22 13:34:46 +03:00
+								        #: `W3C’s profile of ISO 8601 <http://www.w3.org/TR/NOTE-datetime>`_.
-												Add PDF metadata parsed from HTML. Fix #77.

<title> → /Title
<meta name=author> → /Author
<meta name=description> → /Subject
<meta name=keywords> → /Keywords
<meta name=generator> → /Creator
<meta name=dcterms.created> → /CreationDate
<meta name=dcterms.modified> → /ModDate
"WeasyPrint vX.Y" → /Producer

											
										
										
											2013-07-14 15:08:02 +04:00
+								        #: Extracted from the ``<meta name=dcterms.created>`` element in HTML
 								        #: and written to the ``/CreationDate`` info field in PDF.
 								        self.created = created
 								        #: The modification date of the document, as a string or :obj:`None`.
 								        #: Dates are in one of the six formats specified in
-												Clean a lot of things in API documentation

											
										
										
											2019-02-22 13:34:46 +03:00
+								        #: `W3C’s profile of ISO 8601 <http://www.w3.org/TR/NOTE-datetime>`_.
-												Add PDF metadata parsed from HTML. Fix #77.

<title> → /Title
<meta name=author> → /Author
<meta name=description> → /Subject
<meta name=keywords> → /Keywords
<meta name=generator> → /Creator
<meta name=dcterms.created> → /CreationDate
<meta name=dcterms.modified> → /ModDate
"WeasyPrint vX.Y" → /Producer

											
										
										
											2013-07-14 15:08:02 +04:00
+								        #: Extracted from the ``<meta name=dcterms.modified>`` element in HTML
 								        #: and written to the ``/ModDate`` info field in PDF.
 								        self.modified = modified
-												Clean a lot of things in API documentation

											
										
										
											2019-02-22 13:34:46 +03:00
+								        #: File attachments, as a list of tuples of URL and a description or
 								        #: :obj:`None`. (Defaults to the empty list.)
-												Added support for PDF attachments (v2)

											
										
										
											2014-04-04 14:32:21 +04:00
+								        #: Extracted from the ``<link rel=attachment>`` elements in HTML
 								        #: and written to the ``/EmbeddedFiles`` dictionary in PDF.
-												Clean a lot of things in API documentation

											
										
										
											2019-02-22 13:34:46 +03:00
+								        #:
 								        #: .. versionadded:: 0.22
-												Added support for PDF attachments (v2)

											
										
										
											2014-04-04 14:32:21 +04:00
+								        self.attachments = attachments or []
-												Add PDF metadata parsed from HTML. Fix #77.

<title> → /Title
<meta name=author> → /Author
<meta name=description> → /Subject
<meta name=keywords> → /Keywords
<meta name=generator> → /Creator
<meta name=dcterms.created> → /CreationDate
<meta name=dcterms.modified> → /ModDate
"WeasyPrint vX.Y" → /Producer

											
										
										
											2013-07-14 15:08:02 +04:00
-												Remove useless explicit object inheritance

											
										
										
											2020-01-02 14:06:58 +03:00
+								class Document:
-												Clean a lot of things in API documentation

											
										
										
											2019-02-22 13:34:46 +03:00
+								    """A rendered document ready to be painted on a cairo surface.
-												Add metadata in the low-level API.

											
										
										
											2012-10-04 13:35:25 +04:00
-												Clean a lot of things in API documentation

											
										
										
											2019-02-22 13:34:46 +03:00
+								    Typically obtained from :meth:`HTML.render() <weasyprint.HTML.render>`, but
 								    can also be instantiated directly with a list of :class:`pages <Page>`, a
 								    set of :class:`metadata <DocumentMetadata>`, a :func:`url_fetcher
 								    <weasyprint.default_url_fetcher>` function, and a :class:`font_config
 								    <weasyprint.fonts.FontConfiguration>`.
-												Add metadata in the low-level API.

											
										
										
											2012-10-04 13:35:25 +04:00
 								    """
-												Refactor LayoutContext building

 this will provide a simpler API for testing context-using functions inside tests

											
										
										
											2019-07-23 08:12:08 +03:00
-												The Document is back! (But different.)

											
										
										
											2012-10-02 20:59:02 +04:00
+								    @classmethod
-												First steps to replace cairo with pydyf

											
										
										
											2020-04-18 01:19:35 +03:00
+								    def _build_layout_context(cls, html, stylesheets,
-												Add an option to optimize embedded images size

											
										
										
											2020-06-22 17:05:14 +03:00
+								                              presentational_hints=False,
 								                              optimize_images=False, font_config=None,
-												Add an image cache that can be shared between documents

Fix #969.

											
										
										
											2020-06-22 17:32:12 +03:00
+								                              counter_style=None, image_cache=None):
-												Add a font_config parameter to various render methods

Fix #506.

											
										
										
											2017-10-01 16:17:32 +03:00
+								        if font_config is None:
 								            font_config = FontConfiguration()
-												Add base code for @counter-style rules

											
										
										
											2019-12-24 16:39:40 +03:00
+								        if counter_style is None:
-												Add default counter class

											
										
										
											2019-12-24 17:56:24 +03:00
+								            counter_style = CounterStyle()
-												Don't use a global target collector

											
										
										
											2018-03-28 01:34:34 +03:00
+								        target_collector = TargetCollector()
-												Use cssselect2 instead of cssselect

											
										
										
											2017-06-30 18:54:02 +03:00
+								        page_rules = []
-												Use font config in stylesheets given in CLI

Related to #596.

											
										
										
											2018-03-24 01:57:33 +03:00
+								        user_stylesheets = []
-												Add an image cache that can be shared between documents

Fix #969.

											
										
										
											2020-06-22 17:32:12 +03:00
+								        image_cache = {} if image_cache is None else image_cache
-												Use font config in stylesheets given in CLI

Related to #596.

											
										
										
											2018-03-24 01:57:33 +03:00
+								        for css in stylesheets or []:
 								            if not hasattr(css, 'matcher'):
 								                css = CSS(
 								                    guess=css, media_type=html.media_type,
-												Add base code for @counter-style rules

											
										
										
											2019-12-24 16:39:40 +03:00
+								                    font_config=font_config, counter_style=counter_style)
-												Use font config in stylesheets given in CLI

Related to #596.

											
										
										
											2018-03-24 01:57:33 +03:00
+								            user_stylesheets.append(css)
-												Put media queries in a separate module, create a class for style_for

											
										
										
											2018-08-17 11:30:51 +03:00
+								        style_for = get_all_computed_styles(
-												Use font config in stylesheets given in CLI

Related to #596.

											
										
										
											2018-03-24 01:57:33 +03:00
+								            html, user_stylesheets, presentational_hints, font_config,
-												Add base code for @counter-style rules

											
										
										
											2019-12-24 16:39:40 +03:00
+								            counter_style, page_rules, target_collector)
-												Refine docstrings.

											
										
										
											2012-10-05 20:50:40 +04:00
+								        get_image_from_uri = functools.partial(
-												Add an image cache that can be shared between documents

Fix #969.

											
										
										
											2020-06-22 17:32:12 +03:00
+								            original_get_image_from_uri, image_cache, html.url_fetcher,
 								            optimize_images)
-												Use a separate logger for generation progress

											
										
										
											2019-01-04 01:02:44 +03:00
+								        PROGRESS_LOGGER.info('Step 4 - Creating formatting structure')
-												Refactor LayoutContext building

 this will provide a simpler API for testing context-using functions inside tests

											
										
										
											2019-07-23 08:12:08 +03:00
+								        context = LayoutContext(
-												First steps to replace cairo with pydyf

											
										
										
											2020-04-18 01:19:35 +03:00
+								            style_for, get_image_from_uri, font_config, counter_style,
 								            target_collector)
-												Refactor LayoutContext building

 this will provide a simpler API for testing context-using functions inside tests

											
										
										
											2019-07-23 08:12:08 +03:00
+								        return context
 								    @classmethod
-												Merge document and pdf modules

											
										
										
											2020-04-19 17:49:37 +03:00
+								    def _render(cls, html, stylesheets, presentational_hints=False,
-												Merge remote-tracking branch 'origin/master' into pydyf

											
										
										
											2020-07-31 15:46:36 +03:00
+								                optimize_images=False, font_config=None, counter_style=None,
 								                image_cache=None):
-												Refactor LayoutContext building

 this will provide a simpler API for testing context-using functions inside tests

											
										
										
											2019-07-23 08:12:08 +03:00
+								        if font_config is None:
 								            font_config = FontConfiguration()
-												Add base code for @counter-style rules

											
										
										
											2019-12-24 16:39:40 +03:00
+								        if counter_style is None:
-												Add default counter class

											
										
										
											2019-12-24 17:56:24 +03:00
+								            counter_style = CounterStyle()
-												Add base code for @counter-style rules

											
										
										
											2019-12-24 16:39:40 +03:00
-												Refactor LayoutContext building

 this will provide a simpler API for testing context-using functions inside tests

											
										
										
											2019-07-23 08:12:08 +03:00
+								        context = cls._build_layout_context(
-												Merge remote-tracking branch 'origin/master' into pydyf

											
										
										
											2020-07-31 15:46:36 +03:00
+								            html, stylesheets, presentational_hints, optimize_images,
 								            font_config, counter_style, image_cache)
-												Refactor LayoutContext building

 this will provide a simpler API for testing context-using functions inside tests

											
										
										
											2019-07-23 08:12:08 +03:00
-												Avoid useless nested function call

											
										
										
											2018-08-08 18:47:47 +03:00
+								        root_box = build_formatting_structure(
-												Fix lint

											
										
										
											2019-07-23 19:07:14 +03:00
+								            html.etree_element, context.style_for, context.get_image_from_uri,
-												Add base code for @counter-style rules

											
										
										
											2019-12-24 16:39:40 +03:00
+								            html.base_url, context.target_collector, counter_style)
-												Refactor LayoutContext building

 this will provide a simpler API for testing context-using functions inside tests

											
										
										
											2019-07-23 08:12:08 +03:00
 								        page_boxes = layout_document(html, root_box, context)
-												Clean the font config after rendering the document

											
										
										
											2016-10-27 12:41:34 +03:00
+								        rendering = cls(
-												First steps to replace cairo with pydyf

											
										
										
											2020-04-18 01:19:35 +03:00
+								            [Page(page_box) for page_box in page_boxes],
-												Clean document.py

											
										
										
											2020-05-30 02:11:30 +03:00
+								            DocumentMetadata(**get_html_metadata(html)),
-												Keep a reference to font_config in Document

Fix #566.

											
										
										
											2018-01-28 17:45:39 +03:00
+								            html.url_fetcher, font_config)
-												Clean the font config after rendering the document

											
										
										
											2016-10-27 12:41:34 +03:00
+								        return rendering
-												The Document is back! (But different.)

											
										
										
											2012-10-02 20:59:02 +04:00
-												Use dedicated resources for x-objects and patterns

											
										
										
											2020-06-07 16:18:00 +03:00
+								    def _use_references(self, pdf, resources):
 								        # XObjects
 								        for key, x_object in resources.get('XObject', {}).items():
 								            pdf.add_object(x_object)
 								            resources['XObject'][key] = x_object.reference
 								            if 'Resources' in x_object.extra:
 								                self._use_references(pdf, x_object.extra['Resources'])
 								                pdf.add_object(x_object.extra['Resources'])
 								                x_object.extra['Resources'] = (
 								                    x_object.extra['Resources'].reference)
 								        # Patterns
 								        for key, pattern in resources.get('Pattern', {}).items():
 								            pdf.add_object(pattern)
 								            resources['Pattern'][key] = pattern.reference
 								            if 'Resources' in pattern.extra:
 								                self._use_references(pdf, pattern.extra['Resources'])
 								                pdf.add_object(pattern.extra['Resources'])
 								                pattern.extra['Resources'] = (
 								                    pattern.extra['Resources'].reference)
 								        # Shadings
 								        for key, shading in resources.get('Shading', {}).items():
 								            pdf.add_object(shading)
-												Fix shading dictionary

											
										
										
											2020-06-08 17:34:28 +03:00
+								            resources['Shading'][key] = shading.reference
-												Use dedicated resources for x-objects and patterns

											
										
										
											2020-06-07 16:18:00 +03:00
-												Keep a reference to font_config in Document

Fix #566.

											
										
										
											2018-01-28 17:45:39 +03:00
+								    def __init__(self, pages, metadata, url_fetcher, font_config):
-												The Document is back! (But different.)

											
										
										
											2012-10-02 20:59:02 +04:00
+								        #: A list of :class:`Page` objects.
 								        self.pages = pages
-												Add PDF metadata parsed from HTML. Fix #77.

<title> → /Title
<meta name=author> → /Author
<meta name=description> → /Subject
<meta name=keywords> → /Keywords
<meta name=generator> → /Creator
<meta name=dcterms.created> → /CreationDate
<meta name=dcterms.modified> → /ModDate
"WeasyPrint vX.Y" → /Producer

											
										
										
											2013-07-14 15:08:02 +04:00
+								        #: A :class:`DocumentMetadata` object.
 								        #: Contains information that does not belong to a specific page
 								        #: but to the whole document.
 								        self.metadata = metadata
-												Clean a lot of things in API documentation

											
										
										
											2019-02-22 13:34:46 +03:00
+								        #: A function or other callable with the same signature as
 								        #: :func:`default_url_fetcher` called to fetch external resources such
 								        #: as stylesheets and images.  (See :ref:`url-fetchers`.)
-												Refactored the `url_fetcher` argument for `write_pdf` to an attribute of the `Document` class

											
										
										
											2014-04-18 17:11:45 +04:00
+								        self.url_fetcher = url_fetcher
-												Handle font subsets

											
										
										
											2020-05-31 02:20:38 +03:00
+								        #: A :obj:`dict` of fonts used by the document. Keys are hashes used to
 								        #: identify fonts, values are :class:`Font` objects.
 								        self.fonts = {}
-												Keep a reference to font_config in Document

Fix #566.

											
										
										
											2018-01-28 17:45:39 +03:00
+								        # Keep a reference to font_config to avoid its garbage collection until
 								        # rendering is destroyed. This is needed as font_config.__del__ removes
 								        # fonts that may be used when rendering
 								        self._font_config = font_config
-												The Document is back! (But different.)

											
										
										
											2012-10-02 20:59:02 +04:00
 								    def copy(self, pages='all'):
-												Refine docstrings.

											
										
										
											2012-10-05 20:50:40 +04:00
+								        """Take a subset of the pages.
-												Clean a lot of things in API documentation

											
										
										
											2019-02-22 13:34:46 +03:00
+								        .. versionadded:: 0.15
 								        :type pages: :term:`iterable`
-												Refine docstrings.

											
										
										
											2012-10-05 20:50:40 +04:00
+								        :param pages:
 								            An iterable of :class:`Page` objects from :attr:`pages`.
 								        :return:
 								            A new :class:`Document` object.
-												Docs: add an example combining documents into one PDF file.

											
										
										
											2013-07-14 12:17:40 +04:00
+								        Examples:
-												Refine docstrings.

											
										
										
											2012-10-05 20:50:40 +04:00
-												Docs: add an example combining documents into one PDF file.

											
										
										
											2013-07-14 12:17:40 +04:00
+								        Write two PDF files for odd-numbered and even-numbered pages::
 								            # Python lists count from 0 but pages are numbered from 1.
-												Refine docstrings.

											
										
										
											2012-10-05 20:50:40 +04:00
+								            # [::2] is a slice of even list indexes but odd-numbered pages.
 								            document.copy(document.pages[::2]).write_pdf('odd_pages.pdf')
 								            document.copy(document.pages[1::2]).write_pdf('even_pages.pdf')
-												Docs: add an example combining documents into one PDF file.

											
										
										
											2013-07-14 12:17:40 +04:00
+								        Write each page to a numbred PNG file::
-												Refine docstrings.

											
										
										
											2012-10-05 20:50:40 +04:00
+								            for i, page in enumerate(document.pages):
-												Clean document.py

											
										
										
											2020-05-30 02:11:30 +03:00
+								                document.copy(page).write_png(f'page_{i}.png')
-												Refine docstrings.

											
										
										
											2012-10-05 20:50:40 +04:00
-												Docs: add an example combining documents into one PDF file.

											
										
										
											2013-07-14 12:17:40 +04:00
+								        Combine multiple documents into one PDF file,
 								        using metadata from the first::
-												Docs: Fixed wrong nested list comprehension example

											
										
										
											2019-07-09 01:06:19 +03:00
+								            all_pages = [p for doc in documents for p in doc.pages]
-												Docs: add an example combining documents into one PDF file.

											
										
										
											2013-07-14 12:17:40 +04:00
+								            documents[0].copy(all_pages).write_pdf('combined.pdf')
-												Refine docstrings.

											
										
										
											2012-10-05 20:50:40 +04:00
+								        """
-												The Document is back! (But different.)

											
										
										
											2012-10-02 20:59:02 +04:00
+								        if pages == 'all':
 								            pages = self.pages
-												Refine docstrings.

											
										
										
											2012-10-05 20:50:40 +04:00
+								        elif not isinstance(pages, list):
 								            pages = list(pages)
-												Keep a reference to font_config in Document

Fix #566.

											
										
										
											2018-01-28 17:45:39 +03:00
+								        return type(self)(
 								            pages, self.metadata, self.url_fetcher, self._font_config)
-												The Document is back! (But different.)

											
										
										
											2012-10-02 20:59:02 +04:00
-												Support for post-processing by passing a finisher function to write_pdf

											
										
										
											2020-04-09 02:46:11 +03:00
+								    def write_pdf(self, target=None, zoom=1, attachments=None, finisher=None):
-												Refine docstrings.

											
										
										
											2012-10-05 20:50:40 +04:00
+								        """Paint the pages in a PDF file, with meta-data.
-												The Document is back! (But different.)

											
										
										
											2012-10-02 20:59:02 +04:00
-												Refine docstrings.

											
										
										
											2012-10-05 20:50:40 +04:00
+								        PDF files written directly by cairo do not have meta-data such as
 								        bookmarks/outlines and hyperlinks.
-												The Document is back! (But different.)

											
										
										
											2012-10-02 20:59:02 +04:00
-												Clean a lot of things in API documentation

											
										
										
											2019-02-22 13:34:46 +03:00
+								        :type target: str, pathlib.Path or file object
-												Refine docstrings.

											
										
										
											2012-10-05 20:50:40 +04:00
+								        :param target:
-												Clean a lot of things in API documentation

											
										
										
											2019-02-22 13:34:46 +03:00
+								            A filename where the PDF file is generated, a file object, or
 								            :obj:`None`.
-												Rename PDF scale to zoom, have the default be 1 rather than 0.75

The 0.75 factor is an implementation detail that should not be exposed
in the API.

											
										
										
											2012-11-23 01:27:34 +04:00
+								        :type zoom: float
 								        :param zoom:
-												Fix typos and awkward grammar on Tutorial and API pages.

											
										
										
											2017-04-28 21:36:14 +03:00
+								            The zoom factor in PDF units per CSS units.  **Warning**:
 								            All CSS units are affected, including physical units like
 								            ``cm`` and named sizes like ``A4``.  For values other than
-												Clean a lot of things in API documentation

											
										
										
											2019-02-22 13:34:46 +03:00
+								            1, the physical CSS units will thus be "wrong".
 								        :type attachments: list
-												Refactored `attachments` attribute from the `HTML` class to an argument for `write_pdf`

											
										
										
											2014-04-22 22:40:46 +04:00
+								        :param attachments: A list of additional file attachments for the
-												Added an `Attachment` class for attachments provided through the API instead of the URL/description tuples

											
										
										
											2014-04-26 01:35:43 +04:00
+								            generated PDF document or :obj:`None`. The list's elements are
-												Clean a lot of things in API documentation

											
										
										
											2019-02-22 13:34:46 +03:00
+								            :class:`Attachment` objects, filenames, URLs or file-like objects.
-												Call finisher

											
										
										
											2020-04-19 11:01:27 +03:00
+								        :param finisher: A finisher function, that accepts the document and a
 								            ``pydyf.PDF`` object as parameters, can be passed to perform
 								            post-processing on the PDF right before the trailer is written.
-												Refine docstrings.

											
										
										
											2012-10-05 20:50:40 +04:00
+								        :returns:
-												Clean a lot of things in API documentation

											
										
										
											2019-02-22 13:34:46 +03:00
+								            The PDF as :obj:`bytes` if ``target`` is not provided or
 								            :obj:`None`, otherwise :obj:`None` (the PDF is written to
 								            ``target``).
-												The Document is back! (But different.)

											
										
										
											2012-10-02 20:59:02 +04:00
 								        """
-												Handle transforms and bookmarks

											
										
										
											2020-04-18 23:12:25 +03:00
+								        # 0.75 = 72 PDF point per inch / 96 CSS pixel per inch
-												Rename PDF scale to zoom, have the default be 1 rather than 0.75

The 0.75 factor is an implementation detail that should not be exposed
in the API.

											
										
										
											2012-11-23 01:27:34 +04:00
+								        scale = zoom * 0.75
-												Don't use pdfrw anymore

pdfrw is a great piece of software, but we don't know PDF enough to debug the
problems we've met. It's safer to use the new cairo API and get back to manual
edition for attachments and bleed boxes.

We only have two regressions for now:
- some internal links are broken,
- PDF producer is not overwritten.

A mail has been sent to cairo's mailing-list about that:
https://lists.cairographics.org/archives/cairo/2018-August/028694.html

Fix #639, #615, fix #596, fix #565.

											
										
										
											2018-08-06 18:38:02 +03:00
-												Merge document and pdf modules

											
										
										
											2020-04-19 17:49:37 +03:00
+								        PROGRESS_LOGGER.info('Step 6 - Creating PDF')
 								        pdf = pydyf.PDF()
 								        alpha_states = pydyf.Dictionary()
-												Handle opacity

											
										
										
											2020-05-17 15:46:41 +03:00
+								        x_objects = pydyf.Dictionary()
-												Handle very simple gradients

											
										
										
											2020-06-07 01:32:47 +03:00
+								        patterns = pydyf.Dictionary()
 								        shadings = pydyf.Dictionary()
-												Handle opacity

											
										
										
											2020-05-17 15:46:41 +03:00
+								        resources = pydyf.Dictionary({
-												Don’t use references for states and xobjects dicts

											
										
										
											2020-05-17 18:04:45 +03:00
+								            'ExtGState': alpha_states,
-												Use dedicated resources for x-objects and patterns

											
										
										
											2020-06-07 16:18:00 +03:00
+								            'XObject': x_objects,
-												Handle very simple gradients

											
										
										
											2020-06-07 01:32:47 +03:00
+								            'Pattern': patterns,
 								            'Shading': shadings,
-												Handle opacity

											
										
										
											2020-05-17 15:46:41 +03:00
+								        })
-												Merge document and pdf modules

											
										
										
											2020-04-19 17:49:37 +03:00
+								        pdf.add_object(resources)
 								        pdf_names = pydyf.Array()
-												Don't use pdfrw anymore

pdfrw is a great piece of software, but we don't know PDF enough to debug the
problems we've met. It's safer to use the new cairo API and get back to manual
edition for attachments and bleed boxes.

We only have two regressions for now:
- some internal links are broken,
- PDF producer is not overwritten.

A mail has been sent to cairo's mailing-list about that:
https://lists.cairographics.org/archives/cairo/2018-August/028694.html

Fix #639, #615, fix #596, fix #565.

											
										
										
											2018-08-06 18:38:02 +03:00
-												Go through PDF pages only once during generation

											
										
										
											2020-04-21 23:47:55 +03:00
+								        # Links and anchors
-												Fix typo

											
										
										
											2020-05-17 17:59:58 +03:00
+								        page_links_and_anchors = list(resolve_links(self.pages))
-												Add inline attachments

											
										
										
											2020-04-21 23:30:38 +03:00
+								        attachment_links = [
 								            [link for link in page_links if link[0] == 'attachment']
-												Fix typo

											
										
										
											2020-05-17 17:59:58 +03:00
+								            for page_links, page_anchors in page_links_and_anchors]
-												Add inline attachments

											
										
										
											2020-04-21 23:30:38 +03:00
-												Go through PDF pages only once during generation

											
										
										
											2020-04-21 23:47:55 +03:00
+								        # Annotations
 								        annot_files = {}
-												Add inline attachments

											
										
										
											2020-04-21 23:30:38 +03:00
+								        # A single link can be split in multiple regions. We don't want to
 								        # embed a file multiple times of course, so keep a reference to every
 								        # embedded URL and reuse the object number.
 								        for page_links in attachment_links:
 								            for link_type, annot_target, rectangle in page_links:
 								                if link_type == 'attachment' and target not in annot_files:
-												Go through PDF pages only once during generation

											
										
										
											2020-04-21 23:47:55 +03:00
+								                    # TODO: Use the title attribute as description. The comment
 								                    # above about multiple regions won't always be correct,
 								                    # because two links might have the same href, but different
 								                    # titles.
-												Add inline attachments

											
										
										
											2020-04-21 23:30:38 +03:00
+								                    annot_files[annot_target] = _write_pdf_attachment(
 								                        pdf, (annot_target, None), self.url_fetcher)
-												Go through PDF pages only once during generation

											
										
										
											2020-04-21 23:47:55 +03:00
+								        # Bookmarks
 								        root = []
 								        # At one point in the document, for each "output" depth, how much
 								        # to add to get the source level (CSS values of bookmark-level).
 								        # E.g. with <h1> then <h3>, level_shifts == [0, 1]
 								        # 1 means that <h3> has depth 3 - 1 = 2 in the output.
 								        skipped_levels = []
 								        last_by_depth = [root]
 								        previous_level = 0
 								        for page_number, (page, links_and_anchors, page_links) in enumerate(
-												Fix typo

											
										
										
											2020-05-17 17:59:58 +03:00
+								                zip(self.pages, page_links_and_anchors, attachment_links)):
-												Go through PDF pages only once during generation

											
										
										
											2020-04-21 23:47:55 +03:00
+								            # Draw from the top-left corner
 								            matrix = Matrix(scale, 0, 0, -scale, 0, page.height * scale)
 								            # Links and anchors
-												Use named destinations instead of pages and position for links

There's a limitation / bug in cairo: we can't add links to pages that have not
been created yet. We have to use named destinations instead as they work even
if the destination has not been created.

This change offers the advantage of advertising targets: generated PDF files
now embed the list of named targets (even if I don't know if PDF readers have a
UI for that feature).

Fix #678.

											
										
										
											2018-09-24 16:27:24 +03:00
+								            links, anchors = links_and_anchors
-												Handle hyperlinks and anchors

											
										
										
											2020-04-19 01:47:19 +03:00
-												Handle bleed box

											
										
										
											2020-04-19 10:55:39 +03:00
+								            page_width = scale * (
 								                page.width + page.bleed['left'] + page.bleed['right'])
 								            page_height = scale * (
 								                page.height + page.bleed['top'] + page.bleed['bottom'])
 								            left = -scale * page.bleed['left']
 								            top = -scale * page.bleed['top']
-												Merge document and pdf modules

											
										
										
											2020-04-19 17:49:37 +03:00
+								            right = left + page_width
 								            bottom = top + page_height
-												First steps to replace cairo with pydyf

											
										
										
											2020-04-18 01:19:35 +03:00
-												Handle very simple gradients

											
										
										
											2020-06-07 01:32:47 +03:00
+								            page_rectangle = (
 								                left / scale, top / scale, right / scale, bottom / scale)
 								            stream = Context(
 								                self, page_rectangle, alpha_states, x_objects, patterns,
 								                shadings)
-												Fix transformation matrix with bleed box

											
										
										
											2020-04-19 15:40:30 +03:00
+								            stream.transform(1, 0, 0, -1, 0, page.height * scale)
-												First steps to replace cairo with pydyf

											
										
										
											2020-04-18 01:19:35 +03:00
+								            page.paint(stream, scale=scale)
-												Merge document and pdf modules

											
										
										
											2020-04-19 17:49:37 +03:00
+								            pdf.add_object(stream)
-												First steps to replace cairo with pydyf

											
										
										
											2020-04-18 01:19:35 +03:00
-												Handle hyperlinks and anchors

											
										
										
											2020-04-19 01:47:19 +03:00
+								            pdf_page = pydyf.Dictionary({
-												First steps to replace cairo with pydyf

											
										
										
											2020-04-18 01:19:35 +03:00
+								                'Type': '/Page',
-												Merge document and pdf modules

											
										
										
											2020-04-19 17:49:37 +03:00
+								                'Parent': pdf.pages.reference,
 								                'MediaBox': pydyf.Array([left, top, right, bottom]),
-												First steps to replace cairo with pydyf

											
										
										
											2020-04-18 01:19:35 +03:00
+								                'Contents': stream.reference,
 								                'Resources': resources.reference,
-												Handle hyperlinks and anchors

											
										
										
											2020-04-19 01:47:19 +03:00
+								            })
-												Merge document and pdf modules

											
										
										
											2020-04-19 17:49:37 +03:00
+								            pdf.add_page(pdf_page)
 								            add_hyperlinks(links, anchors, matrix, pdf, pdf_page, pdf_names)
-												Go through PDF pages only once during generation

											
										
										
											2020-04-21 23:47:55 +03:00
+								            # Bleed
-												Merge document and pdf modules

											
										
										
											2020-04-19 17:49:37 +03:00
+								            bleed = {key: value * 0.75 for key, value in page.bleed.items()}
 								            trim_left = left + bleed['left']
 								            trim_top = top + bleed['top']
 								            trim_right = right - bleed['right']
 								            trim_bottom = bottom - bleed['bottom']
-												Handle hyperlinks and anchors

											
										
										
											2020-04-19 01:47:19 +03:00
-												Merge document and pdf modules

											
										
										
											2020-04-19 17:49:37 +03:00
+								            # Arbitrarily set PDF BleedBox between CSS bleed box (MediaBox) and
 								            # CSS page box (TrimBox) at most 10 points from the TrimBox.
 								            bleed_left = trim_left - min(10, bleed['left'])
 								            bleed_top = trim_top - min(10, bleed['top'])
 								            bleed_right = trim_right + min(10, bleed['right'])
 								            bleed_bottom = trim_bottom + min(10, bleed['bottom'])
-												Handle hyperlinks and anchors

											
										
										
											2020-04-19 01:47:19 +03:00
-												Merge document and pdf modules

											
										
										
											2020-04-19 17:49:37 +03:00
+								            pdf_page['TrimBox'] = pydyf.Array([
 								                trim_left, trim_top, trim_right, trim_bottom])
 								            pdf_page['BleedBox'] = pydyf.Array([
 								                bleed_left, bleed_top, bleed_right, bleed_bottom])
-												The Document is back! (But different.)

											
										
										
											2012-10-02 20:59:02 +04:00
-												Go through PDF pages only once during generation

											
										
										
											2020-04-21 23:47:55 +03:00
+								            # Annotations
-												Add inline attachments

											
										
										
											2020-04-21 23:30:38 +03:00
+								            # TODO: splitting a link into multiple independent rectangular
 								            # annotations works well for pure links, but rather mediocre for
 								            # other annotations and fails completely for transformed (CSS) or
 								            # complex link shapes (area). It would be better to use /AP for all
 								            # links and coalesce link shapes that originate from the same HTML
 								            # link. This would give a feeling similar to what browsers do with
 								            # links that span multiple lines.
 								            for link_type, annot_target, rectangle in page_links:
 								                annot_file = annot_files[annot_target]
 								                if link_type == 'attachment' and annot_file is not None:
 								                    rectangle = (
 								                        *matrix.transform_point(*rectangle[:2]),
 								                        *matrix.transform_point(*rectangle[2:]))
 								                    annot = pydyf.Dictionary({
 								                        'Type': '/Annot',
 								                        'Rect': pydyf.Array(rectangle),
 								                        'Subtype': '/FileAttachment',
 								                        'T': pydyf.String(),
 								                        'FS': annot_file.reference,
 								                        'AP': pydyf.Dictionary({'N': pydyf.Stream([], {
 								                            'Type': '/XObject',
 								                            'Subtype': '/Form',
 								                            'BBox': pydyf.Array(rectangle),
 								                            'Length': 0,
 								                        })})
 								                    })
 								                    pdf.add_object(annot)
-												Add various dictionaries only when needed

											
										
										
											2020-05-16 18:05:48 +03:00
+								                    if 'Annots' not in pdf_page:
 								                        pdf_page['Annots'] = pydyf.Array()
-												Add inline attachments

											
										
										
											2020-04-21 23:30:38 +03:00
+								                    pdf_page['Annots'].append(annot.reference)
-												Go through PDF pages only once during generation

											
										
										
											2020-04-21 23:47:55 +03:00
+								            # Bookmarks
-												Merge document and pdf modules

											
										
										
											2020-04-19 17:49:37 +03:00
+								            for level, label, (point_x, point_y), state in page.bookmarks:
 								                if level > previous_level:
 								                    # Example: if the previous bookmark is a <h2>, the next
 								                    # depth "should" be for <h3>. If now we get a <h6> we’re
 								                    # skipping two levels: append 6 - 3 - 1 = 2
 								                    skipped_levels.append(level - previous_level - 1)
 								                else:
 								                    temp = level
 								                    while temp < previous_level:
 								                        temp += 1 + skipped_levels.pop()
 								                    if temp > previous_level:
 								                        # We remove too many "skips", add some back:
 								                        skipped_levels.append(temp - previous_level - 1)
 								                previous_level = level
 								                depth = level - sum(skipped_levels)
 								                assert depth == len(skipped_levels)
 								                assert depth >= 1
 								                children = []
 								                point_x, point_y = matrix.transform_point(point_x, point_y)
 								                subtree = BookmarkSubtree(
 								                    label, (page_number, point_x, point_y), children, state)
 								                last_by_depth[depth - 1].append(subtree)
 								                del last_by_depth[depth:]
 								                last_by_depth.append(children)
-												Add comment for outlines

											
										
										
											2020-05-17 18:12:16 +03:00
+								        # Outlines
-												Merge document and pdf modules

											
										
										
											2020-04-19 17:49:37 +03:00
+								        outlines, count = create_bookmarks(root, pdf)
-												Fix crash when there’s no outline

											
										
										
											2020-04-22 00:07:35 +03:00
+								        if outlines:
-												Fix outlines

											
										
										
											2020-05-12 22:53:54 +03:00
+								            outlines_dictionary = pydyf.Dictionary({
-												Fix crash when there’s no outline

											
										
										
											2020-04-22 00:07:35 +03:00
+								                'Count': count,
 								                'First': outlines[0].reference,
 								                'Last': outlines[-1].reference,
 								            })
-												Fix outlines

											
										
										
											2020-05-12 22:53:54 +03:00
+								            pdf.add_object(outlines_dictionary)
 								            for outline in outlines:
 								                outline['Parent'] = outlines_dictionary.reference
-												Fix outlines

											
										
										
											2020-05-16 17:19:28 +03:00
+								            pdf.catalog['Outlines'] = outlines_dictionary.reference
-												Merge document and pdf modules

											
										
										
											2020-04-19 17:49:37 +03:00
-												Go through PDF pages only once during generation

											
										
										
											2020-04-21 23:47:55 +03:00
+								        PROGRESS_LOGGER.info('Step 7 - Adding PDF metadata')
-												Include font streams

											
										
										
											2020-05-06 08:42:45 +03:00
+								        # PDF information
-												Go through PDF pages only once during generation

											
										
										
											2020-04-21 23:47:55 +03:00
+								        if self.metadata.title:
 								            pdf.info['Title'] = pydyf.String(self.metadata.title)
 								        if self.metadata.authors:
 								            pdf.info['Author'] = pydyf.String(
 								                ', '.join(self.metadata.authors))
 								        if self.metadata.description:
 								            pdf.info['Subject'] = pydyf.String(self.metadata.description)
 								        if self.metadata.keywords:
 								            pdf.info['Keywords'] = pydyf.String(
 								                ', '.join(self.metadata.keywords))
 								        if self.metadata.generator:
 								            pdf.info['Creator'] = pydyf.String(self.metadata.generator)
 								        pdf.info['Producer'] = pydyf.String(f'WeasyPrint {__version__}')
 								        if self.metadata.created:
 								            pdf.info['CreationDate'] = pydyf.String(
 								                _w3c_date_to_pdf(self.metadata.created, 'created'))
 								        if self.metadata.modified:
 								            pdf.info['ModDate'] = pydyf.String(
 								                _w3c_date_to_pdf(self.metadata.modified, 'modified'))
-												Include font streams

											
										
										
											2020-05-06 08:42:45 +03:00
+								        # Embedded files
-												Add inline attachments

											
										
										
											2020-04-21 23:30:38 +03:00
+								        attachments = self.metadata.attachments + (attachments or [])
-												Include font streams

											
										
										
											2020-05-06 08:42:45 +03:00
+								        pdf_attachments = []
 								        for attachment in attachments:
 								            pdf_attachment = _write_pdf_attachment(
 								                pdf, attachment, self.url_fetcher)
 								            if pdf_attachment is not None:
 								                pdf_attachments.append(pdf_attachment)
 								        if pdf_attachments:
 								            content = pydyf.Dictionary({'Names': pydyf.Array()})
 								            for i, pdf_attachment in enumerate(pdf_attachments):
 								                content['Names'].append(pydyf.String(f'attachment{i}'))
 								                content['Names'].append(pdf_attachment.reference)
 								            pdf.add_object(content)
-												Remove embedded files dictionary when empty

											
										
										
											2020-05-16 17:25:06 +03:00
+								            if 'Names' not in pdf.catalog:
-												Add various dictionaries only when needed

											
										
										
											2020-05-16 18:05:48 +03:00
+								                pdf.catalog['Names'] = pydyf.Dictionary()
-												Include font streams

											
										
										
											2020-05-06 08:42:45 +03:00
+								            pdf.catalog['Names']['EmbeddedFiles'] = content.reference
 								        # Embedded fonts
 								        resources['Font'] = pydyf.Dictionary()
-												Improve text rendering speed

											
										
										
											2020-06-01 02:12:32 +03:00
+								        for font in self.fonts.values():
-												Optimize fonts

											
										
										
											2020-05-29 20:43:56 +03:00
+								            # Optimize font
-												Don’t crash when font can’t be optimized

											
										
										
											2020-05-30 01:30:13 +03:00
+								            try:
 								                full_font = io.BytesIO(font.file_content)
 								                optimized_font = io.BytesIO()
 								                ttfont = TTFont(full_font)
 								                options = subset.Options(
 								                    retain_gids=True, passthrough_tables=True)
 								                subsetter = subset.Subsetter(options)
 								                subsetter.populate(gids=font.cmap)
 								                subsetter.subset(ttfont)
 								                ttfont.save(optimized_font)
 								                content = optimized_font.getvalue()
 								            except TTLibError:
 								                content = font.file_content
-												Optimize fonts

											
										
										
											2020-05-29 20:43:56 +03:00
 								            # Include font
 								            font_type = 'otf' if content[:4] == b'OTTO' else 'ttf'
-												Handle TTF and OTF font files with dedicated PDF syntaxes

											
										
										
											2020-05-10 01:14:56 +03:00
+								            if font_type == 'otf':
-												Compress streams

											
										
										
											2020-06-01 12:48:17 +03:00
+								                font_extra = pydyf.Dictionary({'Subtype': '/OpenType'})
-												Fix embedded OpenType fonts

											
										
										
											2020-05-12 14:11:52 +03:00
+								            else:
-												Compress streams

											
										
										
											2020-06-01 12:48:17 +03:00
+								                font_extra = pydyf.Dictionary({'Length1': len(content)})
 								            font_stream = pydyf.Stream([content], font_extra, compress=True)
-												Include font streams

											
										
										
											2020-05-06 08:42:45 +03:00
+								            pdf.add_object(font_stream)
-												Fix arrays in char widths

											
										
										
											2020-05-12 03:00:07 +03:00
+								            widths = pydyf.Array()
-												Use a smaller array for characters widths

											
										
										
											2020-05-10 19:09:06 +03:00
+								            for i in sorted(font.widths):
 								                if i - 1 not in font.widths:
 								                    widths.append(i)
-												Fix arrays in char widths

											
										
										
											2020-05-12 03:00:07 +03:00
+								                    current_widths = pydyf.Array()
-												Use a smaller array for characters widths

											
										
										
											2020-05-10 19:09:06 +03:00
+								                    widths.append(current_widths)
 								                current_widths.append(font.widths[i])
-												Use font codepoints instead of Unicode codepoints

											
										
										
											2020-05-08 18:14:45 +03:00
+								            subfont_dictionary = pydyf.Dictionary({
-												Include font streams

											
										
										
											2020-05-06 08:42:45 +03:00
+								                'Type': '/Font',
-												Clean document.py

											
										
										
											2020-05-30 02:11:30 +03:00
+								                'Subtype': f'/CIDFontType{"0" if font_type == "otf" else "2"}',
-												Fix font name

											
										
										
											2020-05-08 02:55:50 +03:00
+								                'BaseFont': font.name,
-												Use font codepoints instead of Unicode codepoints

											
										
										
											2020-05-08 18:14:45 +03:00
+								                'CIDSystemInfo': pydyf.Dictionary({
 								                    'Registry': pydyf.String('Adobe'),
 								                    'Ordering': pydyf.String('Identity'),
 								                    'Supplement': 0,
 								                }),
-												Fix arrays in char widths

											
										
										
											2020-05-12 03:00:07 +03:00
+								                'W': widths,
-												Include font streams

											
										
										
											2020-05-06 08:42:45 +03:00
+								                'FontDescriptor': pydyf.Dictionary({
-												Handle TTF and OTF font files with dedicated PDF syntaxes

											
										
										
											2020-05-10 01:14:56 +03:00
+								                    'Type': '/FontDescriptor',
-												Fix font name

											
										
										
											2020-05-08 02:55:50 +03:00
+								                    'FontName': font.name,
 								                    'FontFamily': pydyf.String(font.family),
-												Include font streams

											
										
										
											2020-05-06 08:42:45 +03:00
+								                    'Flags': 32,
-												Change some font attributes names

											
										
										
											2020-05-08 02:50:41 +03:00
+								                    'FontBBox': pydyf.Array(font.bbox),
-												Use computed values

											
										
										
											2020-05-08 01:40:01 +03:00
+								                    'ItalicAngle': font.italic_angle,
 								                    'Ascent': font.ascent,
 								                    'Descent': font.descent,
-												Fix insconsistent kerning and font metrics values

											
										
										
											2020-05-10 18:51:37 +03:00
+								                    'CapHeight': font.bbox[3],
-												Use computed values

											
										
										
											2020-05-08 01:40:01 +03:00
+								                    'StemV': font.stemv,
 								                    'StemH': font.stemh,
-												Clean document.py

											
										
										
											2020-05-30 02:11:30 +03:00
+								                    (f'FontFile{"3" if font_type == "otf" else "2"}'):
-												Handle TTF and OTF font files with dedicated PDF syntaxes

											
										
										
											2020-05-10 01:14:56 +03:00
+								                        font_stream.reference,
-												Use font codepoints instead of Unicode codepoints

											
										
										
											2020-05-08 18:14:45 +03:00
+								                }),
 								            })
-												Fix embedded OpenType fonts

											
										
										
											2020-05-12 14:11:52 +03:00
+								            if font_type == 'otf':
 								                subfont_dictionary['FontDescriptor']['Subtype'] = '/OpenType'
-												Use font codepoints instead of Unicode codepoints

											
										
										
											2020-05-08 18:14:45 +03:00
+								            pdf.add_object(subfont_dictionary)
-												Cmaps into font

											
										
										
											2020-05-12 19:38:12 +03:00
+								            to_unicode = pydyf.Stream([
-												Improve text rendering speed

											
										
										
											2020-06-01 02:12:32 +03:00
+								                b'/CIDInit /ProcSet findresource begin',
 								                b'12 dict begin',
 								                b'begincmap',
 								                b'/CIDSystemInfo',
 								                b'<< /Registry (Adobe)',
 								                b'/Ordering (UCS)',
 								                b'/Supplement 0',
 								                b'>> def',
 								                b'/CMapName /Adobe-Identity-UCS def',
 								                b'/CMapType 2 def',
 								                b'1 begincodespacerange',
 								                b'<0000> <ffff>',
 								                b'endcodespacerange',
 								                f'{len(font.cmap)} beginbfchar'.encode('ascii')])
-												Cmaps into font

											
										
										
											2020-05-12 19:38:12 +03:00
+								            for glyph, text in font.cmap.items():
 								                unicode_codepoints = ''.join(
 								                    f'{letter.encode("utf-16-be").hex()}' for letter in text)
 								                to_unicode.stream.append(
-												Improve text rendering speed

											
										
										
											2020-06-01 02:12:32 +03:00
+								                    f'<{glyph:04x}> <{unicode_codepoints}>'.encode('ascii'))
-												Cmaps into font

											
										
										
											2020-05-12 19:38:12 +03:00
+								            to_unicode.stream.extend([
-												Improve text rendering speed

											
										
										
											2020-06-01 02:12:32 +03:00
+								                b'endbfchar',
 								                b'endcmap',
 								                b'CMapName currentdict /CMap defineresource pop',
 								                b'end',
 								                b'end'])
-												Use a reference for ToUnicode

											
										
										
											2020-05-13 00:47:22 +03:00
+								            pdf.add_object(to_unicode)
-												Use font codepoints instead of Unicode codepoints

											
										
										
											2020-05-08 18:14:45 +03:00
+								            font_dictionary = pydyf.Dictionary({
 								                'Type': '/Font',
 								                'Subtype': '/Type0',
 								                'BaseFont': font.name,
 								                'Encoding': '/Identity-H',
 								                'DescendantFonts': pydyf.Array([subfont_dictionary.reference]),
-												Use a reference for ToUnicode

											
										
										
											2020-05-13 00:47:22 +03:00
+								                'ToUnicode': to_unicode.reference,
-												Include font streams

											
										
										
											2020-05-06 08:42:45 +03:00
+								            })
 								            pdf.add_object(font_dictionary)
-												Improve text rendering speed

											
										
										
											2020-06-01 02:12:32 +03:00
+								            resources['Font'][font.hash] = font_dictionary.reference
-												Add inline attachments

											
										
										
											2020-04-21 23:30:38 +03:00
-												Use dedicated resources for x-objects and patterns

											
										
										
											2020-06-07 16:18:00 +03:00
+								        self._use_references(pdf, resources)
-												Handle opacity

											
										
										
											2020-05-17 15:46:41 +03:00
-												Fix anchors

											
										
										
											2020-05-17 18:12:23 +03:00
+								        # Anchors
 								        if pdf_names:
 								            pdf.catalog['Names'] = pydyf.Dictionary(
 								                {'Dests': pydyf.Dictionary({'Names': pdf_names})})
-												Call finisher

											
										
										
											2020-04-19 11:01:27 +03:00
+								        if finisher:
-												Merge document and pdf modules

											
										
										
											2020-04-19 17:49:37 +03:00
+								            finisher(self, pdf)
-												Call finisher

											
										
										
											2020-04-19 11:01:27 +03:00
-												First steps to replace cairo with pydyf

											
										
										
											2020-04-18 01:19:35 +03:00
+								        file_obj = io.BytesIO()
-												Merge document and pdf modules

											
										
										
											2020-04-19 17:49:37 +03:00
+								        pdf.write(file_obj)
-												The Document is back! (But different.)

											
										
										
											2012-10-02 20:59:02 +04:00
 								        if target is None:
 								            return file_obj.getvalue()
 								        else:
 								            file_obj.seek(0)
 								            if hasattr(target, 'write'):
 								                shutil.copyfileobj(file_obj, target)
 								            else:
 								                with open(target, 'wb') as fd:
 								                    shutil.copyfileobj(file_obj, fd)
-												Disable antialiasing by default

											
										
										
											2020-05-18 02:29:37 +03:00
+								    def write_png(self, target=None, resolution=96, antialiasing=1):
-												More WIP: cairocffi and pango cffi.

											
										
										
											2012-12-29 04:00:30 +04:00
+								        """Paint the pages vertically to a single PNG image.
 								        There is no decoration around pages other than those specified in CSS
 								        with ``@page`` rules. The final image is as wide as the widest page.
 								        Each page is below the previous one, centered horizontally.
 								        :param target:
 								            A filename, file-like object, or :obj:`None`.
 								        :type resolution: float
 								        :param resolution:
 								            The output resolution in PNG pixels per CSS inch. At 96 dpi
 								            (the default), PNG pixels match the CSS ``px`` unit.
-												Add an option to enable antialiasing for PNG generation

											
										
										
											2020-05-17 16:06:17 +03:00
+								        :type antialiasing: int
 								        :param antialiasing:
-												Disable antialiasing by default

											
										
										
											2020-05-18 02:29:37 +03:00
+								            The antialiasing subsampling box size. Default is 1 (disabled), can
 								            be set to 4 for optimal (but slow) antialiasing.
-												More WIP: cairocffi and pango cffi.

											
										
										
											2012-12-29 04:00:30 +04:00
+								        :returns:
-												Clean a lot of things in API documentation

											
										
										
											2019-02-22 13:34:46 +03:00
+								            A ``(png_bytes, png_width, png_height)`` tuple. ``png_bytes`` is a
 								            byte string if ``target`` is :obj:`None`, otherwise :obj:`None`
 								            (the image is written to ``target``).  ``png_width`` and
 								            ``png_height`` are the size of the final image, in PNG pixels.
-												More WIP: cairocffi and pango cffi.

											
										
										
											2012-12-29 04:00:30 +04:00
 								        """
-												Fix comment in PNG generation function

Using GhostScript as a library seems to be awful. Let’s forget this idea.

											
										
										
											2020-05-18 23:42:27 +03:00
+								        # TODO: don’t crash if GhostScript can’t be found
 								        # TODO: fix that for Windows
-												Fix many tests

											
										
										
											2020-05-13 02:02:43 +03:00
+								        command = [
-												Use subprocess.run

It’s just more simple than Popen.

											
										
										
											2020-05-19 00:53:19 +03:00
+								            'gs', '-q', '-sstdout=%stderr', '-dNOPAUSE', '-dSAFER',
-												Add an option to enable antialiasing for PNG generation

											
										
										
											2020-05-17 16:06:17 +03:00
+								            f'-dTextAlphaBits={antialiasing}',
 								            f'-dGraphicsAlphaBits={antialiasing}', '-sDEVICE=png16m',
-												Fix multi-page PNG generation

											
										
										
											2020-05-16 01:04:09 +03:00
+								            f'-r{resolution}', '-sOutputFile=-', '-']
-												Use subprocess.run

It’s just more simple than Popen.

											
										
										
											2020-05-19 00:53:19 +03:00
+								        command = run(command, input=self.write_pdf(), capture_output=True)
 								        pngs = command.stdout
-												Don’t make write_png return size

											
										
										
											2020-05-18 02:36:48 +03:00
+								        magic_number = b'\x89\x50\x4e\x47\x0d\x0a\x1a\x0a'
-												Fix comment in PNG generation function

Using GhostScript as a library seems to be awful. Let’s forget this idea.

											
										
										
											2020-05-18 23:42:27 +03:00
+								        # TODO: use a different way to find PNG files in stream
-												Don’t make write_png return size

											
										
										
											2020-05-18 02:36:48 +03:00
+								        if pngs.count(magic_number) == 1:
 								            if target is None:
 								                return pngs
 								            png = io.BytesIO(pngs)
 								        else:
 								            images = []
 								            for i, png in enumerate(pngs[8:].split(magic_number)):
 								                images.append(Image.open(io.BytesIO(magic_number + png)))
 								            width = max(image.width for image in images)
 								            height = sum(image.height for image in images)
 								            output_image = Image.new('RGBA', (width, height))
 								            top = 0
 								            for image in images:
 								                output_image.paste(
 								                    image, (int((width - image.width) / 2), top))
 								                top += image.height
 								            png = io.BytesIO()
 								            output_image.save(png, format='png')
 								        png.seek(0)
-												Fix many tests

											
										
										
											2020-05-13 02:02:43 +03:00
-												Clean return condition in document.py

											
										
										
											2020-05-18 12:54:37 +03:00
+								        if target is None:
 								            return png.read()
-												Fix multi-page PNG generation

											
										
										
											2020-05-16 01:04:09 +03:00
-												Clean return condition in document.py

											
										
										
											2020-05-18 12:54:37 +03:00
+								        if hasattr(target, 'write'):
 								            shutil.copyfileobj(png, target)
 								        else:
 								            with open(target, 'wb') as fd:
 								                shutil.copyfileobj(png, fd)