Reorganize anchors management

2024-09-11 20:47:56 +03:00 · 2023-01-15 21:59:13 +01:00 · 2023-01-15 21:59:13 +01:00 · 0b1617edc8
commit 0b1617edc8
parent 168ed3b9a3
8 changed files with 416 additions and 418 deletions
--- a/tests/test_api.py
+++ b/tests/test_api.py
@ -13,7 +13,7 @@ import py
 import pytest
 from PIL import Image
 from weasyprint import CSS, HTML, __main__, default_url_fetcher
-from weasyprint.links import resolve_links
+from weasyprint.pdf.anchors import resolve_links
 from weasyprint.urls import path2url

 from .draw import parse_pixels
--- a/weasyprint/anchors.py
+++ b/weasyprint/anchors.py
@ -1,53 +1,12 @@
-"""PDF links and bookmarks management."""
+"""Find anchors, links, bookmarks and inputs in documents."""

 import math

 from .formatting_structure import boxes
 from .layout.percent import percentage
-from .logger import LOGGER
 from .matrix import Matrix


-def resolve_links(pages):
-    """Resolve internal hyperlinks.
-
-    Links to a missing anchor are removed with a warning.
-
-    If multiple anchors have the same name, the first one is used.
-
-    :returns:
-        A generator yielding lists (one per page) like :attr:`Page.links`,
-        except that ``target`` for internal hyperlinks is
-        ``(page_number, x, y)`` instead of an anchor name.
-        The page number is a 0-based index into the :attr:`pages` list,
-        and ``x, y`` are in CSS pixels from the top-left of the page.
-
-    """
-    anchors = set()
-    paged_anchors = []
-    for i, page in enumerate(pages):
-        paged_anchors.append([])
-        for anchor_name, (point_x, point_y) in page.anchors.items():
-            if anchor_name not in anchors:
-                paged_anchors[-1].append((anchor_name, point_x, point_y))
-                anchors.add(anchor_name)
-    for page in pages:
-        page_links = []
-        for link in page.links:
-            link_type, anchor_name, _, _ = link
-            if link_type == 'internal':
-                if anchor_name not in anchors:
-                    LOGGER.error(
-                        'No anchor #%s for internal URI reference',
-                        anchor_name)
-                else:
-                    page_links.append(link)
-            else:
-                # External link
-                page_links.append(link)
-        yield page_links, paged_anchors.pop(0)
-
-
 def rectangle_aabb(matrix, pos_x, pos_y, width, height):
    """Apply a transformation matrix to an axis-aligned rectangle.

@ -68,8 +27,12 @@ def rectangle_aabb(matrix, pos_x, pos_y, width, height):
    return box_x1, box_y1, box_x2, box_y2


-def gather_links_and_bookmarks(box, anchors, links, bookmarks, inputs,
-                               parent_matrix=None):
+def gather_anchors(box, anchors, links, bookmarks, inputs, parent_matrix=None):
+    """Gather anchors and other data related to specific positions in PDF.
+
+    Currently finds anchors, links, bookmarks and inputs.
+
+    """
    # Get box transformation matrix.
    # "Transforms apply to block-level and atomic inline-level elements,
    #  but do not apply to elements which may be split into
@ -149,8 +112,7 @@ def gather_links_and_bookmarks(box, anchors, links, bookmarks, inputs,
            anchors[anchor_name] = pos_x, pos_y

    for child in box.all_children():
-        gather_links_and_bookmarks(
-            child, anchors, links, bookmarks, inputs, matrix)
+        gather_anchors(child, anchors, links, bookmarks, inputs, matrix)


 def make_page_bookmark_tree(page, skipped_levels, last_by_depth,
--- a/weasyprint/document.py
+++ b/weasyprint/document.py
@ -5,6 +5,7 @@ import io
 import shutil

 from . import CSS
+from .anchors import gather_anchors, make_page_bookmark_tree
 from .css import get_all_computed_styles
 from .css.counters import CounterStyle
 from .css.targets import TargetCollector
@ -13,7 +14,6 @@ from .formatting_structure.build import build_formatting_structure
 from .html import get_html_metadata
 from .images import get_image_from_uri as original_get_image_from_uri
 from .layout import LayoutContext, layout_document
-from .links import gather_links_and_bookmarks, make_page_bookmark_tree
 from .logger import PROGRESS_LOGGER
 from .matrix import Matrix
 from .pdf import generate_pdf
@ -72,7 +72,7 @@ class Page:
        #: :ojb:`dict` of HTML tag attributes and values.
        self.inputs = []

-        gather_links_and_bookmarks(
+        gather_anchors(
            page_box, self.anchors, self.links, self.bookmarks, self.inputs)
        self._page_box = page_box

@ -247,7 +247,7 @@ class Document:
        # Keep a reference to font_config to avoid its garbage collection until
        # rendering is destroyed. This is needed as font_config.__del__ removes
        # fonts that may be used when rendering
-        self._font_config = font_config
+        self.font_config = font_config
        # Set of flags for PDF size optimization. Can contain "images" and
        # "fonts".
        self._optimize_size = optimize_size
@ -290,7 +290,7 @@ class Document:
        elif not isinstance(pages, list):
            pages = list(pages)
        return type(self)(
-            pages, self.metadata, self.url_fetcher, self._font_config,
+            pages, self.metadata, self.url_fetcher, self.font_config,
            self._optimize_size)

    def make_bookmark_tree(self):
--- a/weasyprint/pdf/init.py
+++ b/weasyprint/pdf/init.py
@ -1,20 +1,15 @@
 """PDF generation management."""

-import hashlib
-import io
-import zlib
-from os.path import basename
-from urllib.parse import unquote, urlsplit
-
 import pydyf

-from .. import Attachment, __version__
+from .. import VERSION
 from ..html import W3C_DATE_RE
-from ..links import make_page_bookmark_tree, resolve_links
 from ..logger import LOGGER, PROGRESS_LOGGER
 from ..matrix import Matrix
-from ..urls import URLFetchingError
 from . import pdfa, pdfua
+from .anchors import (
+    add_annotations, add_inputs, add_links, add_outlines, resolve_links,
+    write_pdf_attachment)
 from .fonts import build_fonts_dictionary
 from .stream import Stream

@ -53,71 +48,6 @@ def _w3c_date_to_pdf(string, attr_name):
    return pdf_date


-def _write_pdf_attachment(pdf, attachment, url_fetcher):
-    """Write an attachment to the PDF stream.
-
-    :return:
-        the attachment PDF dictionary.
-
-    """
-    # Attachments from document links like <link> or <a> can only be URLs.
-    # They're passed in as tuples
-    url = ''
-    if isinstance(attachment, tuple):
-        url, description = attachment
-        attachment = Attachment(
-            url=url, url_fetcher=url_fetcher, description=description)
-    elif not isinstance(attachment, Attachment):
-        attachment = Attachment(guess=attachment, url_fetcher=url_fetcher)
-
-    try:
-        with attachment.source as (source_type, source, url, _):
-            if isinstance(source, bytes):
-                source = io.BytesIO(source)
-            uncompressed_length = 0
-            stream = b''
-            md5 = hashlib.md5()
-            compress = zlib.compressobj()
-            for data in iter(lambda: source.read(4096), b''):
-                uncompressed_length += len(data)
-                md5.update(data)
-                compressed = compress.compress(data)
-                stream += compressed
-            compressed = compress.flush(zlib.Z_FINISH)
-            stream += compressed
-            file_extra = pydyf.Dictionary({
-                'Type': '/EmbeddedFile',
-                'Filter': '/FlateDecode',
-                'Params': pydyf.Dictionary({
-                    'CheckSum': f'<{md5.hexdigest()}>',
-                    'Size': uncompressed_length,
-                })
-            })
-            file_stream = pydyf.Stream([stream], file_extra)
-            pdf.add_object(file_stream)
-
-    except URLFetchingError as exception:
-        LOGGER.error('Failed to load attachment: %s', exception)
-        return
-
-    # TODO: Use the result object from a URL fetch operation to provide more
-    # details on the possible filename.
-    if url and urlsplit(url).path:
-        filename = basename(unquote(urlsplit(url).path))
-    else:
-        filename = 'attachment.bin'
-
-    attachment = pydyf.Dictionary({
-        'Type': '/Filespec',
-        'F': pydyf.String(),
-        'UF': pydyf.String(filename),
-        'EF': pydyf.Dictionary({'F': file_stream.reference}),
-        'Desc': pydyf.String(attachment.description or ''),
-    })
-    pdf.add_object(attachment)
-    return attachment
-
-
 def _reference_resources(pdf, resources, images, fonts):
    if 'Font' in resources:
        assert resources['Font'] is None
@ -170,67 +100,6 @@ def _use_references(pdf, resources, images):
            alpha['SMask']['G'] = alpha['SMask']['G'].reference


-def _add_links(links, anchors, matrix, pdf, page, names, mark):
-    """Include hyperlinks in given PDF page."""
-    for link_type, link_target, rectangle, box in links:
-        x1, y1 = matrix.transform_point(*rectangle[:2])
-        x2, y2 = matrix.transform_point(*rectangle[2:])
-        if link_type in ('internal', 'external'):
-            box.link_annotation = pydyf.Dictionary({
-                'Type': '/Annot',
-                'Subtype': '/Link',
-                'Rect': pydyf.Array([x1, y1, x2, y2]),
-                'BS': pydyf.Dictionary({'W': 0}),
-            })
-            if mark:
-                box.link_annotation['Contents'] = pydyf.String(link_target)
-            if link_type == 'internal':
-                box.link_annotation['Dest'] = pydyf.String(link_target)
-            else:
-                box.link_annotation['A'] = pydyf.Dictionary({
-                    'Type': '/Action',
-                    'S': '/URI',
-                    'URI': pydyf.String(link_target),
-                })
-            pdf.add_object(box.link_annotation)
-            if 'Annots' not in page:
-                page['Annots'] = pydyf.Array()
-            page['Annots'].append(box.link_annotation.reference)
-
-    for anchor in anchors:
-        anchor_name, x, y = anchor
-        x, y = matrix.transform_point(x, y)
-        names.append([
-            anchor_name, pydyf.Array([page.reference, '/XYZ', x, y, 0])])
-
-
-def _create_bookmarks(bookmarks, pdf, parent=None):
-    count = len(bookmarks)
-    outlines = []
-    for title, (page, x, y), children, state in bookmarks:
-        destination = pydyf.Array((pdf.page_references[page], '/XYZ', x, y, 0))
-        outline = pydyf.Dictionary({
-            'Title': pydyf.String(title), 'Dest': destination})
-        pdf.add_object(outline)
-        children_outlines, children_count = _create_bookmarks(
-            children, pdf, parent=outline)
-        outline['Count'] = children_count
-        if state == 'closed':
-            outline['Count'] *= -1
-        else:
-            count += children_count
-        if outlines:
-            outline['Prev'] = outlines[-1].reference
-            outlines[-1]['Next'] = outline.reference
-        if children_outlines:
-            outline['First'] = children_outlines[0].reference
-            outline['Last'] = children_outlines[-1].reference
-        if parent is not None:
-            outline['Parent'] = parent.reference
-        outlines.append(outline)
-    return outlines, count
-
-
 def generate_pdf(document, target, zoom, attachments, optimize_size,
                 identifier, variant, version, custom_metadata):
    # 0.75 = 72 PDF point per inch / 96 CSS pixel per inch
@ -264,43 +133,14 @@ def generate_pdf(document, target, zoom, attachments, optimize_size,

    # Links and anchors
    page_links_and_anchors = list(resolve_links(document.pages))
-    attachment_links = [
-        [link for link in page_links if link[0] == 'attachment']
-        for page_links, page_anchors in page_links_and_anchors]

-    # Annotations
    annot_files = {}
-    # A single link can be split in multiple regions. We don't want to embed a
-    # file multiple times of course, so keep a reference to every embedded URL
-    # and reuse the object number.
-    for page_links in attachment_links:
-        for link_type, annot_target, rectangle, _ in page_links:
-            if link_type == 'attachment' and target not in annot_files:
-                # TODO: Use the title attribute as description. The comment
-                # above about multiple regions won't always be correct, because
-                # two links might have the same href, but different titles.
-                annot_files[annot_target] = _write_pdf_attachment(
-                    pdf, (annot_target, None), document.url_fetcher)
-
-    # Bookmarks
-    root = []
-    # At one point in the document, for each "output" depth, how much to add to
-    # get the source level (CSS values of bookmark-level).
-    # E.g. with <h1> then <h3>, level_shifts == [0, 1]
-    # 1 means that <h3> has depth 3 - 1 = 2 in the output.
-    skipped_levels = []
-    last_by_depth = [root]
-    previous_level = 0
-    page_streams = []
-
-    for page_number, (page, links_and_anchors, page_links) in enumerate(
-            zip(document.pages, page_links_and_anchors, attachment_links)):
+    pdf_pages, page_streams = [], []
+    for page_number, (page, links_and_anchors) in enumerate(
+            zip(document.pages, page_links_and_anchors)):
        # Draw from the top-left corner
        matrix = Matrix(scale, 0, 0, -scale, 0, page.height * scale)

-        # Links and anchors
-        links, anchors = links_and_anchors
-
        page_width = scale * (
            page.width + page.bleed['left'] + page.bleed['right'])
        page_height = scale * (
@ -331,8 +171,14 @@ def generate_pdf(document, target, zoom, attachments, optimize_size,
            pdf_page['Tabs'] = '/S'
            pdf_page['StructParents'] = page_number
        pdf.add_page(pdf_page)
+        pdf_pages.append(pdf_page)

-        _add_links(links, anchors, matrix, pdf, pdf_page, pdf_names, mark)
+        add_links(links_and_anchors, matrix, pdf, pdf_page, pdf_names, mark)
+        add_inputs(
+            page.inputs, matrix, pdf, pdf_page, resources, stream,
+            document.font_config.font_map)
+        add_annotations(
+            links_and_anchors[0], matrix, document, pdf, pdf_page, annot_files)
        page.paint(stream, scale=scale)

        # Bleed
@ -355,175 +201,13 @@ def generate_pdf(document, target, zoom, attachments, optimize_size,
        pdf_page['BleedBox'] = pydyf.Array([
            bleed_left, bleed_top, bleed_right, bleed_bottom])

-        # Inputs
-        if page.inputs:
-            if 'Annots' not in pdf_page:
-                pdf_page['Annots'] = pydyf.Array()
-            if 'AcroForm' not in pdf.catalog:
-                pdf.catalog['AcroForm'] = pydyf.Dictionary({
-                    'Fields': pydyf.Array(),
-                    'DR': resources.reference,
-                })
-        for element, style, rectangle in page.inputs:
-            rectangle = (
-                *matrix.transform_point(*rectangle[:2]),
-                *matrix.transform_point(*rectangle[2:]))
-            font_map = document._font_config.font_map
-            context = ffi.gc(
-                pango.pango_font_map_create_context(font_map),
-                gobject.g_object_unref)
-            font_description = ffi.gc(
-                pango.pango_font_description_new(),
-                pango.pango_font_description_free)
-            family_p, _ = unicode_to_char_p(','.join(style['font_family']))
-            pango.pango_font_description_set_family(font_description, family_p)
-            pango.pango_font_description_set_style(
-                font_description, PANGO_STYLE[style['font_style']])
-            pango.pango_font_description_set_stretch(
-                font_description, PANGO_STRETCH[style['font_stretch']])
-            pango.pango_font_description_set_weight(
-                font_description, style['font_weight'])
-            font = pango.pango_font_map_load_font(
-                font_map, context, font_description)
-            font = stream.add_font(font)
-
-            input_type = element.attrib.get('type')
-            if input_type == 'checkbox':
-                # Checkboxes
-                width = rectangle[2] - rectangle[0]
-                height = rectangle[1] - rectangle[3]
-                checked_stream = pydyf.Stream(extra={
-                    'Resources': resources.reference,
-                    'Type': '/XObject',
-                    'Subtype': '/Form',
-                    'BBox': pydyf.Array((0, 0, width, height)),
-                })
-                checked_stream.push_state()
-                checked_stream.begin_text()
-                checked_stream.set_color_rgb(*style['color'][:3])
-                checked_stream.set_font_size('ZaDi', style['font_size'])
-                x = (width - style['font_size']) / 1.3
-                y = (height - style['font_size']) / 1.3
-                checked_stream.stream.append(f'{x} {y} Td')
-                checked_stream.stream.append('(8) Tj')
-                checked_stream.end_text()
-                checked_stream.pop_state()
-                pdf.add_object(checked_stream)
-
-                unchecked_stream = pydyf.Stream()
-                unchecked_stream.push_state()
-                unchecked_stream.pop_state()
-                pdf.add_object(unchecked_stream)
-
-                checked = 'checked' in element.attrib
-                # field_stream = pydyf.Stream()
-                # field_stream.set_color_rgb(*style['color'][:3])
-                # field_stream.set_font_size('ZaDi', style['font_size'])
-                field = pydyf.Dictionary({
-                    'Type': '/Annot',
-                    'Subtype': '/Widget',
-                    # 'F': 4,
-                    'Rect': pydyf.Array(rectangle),
-                    'FT': '/Btn',
-                    'P': pdf_page.reference,
-                    'T': pydyf.String(element.attrib.get('name', '')),
-                    'V': '/Yes' if checked else '/Off',
-                    # 'DV': '/Yes' if checked else '/Off',
-                    'DR': resources.reference,
-                    # 'DA': pydyf.String(b' '.join(field_stream.stream)),
-                    # 'MK': pydyf.Dictionary({'CA': pydyf.String('8')}),
-                    'AP': pydyf.Dictionary({'N': pydyf.Dictionary({
-                        'Yes': checked_stream.reference,
-                        'Off': unchecked_stream.reference,
-                    })}),
-                    'AS': '/Yes' if checked else '/Off',
-                })
-            else:
-                # Text, password, textarea, files, and unknown
-                field_stream = pydyf.Stream()
-                field_stream.set_color_rgb(*style['color'][:3])
-                field_stream.set_font_size(font.hash, style['font_size'])
-                value = (
-                    element.attrib.get('value', '') if element.tag == 'input'
-                    else element.text)
-                field = pydyf.Dictionary({
-                    'FT': '/Tx',
-                    'DA': pydyf.String(b' '.join(field_stream.stream)),
-                    'Type': '/Annot',
-                    'Subtype': '/Widget',
-                    'Rect': pydyf.Array(rectangle),
-                    'T': pydyf.String(element.attrib.get('name', 'unknown')),
-                    'V': pydyf.String(value),
-                    'P': pdf_page.reference,
-                })
-                if element.tag == 'textarea':
-                    field['Ff'] = 2 ** (13 - 1)
-                elif input_type == 'password':
-                    field['Ff'] = 2 ** (14 - 1)
-                elif input_type == 'file':
-                    field['Ff'] = 2 ** (21 - 1)
-
-            pdf.add_object(field)
-            pdf_page['Annots'].append(field.reference)
-            pdf.catalog['AcroForm']['Fields'].append(field.reference)
-
-        # Annotations
-        # TODO: splitting a link into multiple independent rectangular
-        # annotations works well for pure links, but rather mediocre for
-        # other annotations and fails completely for transformed (CSS) or
-        # complex link shapes (area). It would be better to use /AP for all
-        # links and coalesce link shapes that originate from the same HTML
-        # link. This would give a feeling similiar to what browsers do with
-        # links that span multiple lines.
-        for link_type, annot_target, rectangle, _ in page_links:
-            annot_file = annot_files[annot_target]
-            if link_type == 'attachment' and annot_file is not None:
-                rectangle = (
-                    *matrix.transform_point(*rectangle[:2]),
-                    *matrix.transform_point(*rectangle[2:]))
-                stream = pydyf.Stream([], {
-                    'Type': '/XObject',
-                    'Subtype': '/Form',
-                    'BBox': pydyf.Array(rectangle),
-                    'Length': 0,
-                })
-                pdf.add_object(stream)
-                annot = pydyf.Dictionary({
-                    'Type': '/Annot',
-                    'Rect': pydyf.Array(rectangle),
-                    'Subtype': '/FileAttachment',
-                    'T': pydyf.String(),
-                    'FS': annot_file.reference,
-                    'AP': pydyf.Dictionary({'N': stream.reference}),
-                    'AS': '/N',
-                })
-                pdf.add_object(annot)
-                if 'Annots' not in pdf_page:
-                    pdf_page['Annots'] = pydyf.Array()
-                pdf_page['Annots'].append(annot.reference)
-
-        # Bookmarks
-        previous_level = make_page_bookmark_tree(
-            page, skipped_levels, last_by_depth, previous_level, page_number,
-            matrix)
-
    # Outlines
-    outlines, count = _create_bookmarks(root, pdf)
-    if outlines:
-        outlines_dictionary = pydyf.Dictionary({
-            'Count': count,
-            'First': outlines[0].reference,
-            'Last': outlines[-1].reference,
-        })
-        pdf.add_object(outlines_dictionary)
-        for outline in outlines:
-            outline['Parent'] = outlines_dictionary.reference
-        pdf.catalog['Outlines'] = outlines_dictionary.reference
+    add_outlines(pdf, document.make_bookmark_tree())

    PROGRESS_LOGGER.info('Step 7 - Adding PDF metadata')

    # PDF information
-    pdf.info['Producer'] = pydyf.String(f'WeasyPrint {__version__}')
+    pdf.info['Producer'] = pydyf.String(f'WeasyPrint {VERSION}')
    metadata = document.metadata
    if metadata.title:
        pdf.info['Title'] = pydyf.String(metadata.title)
@ -554,7 +238,7 @@ def generate_pdf(document, target, zoom, attachments, optimize_size,
    attachments = metadata.attachments + (attachments or [])
    pdf_attachments = []
    for attachment in attachments:
-        pdf_attachment = _write_pdf_attachment(
+        pdf_attachment = write_pdf_attachment(
            pdf, attachment, document.url_fetcher)
        if pdf_attachment is not None:
            pdf_attachments.append(pdf_attachment)
@ -591,10 +275,9 @@ def generate_pdf(document, target, zoom, attachments, optimize_size,
            name_array.append(pydyf.String(anchor[0]))
            name_array.append(anchor[1])
        dests = pydyf.Dictionary({'Names': name_array})
-        if 'Names' in pdf.catalog:
-            pdf.catalog['Names']['Dests'] = dests
-        else:
-            pdf.catalog['Names'] = pydyf.Dictionary({'Dests': dests})
+        if 'Names' not in pdf.catalog:
+            pdf.catalog['Names'] = pydyf.Dictionary()
+        pdf.catalog['Names']['Dests'] = dests

    # Apply PDF variants functions
    if variant:
--- a/weasyprint/pdf/anchors.py
+++ b/weasyprint/pdf/anchors.py
@ -0,0 +1,345 @@
+"""Insert anchors, links, bookmarks and inputs in PDFs."""
+
+import hashlib
+import io
+import zlib
+from os.path import basename
+from urllib.parse import unquote, urlsplit
+
+import pydyf
+
+from .. import Attachment
+from ..logger import LOGGER
+from ..text.ffi import ffi, gobject, pango
+from ..text.fonts import get_font_description
+from ..urls import URLFetchingError
+
+
+def add_links(links_and_anchors, matrix, pdf, page, names, mark):
+    """Include hyperlinks in given PDF page."""
+    links, anchors = links_and_anchors
+
+    for link_type, link_target, rectangle, box in links:
+        x1, y1 = matrix.transform_point(*rectangle[:2])
+        x2, y2 = matrix.transform_point(*rectangle[2:])
+        if link_type in ('internal', 'external'):
+            box.link_annotation = pydyf.Dictionary({
+                'Type': '/Annot',
+                'Subtype': '/Link',
+                'Rect': pydyf.Array([x1, y1, x2, y2]),
+                'BS': pydyf.Dictionary({'W': 0}),
+            })
+            if mark:
+                box.link_annotation['Contents'] = pydyf.String(link_target)
+            if link_type == 'internal':
+                box.link_annotation['Dest'] = pydyf.String(link_target)
+            else:
+                box.link_annotation['A'] = pydyf.Dictionary({
+                    'Type': '/Action',
+                    'S': '/URI',
+                    'URI': pydyf.String(link_target),
+                })
+            pdf.add_object(box.link_annotation)
+            if 'Annots' not in page:
+                page['Annots'] = pydyf.Array()
+            page['Annots'].append(box.link_annotation.reference)
+
+    for anchor in anchors:
+        anchor_name, x, y = anchor
+        x, y = matrix.transform_point(x, y)
+        names.append([
+            anchor_name, pydyf.Array([page.reference, '/XYZ', x, y, 0])])
+
+
+def add_outlines(pdf, bookmarks, parent=None):
+    """Include bookmark outlines in PDF."""
+    count = len(bookmarks)
+    outlines = []
+    for title, (page, x, y), children, state in bookmarks:
+        destination = pydyf.Array((pdf.page_references[page], '/XYZ', x, y, 0))
+        outline = pydyf.Dictionary({
+            'Title': pydyf.String(title), 'Dest': destination})
+        pdf.add_object(outline)
+        children_outlines, children_count = add_outlines(
+            pdf, children, parent=outline)
+        outline['Count'] = children_count
+        if state == 'closed':
+            outline['Count'] *= -1
+        else:
+            count += children_count
+        if outlines:
+            outline['Prev'] = outlines[-1].reference
+            outlines[-1]['Next'] = outline.reference
+        if children_outlines:
+            outline['First'] = children_outlines[0].reference
+            outline['Last'] = children_outlines[-1].reference
+        if parent is not None:
+            outline['Parent'] = parent.reference
+        outlines.append(outline)
+
+    if parent is None and outlines:
+        outlines_dictionary = pydyf.Dictionary({
+            'Count': count,
+            'First': outlines[0].reference,
+            'Last': outlines[-1].reference,
+        })
+        pdf.add_object(outlines_dictionary)
+        for outline in outlines:
+            outline['Parent'] = outlines_dictionary.reference
+        pdf.catalog['Outlines'] = outlines_dictionary.reference
+
+    return outlines, count
+
+
+def add_inputs(inputs, matrix, pdf, page, resources, stream, font_map):
+    """Include form inputs in PDF."""
+    if not inputs:
+        return
+
+    if 'Annots' not in page:
+        page['Annots'] = pydyf.Array()
+    if 'AcroForm' not in pdf.catalog:
+        pdf.catalog['AcroForm'] = pydyf.Dictionary({
+            'Fields': pydyf.Array(),
+            'DR': resources.reference,
+        })
+    context = ffi.gc(
+        pango.pango_font_map_create_context(font_map),
+        gobject.g_object_unref)
+    for element, style, rectangle in inputs:
+        rectangle = (
+            *matrix.transform_point(*rectangle[:2]),
+            *matrix.transform_point(*rectangle[2:]))
+        font_description = get_font_description(style)
+        font = pango.pango_font_map_load_font(
+            font_map, context, font_description)
+        font = stream.add_font(font)
+
+        input_type = element.attrib.get('type')
+        if input_type == 'checkbox':
+            # Checkboxes
+            width = rectangle[2] - rectangle[0]
+            height = rectangle[1] - rectangle[3]
+            checked_stream = pydyf.Stream(extra={
+                'Resources': resources.reference,
+                'Type': '/XObject',
+                'Subtype': '/Form',
+                'BBox': pydyf.Array((0, 0, width, height)),
+            })
+            checked_stream.push_state()
+            checked_stream.begin_text()
+            checked_stream.set_color_rgb(*style['color'][:3])
+            checked_stream.set_font_size('ZaDi', style['font_size'])
+            x = (width - style['font_size']) / 1.3
+            y = (height - style['font_size']) / 1.3
+            checked_stream.stream.append(f'{x} {y} Td')
+            checked_stream.stream.append('(8) Tj')
+            checked_stream.end_text()
+            checked_stream.pop_state()
+            pdf.add_object(checked_stream)
+
+            unchecked_stream = pydyf.Stream()
+            unchecked_stream.push_state()
+            unchecked_stream.pop_state()
+            pdf.add_object(unchecked_stream)
+
+            checked = 'checked' in element.attrib
+            # field_stream = pydyf.Stream()
+            # field_stream.set_color_rgb(*style['color'][:3])
+            # field_stream.set_font_size('ZaDi', style['font_size'])
+            field = pydyf.Dictionary({
+                'Type': '/Annot',
+                'Subtype': '/Widget',
+                # 'F': 4,
+                'Rect': pydyf.Array(rectangle),
+                'FT': '/Btn',
+                'P': page.reference,
+                'T': pydyf.String(element.attrib.get('name', '')),
+                'V': '/Yes' if checked else '/Off',
+                # 'DV': '/Yes' if checked else '/Off',
+                'DR': resources.reference,
+                # 'DA': pydyf.String(b' '.join(field_stream.stream)),
+                # 'MK': pydyf.Dictionary({'CA': pydyf.String('8')}),
+                'AP': pydyf.Dictionary({'N': pydyf.Dictionary({
+                    'Yes': checked_stream.reference,
+                    'Off': unchecked_stream.reference,
+                })}),
+                'AS': '/Yes' if checked else '/Off',
+            })
+        else:
+            # Text, password, textarea, files, and unknown
+            field_stream = pydyf.Stream()
+            field_stream.set_color_rgb(*style['color'][:3])
+            field_stream.set_font_size(font.hash, style['font_size'])
+            value = (
+                element.attrib.get('value', '') if element.tag == 'input'
+                else element.text)
+            field = pydyf.Dictionary({
+                'FT': '/Tx',
+                'DA': pydyf.String(b' '.join(field_stream.stream)),
+                'Type': '/Annot',
+                'Subtype': '/Widget',
+                'Rect': pydyf.Array(rectangle),
+                'T': pydyf.String(element.attrib.get('name', 'unknown')),
+                'V': pydyf.String(value),
+                'P': page.reference,
+            })
+            if element.tag == 'textarea':
+                field['Ff'] = 2 ** (13 - 1)
+            elif input_type == 'password':
+                field['Ff'] = 2 ** (14 - 1)
+            elif input_type == 'file':
+                field['Ff'] = 2 ** (21 - 1)
+
+        pdf.add_object(field)
+        page['Annots'].append(field.reference)
+        pdf.catalog['AcroForm']['Fields'].append(field.reference)
+
+
+def add_annotations(links, matrix, document, pdf, page, annot_files):
+    """Add a file attachment annotation for each attachment link.
+
+    Links whose type is not ``'attachment'`` are ignored.
+
+    ``annot_files`` maps each link target to the value returned by
+    ``write_pdf_attachment`` for it, and is updated in place so that a
+    given target is only embedded once. Targets whose stored value is
+    ``None`` get no annotation.
+
+    The link rectangle is transformed with ``matrix`` and the resulting
+    annotation reference is appended to ``page['Annots']``, which is
+    created when missing.
+
+    """
+    # TODO: splitting a link into multiple independent rectangular
+    # annotations works well for pure links, but rather mediocre for
+    # other annotations and fails completely for transformed (CSS) or
+    # complex link shapes (area). It would be better to use /AP for all
+    # links and coalesce link shapes that originate from the same HTML
+    # link. This would give a feeling similar to what browsers do with
+    # links that span multiple lines.
+    for link_type, annot_target, link_rectangle, _ in links:
+        if link_type != 'attachment':
+            continue
+
+        if annot_target in annot_files:
+            annot_file = annot_files[annot_target]
+        else:
+            # A single link can be split in multiple regions. We don't want
+            # to embed a file multiple times of course, so keep a reference
+            # to every embedded URL and reuse the object number.
+            # TODO: Use the title attribute as description. The comment
+            # above about multiple regions won't always be correct, because
+            # two links might have the same href, but different titles.
+            annot_file = write_pdf_attachment(
+                pdf, (annot_target, None), document.url_fetcher)
+            annot_files[annot_target] = annot_file
+        if annot_file is None:
+            # Nothing was embedded for this target, skip the annotation.
+            continue
+
+        # Transform both corner points of the link rectangle.
+        start_point = matrix.transform_point(*link_rectangle[:2])
+        end_point = matrix.transform_point(*link_rectangle[2:])
+        bounds = (*start_point, *end_point)
+
+        # Empty form XObject used as the normal appearance.
+        appearance_extra = {
+            'Type': '/XObject',
+            'Subtype': '/Form',
+            'BBox': pydyf.Array(bounds),
+            'Length': 0,
+        }
+        appearance = pydyf.Stream([], appearance_extra)
+        pdf.add_object(appearance)
+
+        annotation = pydyf.Dictionary({
+            'Type': '/Annot',
+            'Rect': pydyf.Array(bounds),
+            'Subtype': '/FileAttachment',
+            'T': pydyf.String(),
+            'FS': annot_file.reference,
+            'AP': pydyf.Dictionary({'N': appearance.reference}),
+            'AS': '/N',
+        })
+        pdf.add_object(annotation)
+
+        if 'Annots' not in page:
+            page['Annots'] = pydyf.Array()
+        page['Annots'].append(annotation.reference)
+
+
+def write_pdf_attachment(pdf, attachment, url_fetcher):
+    """Write an attachment to the PDF stream.
+
+    :param pdf:
+        PDF object the embedded file stream and the file specification
+        are added to with ``add_object``.
+    :param attachment:
+        Either an ``Attachment`` instance, a ``(url, description)`` tuple,
+        or any other value that is given to ``Attachment`` as ``guess``.
+    :param url_fetcher:
+        URL fetcher given to ``Attachment`` when ``attachment`` is not
+        already an ``Attachment`` instance.
+    :returns:
+        The ``/Filespec`` dictionary added to ``pdf``, or ``None`` if a
+        ``URLFetchingError`` is raised while reading the attachment (the
+        error is logged).
+
+    """
+    # Attachments from document links like <link> or <a> can only be URLs.
+    # They're passed in as tuples
+    url = ''
+    if isinstance(attachment, tuple):
+        url, description = attachment
+        attachment = Attachment(
+            url=url, url_fetcher=url_fetcher, description=description)
+    elif not isinstance(attachment, Attachment):
+        attachment = Attachment(guess=attachment, url_fetcher=url_fetcher)
+
+    try:
+        with attachment.source as (_, source, url, _):
+            if isinstance(source, bytes):
+                source = io.BytesIO(source)
+            uncompressed_length = 0
+            # Collect compressed chunks and join them once at the end:
+            # repeated ``bytes +=`` copies the whole buffer each time.
+            chunks = []
+            md5 = hashlib.md5()
+            compress = zlib.compressobj()
+            for data in iter(lambda: source.read(4096), b''):
+                uncompressed_length += len(data)
+                md5.update(data)
+                chunks.append(compress.compress(data))
+            chunks.append(compress.flush(zlib.Z_FINISH))
+            stream = b''.join(chunks)
+            file_extra = pydyf.Dictionary({
+                'Type': '/EmbeddedFile',
+                'Filter': '/FlateDecode',
+                'Params': pydyf.Dictionary({
+                    'CheckSum': f'<{md5.hexdigest()}>',
+                    'Size': uncompressed_length,
+                })
+            })
+            file_stream = pydyf.Stream([stream], file_extra)
+            pdf.add_object(file_stream)
+
+    except URLFetchingError as exception:
+        LOGGER.error('Failed to load attachment: %s', exception)
+        return None
+
+    # TODO: Use the result object from a URL fetch operation to provide more
+    # details on the possible filename.
+    if url and urlsplit(url).path:
+        filename = basename(unquote(urlsplit(url).path))
+    else:
+        filename = 'attachment.bin'
+
+    attachment = pydyf.Dictionary({
+        'Type': '/Filespec',
+        'F': pydyf.String(),
+        'UF': pydyf.String(filename),
+        'EF': pydyf.Dictionary({'F': file_stream.reference}),
+        'Desc': pydyf.String(attachment.description or ''),
+    })
+    pdf.add_object(attachment)
+    return attachment
+
+
+def resolve_links(pages):
+    """Resolve internal hyperlinks.
+
+    Internal links to a missing anchor are removed and an error is logged.
+
+    If multiple anchors have the same name, the first one is used.
+
+    :param pages:
+        Iterable of pages, each with an ``anchors`` mapping of anchor names
+        to ``(x, y)`` points and a ``links`` list of 4-item links whose
+        first two items are the link type and the target. It is iterated
+        twice.
+    :returns:
+        A generator yielding one ``(page_links, page_anchors)`` tuple per
+        page. ``page_links`` is like :attr:`Page.links` without the
+        internal links whose anchor does not exist in any page.
+        ``page_anchors`` is a list of ``(anchor_name, x, y)`` tuples for
+        the anchors whose first occurrence is on this page, with ``x, y``
+        taken unchanged from the page anchors.
+
+    """
+    anchors = set()
+    paged_anchors = []
+    # First pass: collect all anchor names, so that links can target
+    # anchors defined on later pages.
+    for page in pages:
+        page_anchors = []
+        for anchor_name, (point_x, point_y) in page.anchors.items():
+            if anchor_name not in anchors:
+                page_anchors.append((anchor_name, point_x, point_y))
+                anchors.add(anchor_name)
+        paged_anchors.append(page_anchors)
+    # Second pass: drop internal links whose anchor is unknown.
+    for page, page_anchors in zip(pages, paged_anchors):
+        page_links = []
+        for link in page.links:
+            link_type, anchor_name, _, _ = link
+            if link_type == 'internal' and anchor_name not in anchors:
+                LOGGER.error(
+                    'No anchor #%s for internal URI reference',
+                    anchor_name)
+                continue
+            # Valid internal link or other (external, attachment) link
+            page_links.append(link)
+        yield page_links, page_anchors
--- a/weasyprint/svg/utils.py
+++ b/weasyprint/svg/utils.py
@ -140,8 +140,7 @@ def color(string):

 def transform(transform_string, font_size, normalized_diagonal):
    """Get a matrix corresponding to the transform string."""
-    # TODO: merge with Page._gather_links_and_bookmarks and
-    # css.validation.properties.transform
+    # TODO: merge with gather_anchors and css.validation.properties.transform
    transformations = re.findall(
        r'(\w+) ?\( ?(.*?) ?\)', normalize(transform_string))
    matrix = Matrix()
--- a/weasyprint/text/fonts.py
+++ b/weasyprint/text/fonts.py
@ -13,8 +13,10 @@ from ..logger import LOGGER
 from ..urls import FILESYSTEM_ENCODING, fetch
 from .constants import (
    CAPS_KEYS, EAST_ASIAN_KEYS, FONTCONFIG_STRETCH, FONTCONFIG_STYLE,
-    FONTCONFIG_WEIGHT, LIGATURE_KEYS, NUMERIC_KEYS)
-from .ffi import ffi, fontconfig, gobject, pangoft2
+    FONTCONFIG_WEIGHT, LIGATURE_KEYS, NUMERIC_KEYS, PANGO_STRETCH, PANGO_STYLE)
+from .ffi import (
+    ffi, fontconfig, gobject, pango, pangoft2, unicode_to_char_p,
+    units_from_double)


 def _check_font_configuration(font_config):  # pragma: no cover
@ -326,3 +328,27 @@ def font_features(font_kerning='normal', font_variant_ligatures='normal',
        features.update(dict(font_feature_settings))

    return features
+
+
+def get_font_description(style, font_size=None):
+    """Build a Pango font description from a style.
+
+    The family, style, stretch and weight are always set from ``style``.
+    The absolute size is only set when ``font_size`` is not ``None``, and
+    the variations are only set when ``font_variation_settings`` is not
+    ``'normal'``.
+
+    :returns:
+        A new font description, wrapped with ``ffi.gc`` using
+        ``pango_font_description_free`` as destructor.
+
+    """
+    description = ffi.gc(
+        pango.pango_font_description_new(),
+        pango.pango_font_description_free)
+
+    # NOTE(review): ``family`` is unused but kept bound, presumably to keep
+    # the memory behind ``family_p`` alive during the call -- confirm.
+    families = ','.join(style['font_family'])
+    family_p, family = unicode_to_char_p(families)
+    pango.pango_font_description_set_family(description, family_p)
+
+    pango_style = PANGO_STYLE[style['font_style']]
+    pango.pango_font_description_set_style(description, pango_style)
+
+    pango_stretch = PANGO_STRETCH[style['font_stretch']]
+    pango.pango_font_description_set_stretch(description, pango_stretch)
+
+    pango.pango_font_description_set_weight(
+        description, style['font_weight'])
+
+    if font_size is not None:
+        pango_size = units_from_double(font_size)
+        pango.pango_font_description_set_absolute_size(
+            description, pango_size)
+
+    if style['font_variation_settings'] != 'normal':
+        # Serialize the (key, value) pairs as "key=value,key=value".
+        settings = [
+            f'{key}={value}'
+            for key, value in style['font_variation_settings']]
+        variations = ','.join(settings).encode()
+        pango.pango_font_description_set_variations(
+            description, variations)
+
+    return description
--- a/weasyprint/text/line_break.py
+++ b/weasyprint/text/line_break.py
@ -5,11 +5,11 @@ from math import inf

 import pyphen

-from .constants import LST_TO_ISO, PANGO_STRETCH, PANGO_STYLE, PANGO_WRAP_MODE
+from .constants import LST_TO_ISO, PANGO_WRAP_MODE
 from .ffi import (
    ffi, gobject, pango, pangoft2, unicode_to_char_p, units_from_double,
    units_to_double)
-from .fonts import font_features
+from .fonts import font_features, get_font_description


 def line_size(line, style):
@ -78,9 +78,6 @@ class Layout:
            pango.pango_font_map_create_context(font_map),
            gobject.g_object_unref)
        pango.pango_context_set_round_glyph_positions(pango_context, False)
-        self.layout = ffi.gc(
-            pango.pango_layout_new(pango_context),
-            gobject.g_object_unref)

        if style['font_language_override'] != 'normal':
            lang_p, lang = unicode_to_char_p(LST_TO_ISO.get(
@ -97,31 +94,17 @@ class Layout:

        assert not isinstance(style['font_family'], str), (
            'font_family should be a list')
-        self.font = ffi.gc(
-            pango.pango_font_description_new(),
-            pango.pango_font_description_free)
-        family_p, family = unicode_to_char_p(','.join(style['font_family']))
-        pango.pango_font_description_set_family(self.font, family_p)
-        pango.pango_font_description_set_style(
-            self.font, PANGO_STYLE[style['font_style']])
-        pango.pango_font_description_set_stretch(
-            self.font, PANGO_STRETCH[style['font_stretch']])
-        pango.pango_font_description_set_weight(
-            self.font, style['font_weight'])
-        pango.pango_font_description_set_absolute_size(
-            self.font, units_from_double(font_size))
-        if style['font_variation_settings'] != 'normal':
-            string = ','.join(
-                f'{key}={value}' for key, value in
-                style['font_variation_settings']).encode()
-            pango.pango_font_description_set_variations(self.font, string)
-        pango.pango_layout_set_font_description(self.layout, self.font)
+        font_description = get_font_description(style, font_size)
+        self.layout = ffi.gc(
+            pango.pango_layout_new(pango_context),
+            gobject.g_object_unref)
+        pango.pango_layout_set_font_description(self.layout, font_description)

        text_decoration = style['text_decoration_line']
        if text_decoration != 'none':
            metrics = ffi.gc(
                pango.pango_context_get_metrics(
-                    pango_context, self.font, self.language),
+                    pango_context, font_description, self.language),
                pango.pango_font_metrics_unref)
            self.ascent = units_to_double(
                pango.pango_font_metrics_get_ascent(metrics))
@ -236,7 +219,7 @@ class Layout:
        pango.pango_layout_set_tabs(self.layout, array)

    def deactivate(self):
-        del self.layout, self.font, self.language, self.style
+        del self.layout, self.language, self.style

    def reactivate(self, style):
        self.setup(self.context, style['font_size'], style)