mirror of
https://github.com/Kozea/WeasyPrint.git
synced 2024-09-11 20:47:56 +03:00
Reorganize anchors management
This commit is contained in:
parent
168ed3b9a3
commit
0b1617edc8
@ -13,7 +13,7 @@ import py
|
||||
import pytest
|
||||
from PIL import Image
|
||||
from weasyprint import CSS, HTML, __main__, default_url_fetcher
|
||||
from weasyprint.links import resolve_links
|
||||
from weasyprint.pdf.anchors import resolve_links
|
||||
from weasyprint.urls import path2url
|
||||
|
||||
from .draw import parse_pixels
|
||||
|
@ -1,53 +1,12 @@
|
||||
"""PDF links and bookmarks management."""
|
||||
"""Find anchors, links, bookmarks and inputs in documents."""
|
||||
|
||||
import math
|
||||
|
||||
from .formatting_structure import boxes
|
||||
from .layout.percent import percentage
|
||||
from .logger import LOGGER
|
||||
from .matrix import Matrix
|
||||
|
||||
|
||||
def resolve_links(pages):
    """Resolve internal hyperlinks.

    Internal links pointing to an anchor that exists in no page are dropped
    and reported through ``LOGGER.error``.

    If multiple anchors have the same name, the first one is used.

    :param pages:
        A re-iterable collection of pages (it is traversed twice). Each page
        exposes ``anchors``, a mapping of anchor names to ``(x, y)`` points,
        and ``links``, an iterable of 4-tuples whose first two items are the
        link type and the link target.
    :returns:
        A generator yielding one ``(links, anchors)`` tuple per page.
        ``links`` is a list like :attr:`Page.links`, without the internal
        links whose anchor is missing. ``anchors`` is a list of
        ``(anchor_name, x, y)`` tuples for the anchors whose first
        occurrence is in this page.

    """
    known_anchors = set()
    paged_anchors = []
    for page in pages:
        page_anchors = []
        for anchor_name, (point_x, point_y) in page.anchors.items():
            # Only the first occurrence of an anchor name is kept.
            if anchor_name not in known_anchors:
                known_anchors.add(anchor_name)
                page_anchors.append((anchor_name, point_x, point_y))
        paged_anchors.append(page_anchors)

    # Pair each page with its anchors instead of popping the head of the
    # list, which costs O(number of pages) for each page.
    for page, page_anchors in zip(pages, paged_anchors):
        page_links = []
        for link in page.links:
            link_type, anchor_name, _, _ = link
            if link_type == 'internal' and anchor_name not in known_anchors:
                LOGGER.error(
                    'No anchor #%s for internal URI reference',
                    anchor_name)
            else:
                # External link, attachment, or resolvable internal link
                page_links.append(link)
        yield page_links, page_anchors
|
||||
|
||||
|
||||
def rectangle_aabb(matrix, pos_x, pos_y, width, height):
|
||||
"""Apply a transformation matrix to an axis-aligned rectangle.
|
||||
|
||||
@ -68,8 +27,12 @@ def rectangle_aabb(matrix, pos_x, pos_y, width, height):
|
||||
return box_x1, box_y1, box_x2, box_y2
|
||||
|
||||
|
||||
def gather_links_and_bookmarks(box, anchors, links, bookmarks, inputs,
|
||||
parent_matrix=None):
|
||||
def gather_anchors(box, anchors, links, bookmarks, inputs, parent_matrix=None):
|
||||
"""Gather anchors and other data related to specific positions in PDF.
|
||||
|
||||
Currently finds anchors, links, bookmarks and inputs.
|
||||
|
||||
"""
|
||||
# Get box transformation matrix.
|
||||
# "Transforms apply to block-level and atomic inline-level elements,
|
||||
# but do not apply to elements which may be split into
|
||||
@ -149,8 +112,7 @@ def gather_links_and_bookmarks(box, anchors, links, bookmarks, inputs,
|
||||
anchors[anchor_name] = pos_x, pos_y
|
||||
|
||||
for child in box.all_children():
|
||||
gather_links_and_bookmarks(
|
||||
child, anchors, links, bookmarks, inputs, matrix)
|
||||
gather_anchors(child, anchors, links, bookmarks, inputs, matrix)
|
||||
|
||||
|
||||
def make_page_bookmark_tree(page, skipped_levels, last_by_depth,
|
@ -5,6 +5,7 @@ import io
|
||||
import shutil
|
||||
|
||||
from . import CSS
|
||||
from .anchors import gather_anchors, make_page_bookmark_tree
|
||||
from .css import get_all_computed_styles
|
||||
from .css.counters import CounterStyle
|
||||
from .css.targets import TargetCollector
|
||||
@ -13,7 +14,6 @@ from .formatting_structure.build import build_formatting_structure
|
||||
from .html import get_html_metadata
|
||||
from .images import get_image_from_uri as original_get_image_from_uri
|
||||
from .layout import LayoutContext, layout_document
|
||||
from .links import gather_links_and_bookmarks, make_page_bookmark_tree
|
||||
from .logger import PROGRESS_LOGGER
|
||||
from .matrix import Matrix
|
||||
from .pdf import generate_pdf
|
||||
@ -72,7 +72,7 @@ class Page:
|
||||
#: :obj:`dict` of HTML tag attributes and values.
|
||||
self.inputs = []
|
||||
|
||||
gather_links_and_bookmarks(
|
||||
gather_anchors(
|
||||
page_box, self.anchors, self.links, self.bookmarks, self.inputs)
|
||||
self._page_box = page_box
|
||||
|
||||
@ -247,7 +247,7 @@ class Document:
|
||||
# Keep a reference to font_config to avoid its garbage collection until
|
||||
# rendering is destroyed. This is needed as font_config.__del__ removes
|
||||
# fonts that may be used when rendering
|
||||
self._font_config = font_config
|
||||
self.font_config = font_config
|
||||
# Set of flags for PDF size optimization. Can contain "images" and
|
||||
# "fonts".
|
||||
self._optimize_size = optimize_size
|
||||
@ -290,7 +290,7 @@ class Document:
|
||||
elif not isinstance(pages, list):
|
||||
pages = list(pages)
|
||||
return type(self)(
|
||||
pages, self.metadata, self.url_fetcher, self._font_config,
|
||||
pages, self.metadata, self.url_fetcher, self.font_config,
|
||||
self._optimize_size)
|
||||
|
||||
def make_bookmark_tree(self):
|
||||
|
@ -1,20 +1,15 @@
|
||||
"""PDF generation management."""
|
||||
|
||||
import hashlib
|
||||
import io
|
||||
import zlib
|
||||
from os.path import basename
|
||||
from urllib.parse import unquote, urlsplit
|
||||
|
||||
import pydyf
|
||||
|
||||
from .. import Attachment, __version__
|
||||
from .. import VERSION
|
||||
from ..html import W3C_DATE_RE
|
||||
from ..links import make_page_bookmark_tree, resolve_links
|
||||
from ..logger import LOGGER, PROGRESS_LOGGER
|
||||
from ..matrix import Matrix
|
||||
from ..urls import URLFetchingError
|
||||
from . import pdfa, pdfua
|
||||
from .anchors import (
|
||||
add_annotations, add_inputs, add_links, add_outlines, resolve_links,
|
||||
write_pdf_attachment)
|
||||
from .fonts import build_fonts_dictionary
|
||||
from .stream import Stream
|
||||
|
||||
@ -53,71 +48,6 @@ def _w3c_date_to_pdf(string, attr_name):
|
||||
return pdf_date
|
||||
|
||||
|
||||
def _write_pdf_attachment(pdf, attachment, url_fetcher):
    """Write an attachment to the PDF stream.

    :param pdf:
        PDF object receiving the embedded file stream and the file
        specification dictionary through ``add_object``.
    :param attachment:
        An ``Attachment`` instance, a ``(url, description)`` tuple, or any
        other value, which is then given to ``Attachment(guess=...)``.
    :param url_fetcher:
        URL fetcher given to the ``Attachment`` objects created here.
    :return:
        the attachment PDF dictionary, or ``None`` when fetching the
        attachment raised ``URLFetchingError`` (the error is logged).

    """
    # Attachments from document links like <link> or <a> can only be URLs.
    # They're passed in as tuples
    url = ''
    if isinstance(attachment, tuple):
        url, description = attachment
        attachment = Attachment(
            url=url, url_fetcher=url_fetcher, description=description)
    elif not isinstance(attachment, Attachment):
        attachment = Attachment(guess=attachment, url_fetcher=url_fetcher)

    try:
        with attachment.source as (_source_type, source, url, _):
            if isinstance(source, bytes):
                source = io.BytesIO(source)
            uncompressed_length = 0
            # Collect compressed chunks and join them once, repeated
            # ``bytes +=`` copies the whole buffer for each chunk.
            compressed_chunks = []
            md5 = hashlib.md5()
            compress = zlib.compressobj()
            for data in iter(lambda: source.read(4096), b''):
                uncompressed_length += len(data)
                md5.update(data)
                compressed_chunks.append(compress.compress(data))
            compressed_chunks.append(compress.flush(zlib.Z_FINISH))
            stream = b''.join(compressed_chunks)
            file_extra = pydyf.Dictionary({
                'Type': '/EmbeddedFile',
                'Filter': '/FlateDecode',
                'Params': pydyf.Dictionary({
                    'CheckSum': f'<{md5.hexdigest()}>',
                    'Size': uncompressed_length,
                })
            })
            file_stream = pydyf.Stream([stream], file_extra)
            pdf.add_object(file_stream)

    except URLFetchingError as exception:
        LOGGER.error('Failed to load attachment: %s', exception)
        return None

    # TODO: Use the result object from a URL fetch operation to provide more
    # details on the possible filename.
    if url and urlsplit(url).path:
        filename = basename(unquote(urlsplit(url).path))
    else:
        filename = 'attachment.bin'

    file_specification = pydyf.Dictionary({
        'Type': '/Filespec',
        'F': pydyf.String(),
        'UF': pydyf.String(filename),
        'EF': pydyf.Dictionary({'F': file_stream.reference}),
        'Desc': pydyf.String(attachment.description or ''),
    })
    pdf.add_object(file_specification)
    return file_specification
|
||||
|
||||
|
||||
def _reference_resources(pdf, resources, images, fonts):
|
||||
if 'Font' in resources:
|
||||
assert resources['Font'] is None
|
||||
@ -170,67 +100,6 @@ def _use_references(pdf, resources, images):
|
||||
alpha['SMask']['G'] = alpha['SMask']['G'].reference
|
||||
|
||||
|
||||
def _add_links(links, anchors, matrix, pdf, page, names, mark):
|
||||
"""Include hyperlinks in given PDF page."""
|
||||
for link_type, link_target, rectangle, box in links:
|
||||
x1, y1 = matrix.transform_point(*rectangle[:2])
|
||||
x2, y2 = matrix.transform_point(*rectangle[2:])
|
||||
if link_type in ('internal', 'external'):
|
||||
box.link_annotation = pydyf.Dictionary({
|
||||
'Type': '/Annot',
|
||||
'Subtype': '/Link',
|
||||
'Rect': pydyf.Array([x1, y1, x2, y2]),
|
||||
'BS': pydyf.Dictionary({'W': 0}),
|
||||
})
|
||||
if mark:
|
||||
box.link_annotation['Contents'] = pydyf.String(link_target)
|
||||
if link_type == 'internal':
|
||||
box.link_annotation['Dest'] = pydyf.String(link_target)
|
||||
else:
|
||||
box.link_annotation['A'] = pydyf.Dictionary({
|
||||
'Type': '/Action',
|
||||
'S': '/URI',
|
||||
'URI': pydyf.String(link_target),
|
||||
})
|
||||
pdf.add_object(box.link_annotation)
|
||||
if 'Annots' not in page:
|
||||
page['Annots'] = pydyf.Array()
|
||||
page['Annots'].append(box.link_annotation.reference)
|
||||
|
||||
for anchor in anchors:
|
||||
anchor_name, x, y = anchor
|
||||
x, y = matrix.transform_point(x, y)
|
||||
names.append([
|
||||
anchor_name, pydyf.Array([page.reference, '/XYZ', x, y, 0])])
|
||||
|
||||
|
||||
def _create_bookmarks(bookmarks, pdf, parent=None):
|
||||
count = len(bookmarks)
|
||||
outlines = []
|
||||
for title, (page, x, y), children, state in bookmarks:
|
||||
destination = pydyf.Array((pdf.page_references[page], '/XYZ', x, y, 0))
|
||||
outline = pydyf.Dictionary({
|
||||
'Title': pydyf.String(title), 'Dest': destination})
|
||||
pdf.add_object(outline)
|
||||
children_outlines, children_count = _create_bookmarks(
|
||||
children, pdf, parent=outline)
|
||||
outline['Count'] = children_count
|
||||
if state == 'closed':
|
||||
outline['Count'] *= -1
|
||||
else:
|
||||
count += children_count
|
||||
if outlines:
|
||||
outline['Prev'] = outlines[-1].reference
|
||||
outlines[-1]['Next'] = outline.reference
|
||||
if children_outlines:
|
||||
outline['First'] = children_outlines[0].reference
|
||||
outline['Last'] = children_outlines[-1].reference
|
||||
if parent is not None:
|
||||
outline['Parent'] = parent.reference
|
||||
outlines.append(outline)
|
||||
return outlines, count
|
||||
|
||||
|
||||
def generate_pdf(document, target, zoom, attachments, optimize_size,
|
||||
identifier, variant, version, custom_metadata):
|
||||
# 0.75 = 72 PDF point per inch / 96 CSS pixel per inch
|
||||
@ -264,43 +133,14 @@ def generate_pdf(document, target, zoom, attachments, optimize_size,
|
||||
|
||||
# Links and anchors
|
||||
page_links_and_anchors = list(resolve_links(document.pages))
|
||||
attachment_links = [
|
||||
[link for link in page_links if link[0] == 'attachment']
|
||||
for page_links, page_anchors in page_links_and_anchors]
|
||||
|
||||
# Annotations
|
||||
annot_files = {}
|
||||
# A single link can be split in multiple regions. We don't want to embed a
|
||||
# file multiple times of course, so keep a reference to every embedded URL
|
||||
# and reuse the object number.
|
||||
for page_links in attachment_links:
|
||||
for link_type, annot_target, rectangle, _ in page_links:
|
||||
if link_type == 'attachment' and target not in annot_files:
|
||||
# TODO: Use the title attribute as description. The comment
|
||||
# above about multiple regions won't always be correct, because
|
||||
# two links might have the same href, but different titles.
|
||||
annot_files[annot_target] = _write_pdf_attachment(
|
||||
pdf, (annot_target, None), document.url_fetcher)
|
||||
|
||||
# Bookmarks
|
||||
root = []
|
||||
# At one point in the document, for each "output" depth, how much to add to
|
||||
# get the source level (CSS values of bookmark-level).
|
||||
# E.g. with <h1> then <h3>, level_shifts == [0, 1]
|
||||
# 1 means that <h3> has depth 3 - 1 = 2 in the output.
|
||||
skipped_levels = []
|
||||
last_by_depth = [root]
|
||||
previous_level = 0
|
||||
page_streams = []
|
||||
|
||||
for page_number, (page, links_and_anchors, page_links) in enumerate(
|
||||
zip(document.pages, page_links_and_anchors, attachment_links)):
|
||||
pdf_pages, page_streams = [], []
|
||||
for page_number, (page, links_and_anchors) in enumerate(
|
||||
zip(document.pages, page_links_and_anchors)):
|
||||
# Draw from the top-left corner
|
||||
matrix = Matrix(scale, 0, 0, -scale, 0, page.height * scale)
|
||||
|
||||
# Links and anchors
|
||||
links, anchors = links_and_anchors
|
||||
|
||||
page_width = scale * (
|
||||
page.width + page.bleed['left'] + page.bleed['right'])
|
||||
page_height = scale * (
|
||||
@ -331,8 +171,14 @@ def generate_pdf(document, target, zoom, attachments, optimize_size,
|
||||
pdf_page['Tabs'] = '/S'
|
||||
pdf_page['StructParents'] = page_number
|
||||
pdf.add_page(pdf_page)
|
||||
pdf_pages.append(pdf_page)
|
||||
|
||||
_add_links(links, anchors, matrix, pdf, pdf_page, pdf_names, mark)
|
||||
add_links(links_and_anchors, matrix, pdf, pdf_page, pdf_names, mark)
|
||||
add_inputs(
|
||||
page.inputs, matrix, pdf, pdf_page, resources, stream,
|
||||
document.font_config.font_map)
|
||||
add_annotations(
|
||||
links_and_anchors[0], matrix, document, pdf, pdf_page, annot_files)
|
||||
page.paint(stream, scale=scale)
|
||||
|
||||
# Bleed
|
||||
@ -355,175 +201,13 @@ def generate_pdf(document, target, zoom, attachments, optimize_size,
|
||||
pdf_page['BleedBox'] = pydyf.Array([
|
||||
bleed_left, bleed_top, bleed_right, bleed_bottom])
|
||||
|
||||
# Inputs
|
||||
if page.inputs:
|
||||
if 'Annots' not in pdf_page:
|
||||
pdf_page['Annots'] = pydyf.Array()
|
||||
if 'AcroForm' not in pdf.catalog:
|
||||
pdf.catalog['AcroForm'] = pydyf.Dictionary({
|
||||
'Fields': pydyf.Array(),
|
||||
'DR': resources.reference,
|
||||
})
|
||||
for element, style, rectangle in page.inputs:
|
||||
rectangle = (
|
||||
*matrix.transform_point(*rectangle[:2]),
|
||||
*matrix.transform_point(*rectangle[2:]))
|
||||
font_map = document._font_config.font_map
|
||||
context = ffi.gc(
|
||||
pango.pango_font_map_create_context(font_map),
|
||||
gobject.g_object_unref)
|
||||
font_description = ffi.gc(
|
||||
pango.pango_font_description_new(),
|
||||
pango.pango_font_description_free)
|
||||
family_p, _ = unicode_to_char_p(','.join(style['font_family']))
|
||||
pango.pango_font_description_set_family(font_description, family_p)
|
||||
pango.pango_font_description_set_style(
|
||||
font_description, PANGO_STYLE[style['font_style']])
|
||||
pango.pango_font_description_set_stretch(
|
||||
font_description, PANGO_STRETCH[style['font_stretch']])
|
||||
pango.pango_font_description_set_weight(
|
||||
font_description, style['font_weight'])
|
||||
font = pango.pango_font_map_load_font(
|
||||
font_map, context, font_description)
|
||||
font = stream.add_font(font)
|
||||
|
||||
input_type = element.attrib.get('type')
|
||||
if input_type == 'checkbox':
|
||||
# Checkboxes
|
||||
width = rectangle[2] - rectangle[0]
|
||||
height = rectangle[1] - rectangle[3]
|
||||
checked_stream = pydyf.Stream(extra={
|
||||
'Resources': resources.reference,
|
||||
'Type': '/XObject',
|
||||
'Subtype': '/Form',
|
||||
'BBox': pydyf.Array((0, 0, width, height)),
|
||||
})
|
||||
checked_stream.push_state()
|
||||
checked_stream.begin_text()
|
||||
checked_stream.set_color_rgb(*style['color'][:3])
|
||||
checked_stream.set_font_size('ZaDi', style['font_size'])
|
||||
x = (width - style['font_size']) / 1.3
|
||||
y = (height - style['font_size']) / 1.3
|
||||
checked_stream.stream.append(f'{x} {y} Td')
|
||||
checked_stream.stream.append('(8) Tj')
|
||||
checked_stream.end_text()
|
||||
checked_stream.pop_state()
|
||||
pdf.add_object(checked_stream)
|
||||
|
||||
unchecked_stream = pydyf.Stream()
|
||||
unchecked_stream.push_state()
|
||||
unchecked_stream.pop_state()
|
||||
pdf.add_object(unchecked_stream)
|
||||
|
||||
checked = 'checked' in element.attrib
|
||||
# field_stream = pydyf.Stream()
|
||||
# field_stream.set_color_rgb(*style['color'][:3])
|
||||
# field_stream.set_font_size('ZaDi', style['font_size'])
|
||||
field = pydyf.Dictionary({
|
||||
'Type': '/Annot',
|
||||
'Subtype': '/Widget',
|
||||
# 'F': 4,
|
||||
'Rect': pydyf.Array(rectangle),
|
||||
'FT': '/Btn',
|
||||
'P': pdf_page.reference,
|
||||
'T': pydyf.String(element.attrib.get('name', '')),
|
||||
'V': '/Yes' if checked else '/Off',
|
||||
# 'DV': '/Yes' if checked else '/Off',
|
||||
'DR': resources.reference,
|
||||
# 'DA': pydyf.String(b' '.join(field_stream.stream)),
|
||||
# 'MK': pydyf.Dictionary({'CA': pydyf.String('8')}),
|
||||
'AP': pydyf.Dictionary({'N': pydyf.Dictionary({
|
||||
'Yes': checked_stream.reference,
|
||||
'Off': unchecked_stream.reference,
|
||||
})}),
|
||||
'AS': '/Yes' if checked else '/Off',
|
||||
})
|
||||
else:
|
||||
# Text, password, textarea, files, and unknown
|
||||
field_stream = pydyf.Stream()
|
||||
field_stream.set_color_rgb(*style['color'][:3])
|
||||
field_stream.set_font_size(font.hash, style['font_size'])
|
||||
value = (
|
||||
element.attrib.get('value', '') if element.tag == 'input'
|
||||
else element.text)
|
||||
field = pydyf.Dictionary({
|
||||
'FT': '/Tx',
|
||||
'DA': pydyf.String(b' '.join(field_stream.stream)),
|
||||
'Type': '/Annot',
|
||||
'Subtype': '/Widget',
|
||||
'Rect': pydyf.Array(rectangle),
|
||||
'T': pydyf.String(element.attrib.get('name', 'unknown')),
|
||||
'V': pydyf.String(value),
|
||||
'P': pdf_page.reference,
|
||||
})
|
||||
if element.tag == 'textarea':
|
||||
field['Ff'] = 2 ** (13 - 1)
|
||||
elif input_type == 'password':
|
||||
field['Ff'] = 2 ** (14 - 1)
|
||||
elif input_type == 'file':
|
||||
field['Ff'] = 2 ** (21 - 1)
|
||||
|
||||
pdf.add_object(field)
|
||||
pdf_page['Annots'].append(field.reference)
|
||||
pdf.catalog['AcroForm']['Fields'].append(field.reference)
|
||||
|
||||
# Annotations
|
||||
# TODO: splitting a link into multiple independent rectangular
|
||||
# annotations works well for pure links, but rather mediocre for
|
||||
# other annotations and fails completely for transformed (CSS) or
|
||||
# complex link shapes (area). It would be better to use /AP for all
|
||||
# links and coalesce link shapes that originate from the same HTML
|
||||
# link. This would give a feeling similar to what browsers do with
|
||||
# links that span multiple lines.
|
||||
for link_type, annot_target, rectangle, _ in page_links:
|
||||
annot_file = annot_files[annot_target]
|
||||
if link_type == 'attachment' and annot_file is not None:
|
||||
rectangle = (
|
||||
*matrix.transform_point(*rectangle[:2]),
|
||||
*matrix.transform_point(*rectangle[2:]))
|
||||
stream = pydyf.Stream([], {
|
||||
'Type': '/XObject',
|
||||
'Subtype': '/Form',
|
||||
'BBox': pydyf.Array(rectangle),
|
||||
'Length': 0,
|
||||
})
|
||||
pdf.add_object(stream)
|
||||
annot = pydyf.Dictionary({
|
||||
'Type': '/Annot',
|
||||
'Rect': pydyf.Array(rectangle),
|
||||
'Subtype': '/FileAttachment',
|
||||
'T': pydyf.String(),
|
||||
'FS': annot_file.reference,
|
||||
'AP': pydyf.Dictionary({'N': stream.reference}),
|
||||
'AS': '/N',
|
||||
})
|
||||
pdf.add_object(annot)
|
||||
if 'Annots' not in pdf_page:
|
||||
pdf_page['Annots'] = pydyf.Array()
|
||||
pdf_page['Annots'].append(annot.reference)
|
||||
|
||||
# Bookmarks
|
||||
previous_level = make_page_bookmark_tree(
|
||||
page, skipped_levels, last_by_depth, previous_level, page_number,
|
||||
matrix)
|
||||
|
||||
# Outlines
|
||||
outlines, count = _create_bookmarks(root, pdf)
|
||||
if outlines:
|
||||
outlines_dictionary = pydyf.Dictionary({
|
||||
'Count': count,
|
||||
'First': outlines[0].reference,
|
||||
'Last': outlines[-1].reference,
|
||||
})
|
||||
pdf.add_object(outlines_dictionary)
|
||||
for outline in outlines:
|
||||
outline['Parent'] = outlines_dictionary.reference
|
||||
pdf.catalog['Outlines'] = outlines_dictionary.reference
|
||||
add_outlines(pdf, document.make_bookmark_tree())
|
||||
|
||||
PROGRESS_LOGGER.info('Step 7 - Adding PDF metadata')
|
||||
|
||||
# PDF information
|
||||
pdf.info['Producer'] = pydyf.String(f'WeasyPrint {__version__}')
|
||||
pdf.info['Producer'] = pydyf.String(f'WeasyPrint {VERSION}')
|
||||
metadata = document.metadata
|
||||
if metadata.title:
|
||||
pdf.info['Title'] = pydyf.String(metadata.title)
|
||||
@ -554,7 +238,7 @@ def generate_pdf(document, target, zoom, attachments, optimize_size,
|
||||
attachments = metadata.attachments + (attachments or [])
|
||||
pdf_attachments = []
|
||||
for attachment in attachments:
|
||||
pdf_attachment = _write_pdf_attachment(
|
||||
pdf_attachment = write_pdf_attachment(
|
||||
pdf, attachment, document.url_fetcher)
|
||||
if pdf_attachment is not None:
|
||||
pdf_attachments.append(pdf_attachment)
|
||||
@ -591,10 +275,9 @@ def generate_pdf(document, target, zoom, attachments, optimize_size,
|
||||
name_array.append(pydyf.String(anchor[0]))
|
||||
name_array.append(anchor[1])
|
||||
dests = pydyf.Dictionary({'Names': name_array})
|
||||
if 'Names' in pdf.catalog:
|
||||
pdf.catalog['Names']['Dests'] = dests
|
||||
else:
|
||||
pdf.catalog['Names'] = pydyf.Dictionary({'Dests': dests})
|
||||
if 'Names' not in pdf.catalog:
|
||||
pdf.catalog['Names'] = pydyf.Dictionary()
|
||||
pdf.catalog['Names']['Dests'] = dests
|
||||
|
||||
# Apply PDF variants functions
|
||||
if variant:
|
||||
|
345
weasyprint/pdf/anchors.py
Normal file
345
weasyprint/pdf/anchors.py
Normal file
@ -0,0 +1,345 @@
|
||||
"""Insert anchors, links, bookmarks and inputs in PDFs."""
|
||||
|
||||
import hashlib
|
||||
import io
|
||||
import zlib
|
||||
from os.path import basename
|
||||
from urllib.parse import unquote, urlsplit
|
||||
|
||||
import pydyf
|
||||
|
||||
from .. import Attachment
|
||||
from ..logger import LOGGER
|
||||
from ..text.ffi import ffi, gobject, pango
|
||||
from ..text.fonts import get_font_description
|
||||
from ..urls import URLFetchingError
|
||||
|
||||
|
||||
def add_links(links_and_anchors, matrix, pdf, page, names, mark):
    """Include hyperlinks and named destinations in given PDF page.

    :param links_and_anchors:
        A ``(links, anchors)`` pair. ``links`` holds
        ``(link_type, link_target, rectangle, box)`` tuples and ``anchors``
        holds ``(anchor_name, x, y)`` tuples.
    :param matrix:
        Object whose ``transform_point`` converts the coordinates.
    :param pdf:
        PDF object receiving the annotations through ``add_object``.
    :param page:
        PDF page dictionary, its ``'Annots'`` array is created on demand.
    :param names:
        List extended with one ``[anchor_name, destination]`` entry per
        anchor.
    :param mark:
        When true, the link target is also stored as ``'Contents'``.

    """
    links, anchors = links_and_anchors

    for link_type, link_target, rectangle, box in links:
        corner_1 = matrix.transform_point(*rectangle[:2])
        corner_2 = matrix.transform_point(*rectangle[2:])
        x1, y1 = corner_1
        x2, y2 = corner_2
        if link_type not in ('internal', 'external'):
            # Other link types get no link annotation.
            continue

        link_annotation = pydyf.Dictionary({
            'Type': '/Annot',
            'Subtype': '/Link',
            'Rect': pydyf.Array([x1, y1, x2, y2]),
            'BS': pydyf.Dictionary({'W': 0}),
        })
        if mark:
            link_annotation['Contents'] = pydyf.String(link_target)
        if link_type == 'internal':
            link_annotation['Dest'] = pydyf.String(link_target)
        else:
            link_annotation['A'] = pydyf.Dictionary({
                'Type': '/Action',
                'S': '/URI',
                'URI': pydyf.String(link_target),
            })
        box.link_annotation = link_annotation
        pdf.add_object(link_annotation)
        if 'Annots' not in page:
            page['Annots'] = pydyf.Array()
        page['Annots'].append(link_annotation.reference)

    for anchor_name, x, y in anchors:
        x, y = matrix.transform_point(x, y)
        destination = pydyf.Array([page.reference, '/XYZ', x, y, 0])
        names.append([anchor_name, destination])
|
||||
|
||||
|
||||
def add_outlines(pdf, bookmarks, parent=None):
    """Include bookmark outlines in PDF.

    :param pdf:
        PDF object providing ``page_references``, ``add_object`` and
        ``catalog``.
    :param bookmarks:
        Sequence of ``(title, (page, x, y), children, state)`` tuples,
        where ``children`` has the same structure.
    :param parent:
        Outline dictionary of the parent bookmark, ``None`` for the root
        level.
    :returns:
        An ``(outlines, count)`` tuple: the outlines created for this level
        and the number of bookmarks of this level plus the counts of the
        children that are not ``'closed'``.

    """
    total = len(bookmarks)
    level_outlines = []
    for title, (page, x, y), children, state in bookmarks:
        destination = pydyf.Array(
            (pdf.page_references[page], '/XYZ', x, y, 0))
        outline = pydyf.Dictionary({
            'Title': pydyf.String(title),
            'Dest': destination,
        })
        pdf.add_object(outline)

        sub_outlines, sub_count = add_outlines(
            pdf, children, parent=outline)
        if state == 'closed':
            outline['Count'] = -sub_count
        else:
            outline['Count'] = sub_count
            total += sub_count

        if level_outlines:
            previous = level_outlines[-1]
            outline['Prev'] = previous.reference
            previous['Next'] = outline.reference
        if sub_outlines:
            outline['First'] = sub_outlines[0].reference
            outline['Last'] = sub_outlines[-1].reference
        if parent is not None:
            outline['Parent'] = parent.reference
        level_outlines.append(outline)

    # The root level owns the outlines dictionary referenced by the catalog.
    is_root_level = parent is None
    if is_root_level and level_outlines:
        root = pydyf.Dictionary({
            'Count': total,
            'First': level_outlines[0].reference,
            'Last': level_outlines[-1].reference,
        })
        pdf.add_object(root)
        for outline in level_outlines:
            outline['Parent'] = root.reference
        pdf.catalog['Outlines'] = root.reference

    return level_outlines, total
|
||||
|
||||
|
||||
def add_inputs(inputs, matrix, pdf, page, resources, stream, font_map):
    """Include form inputs in PDF.

    Nothing is done when ``inputs`` is empty. Otherwise, one widget
    annotation is created for each input, added to the page annotations and
    to the fields of the catalog's ``'AcroForm'`` dictionary, both created
    on demand.

    :param inputs:
        Sequence of ``(element, style, rectangle)`` tuples. ``element``
        exposes ``tag``, ``attrib`` and ``text``, ``style`` is the mapping
        given to ``get_font_description`` and read for ``'color'`` and
        ``'font_size'``.
    :param matrix:
        Object whose ``transform_point`` converts the rectangle corners.
    :param pdf:
        PDF object providing ``catalog`` and ``add_object``.
    :param page:
        PDF page dictionary receiving the annotations.
    :param resources:
        Object whose ``reference`` is used as default resources.
    :param stream:
        Page stream, its ``add_font`` registers the font of each input.
    :param font_map:
        Pango font map used to load the fonts.

    """
    if not inputs:
        return

    if 'Annots' not in page:
        page['Annots'] = pydyf.Array()
    if 'AcroForm' not in pdf.catalog:
        pdf.catalog['AcroForm'] = pydyf.Dictionary({
            'Fields': pydyf.Array(),
            'DR': resources.reference,
        })
    context = ffi.gc(
        pango.pango_font_map_create_context(font_map),
        gobject.g_object_unref)
    for element, style, rectangle in inputs:
        rectangle = (
            *matrix.transform_point(*rectangle[:2]),
            *matrix.transform_point(*rectangle[2:]))
        font_description = get_font_description(style)
        font = pango.pango_font_map_load_font(
            font_map, context, font_description)
        font = stream.add_font(font)

        input_type = element.attrib.get('type')
        if input_type == 'checkbox':
            # Checkboxes
            width = rectangle[2] - rectangle[0]
            height = rectangle[1] - rectangle[3]
            checked_stream = pydyf.Stream(extra={
                'Resources': resources.reference,
                'Type': '/XObject',
                'Subtype': '/Form',
                'BBox': pydyf.Array((0, 0, width, height)),
            })
            checked_stream.push_state()
            checked_stream.begin_text()
            checked_stream.set_color_rgb(*style['color'][:3])
            checked_stream.set_font_size('ZaDi', style['font_size'])
            x = (width - style['font_size']) / 1.3
            y = (height - style['font_size']) / 1.3
            checked_stream.stream.append(f'{x} {y} Td')
            checked_stream.stream.append('(8) Tj')
            checked_stream.end_text()
            checked_stream.pop_state()
            pdf.add_object(checked_stream)

            unchecked_stream = pydyf.Stream()
            unchecked_stream.push_state()
            unchecked_stream.pop_state()
            pdf.add_object(unchecked_stream)

            checked = 'checked' in element.attrib
            # field_stream = pydyf.Stream()
            # field_stream.set_color_rgb(*style['color'][:3])
            # field_stream.set_font_size('ZaDi', style['font_size'])
            field = pydyf.Dictionary({
                'Type': '/Annot',
                'Subtype': '/Widget',
                # 'F': 4,
                'Rect': pydyf.Array(rectangle),
                'FT': '/Btn',
                'P': page.reference,
                'T': pydyf.String(element.attrib.get('name', '')),
                'V': '/Yes' if checked else '/Off',
                # 'DV': '/Yes' if checked else '/Off',
                'DR': resources.reference,
                # 'DA': pydyf.String(b' '.join(field_stream.stream)),
                # 'MK': pydyf.Dictionary({'CA': pydyf.String('8')}),
                'AP': pydyf.Dictionary({'N': pydyf.Dictionary({
                    'Yes': checked_stream.reference,
                    'Off': unchecked_stream.reference,
                })}),
                'AS': '/Yes' if checked else '/Off',
            })
        else:
            # Text, password, textarea, files, and unknown
            field_stream = pydyf.Stream()
            field_stream.set_color_rgb(*style['color'][:3])
            field_stream.set_font_size(font.hash, style['font_size'])
            if element.tag == 'input':
                value = element.attrib.get('value', '')
            else:
                # The text of an element without content is None, use an
                # empty string so that the field value is always a string.
                value = element.text or ''
            field = pydyf.Dictionary({
                'FT': '/Tx',
                'DA': pydyf.String(b' '.join(field_stream.stream)),
                'Type': '/Annot',
                'Subtype': '/Widget',
                'Rect': pydyf.Array(rectangle),
                'T': pydyf.String(element.attrib.get('name', 'unknown')),
                'V': pydyf.String(value),
                'P': page.reference,
            })
            # Field flags, set from their 1-based bit position.
            if element.tag == 'textarea':
                field['Ff'] = 2 ** (13 - 1)
            elif input_type == 'password':
                field['Ff'] = 2 ** (14 - 1)
            elif input_type == 'file':
                field['Ff'] = 2 ** (21 - 1)

        pdf.add_object(field)
        page['Annots'].append(field.reference)
        pdf.catalog['AcroForm']['Fields'].append(field.reference)
|
||||
|
||||
|
||||
def add_annotations(links, matrix, document, pdf, page, annot_files):
    """Include file attachment annotations in given PDF page.

    :param links:
        Iterable of ``(link_type, target, rectangle, box)`` tuples, only
        the ``'attachment'`` links are handled.
    :param matrix:
        Object whose ``transform_point`` converts the rectangle corners.
    :param document:
        Document whose ``url_fetcher`` is used to fetch the attachments.
    :param pdf:
        PDF object receiving the new objects through ``add_object``.
    :param page:
        PDF page dictionary, its ``'Annots'`` array is created on demand.
    :param annot_files:
        Mapping of already embedded targets to their attachment
        dictionaries (or ``None``), updated in place.

    """
    # TODO: splitting a link into multiple independent rectangular
    # annotations works well for pure links, but rather mediocre for
    # other annotations and fails completely for transformed (CSS) or
    # complex link shapes (area). It would be better to use /AP for all
    # links and coalesce link shapes that originate from the same HTML
    # link. This would give a feeling similar to what browsers do with
    # links that span multiple lines.
    for link_type, annot_target, rectangle, _ in links:
        if link_type != 'attachment':
            continue

        if annot_target in annot_files:
            annot_file = annot_files[annot_target]
        else:
            # A single link can be split in multiple regions. We don't want
            # to embed a file multiple times of course, so keep a reference
            # to every embedded URL and reuse the object number.
            # TODO: Use the title attribute as description. The comment
            # above about multiple regions won't always be correct, because
            # two links might have the same href, but different titles.
            annot_file = write_pdf_attachment(
                pdf, (annot_target, None), document.url_fetcher)
            annot_files[annot_target] = annot_file
        if annot_file is None:
            continue

        pdf_rectangle = (
            *matrix.transform_point(*rectangle[:2]),
            *matrix.transform_point(*rectangle[2:]))
        appearance = pydyf.Stream([], {
            'Type': '/XObject',
            'Subtype': '/Form',
            'BBox': pydyf.Array(pdf_rectangle),
            'Length': 0,
        })
        pdf.add_object(appearance)
        annotation = pydyf.Dictionary({
            'Type': '/Annot',
            'Rect': pydyf.Array(pdf_rectangle),
            'Subtype': '/FileAttachment',
            'T': pydyf.String(),
            'FS': annot_file.reference,
            'AP': pydyf.Dictionary({'N': appearance.reference}),
            'AS': '/N',
        })
        pdf.add_object(annotation)
        if 'Annots' not in page:
            page['Annots'] = pydyf.Array()
        page['Annots'].append(annotation.reference)
|
||||
|
||||
|
||||
def write_pdf_attachment(pdf, attachment, url_fetcher):
    """Write an attachment to the PDF stream.

    :param pdf:
        PDF object the embedded file stream and the file specification
        dictionary are registered on through ``add_object``.
    :param attachment:
        Either an ``Attachment`` instance, a ``(url, description)`` tuple
        (the form used for attachments coming from document links like
        ``<link>`` or ``<a>``), or any other value, which is handed to
        ``Attachment`` as its ``guess`` argument.
    :param url_fetcher:
        URL fetcher forwarded to ``Attachment`` when one has to be built.
    :returns:
        The ``/Filespec`` dictionary added to ``pdf``, or ``None`` when
        fetching the attachment raised ``URLFetchingError``.

    """
    # Attachments from document links like <link> or <a> can only be URLs.
    # They're passed in as tuples.
    url = ''
    if isinstance(attachment, tuple):
        url, description = attachment
        attachment = Attachment(
            url=url, url_fetcher=url_fetcher, description=description)
    elif not isinstance(attachment, Attachment):
        attachment = Attachment(guess=attachment, url_fetcher=url_fetcher)

    try:
        # ``url`` is rebound here to the URL reported by the source.
        with attachment.source as (_, source, url, _):
            if isinstance(source, bytes):
                source = io.BytesIO(source)
            uncompressed_length = 0
            # Gather compressed chunks and join them once at the end:
            # repeated ``bytes +=`` copies the whole buffer on each chunk.
            compressed_chunks = []
            md5 = hashlib.md5()
            compress = zlib.compressobj()
            for data in iter(lambda: source.read(4096), b''):
                uncompressed_length += len(data)
                md5.update(data)
                compressed_chunks.append(compress.compress(data))
            compressed_chunks.append(compress.flush(zlib.Z_FINISH))
            file_extra = pydyf.Dictionary({
                'Type': '/EmbeddedFile',
                'Filter': '/FlateDecode',
                'Params': pydyf.Dictionary({
                    'CheckSum': f'<{md5.hexdigest()}>',
                    'Size': uncompressed_length,
                })
            })
            file_stream = pydyf.Stream(
                [b''.join(compressed_chunks)], file_extra)
            pdf.add_object(file_stream)

    except URLFetchingError as exception:
        LOGGER.error('Failed to load attachment: %s', exception)
        return

    # TODO: Use the result object from a URL fetch operation to provide more
    # details on the possible filename.
    url_path = urlsplit(url).path if url else ''
    # A missing path, or a path ending with a slash, gives an empty basename:
    # use a generic name rather than writing an empty filename.
    filename = basename(unquote(url_path)) or 'attachment.bin'

    filespec = pydyf.Dictionary({
        'Type': '/Filespec',
        'F': pydyf.String(),
        'UF': pydyf.String(filename),
        'EF': pydyf.Dictionary({'F': file_stream.reference}),
        'Desc': pydyf.String(attachment.description or ''),
    })
    pdf.add_object(filespec)
    return filespec
|
||||
|
||||
|
||||
def resolve_links(pages):
    """Resolve internal hyperlinks.

    Links to a missing anchor are removed and an error is logged.

    If multiple anchors have the same name, the first one is used.

    :param pages:
        An iterable of pages that is iterated twice (anchors are collected
        from every page before any link is checked). Each page provides
        ``anchors``, a mapping of anchor names to ``(x, y)`` points, and
        ``links``, an iterable of 4-item links whose first two items are
        the link type and the link target.
    :returns:
        A generator yielding one ``(page_links, page_anchors)`` tuple per
        page. ``page_links`` is the list of links of the page, without the
        ``'internal'`` links whose target is not a known anchor name.
        ``page_anchors`` is a list of ``(anchor_name, x, y)`` tuples for
        the anchors whose first occurrence is on this page.

    """
    known_anchors = set()
    paged_anchors = []
    for page in pages:
        page_anchors = []
        for anchor_name, (point_x, point_y) in page.anchors.items():
            # Only the first anchor with a given name is kept.
            if anchor_name not in known_anchors:
                page_anchors.append((anchor_name, point_x, point_y))
                known_anchors.add(anchor_name)
        paged_anchors.append(page_anchors)

    for page, page_anchors in zip(pages, paged_anchors):
        page_links = []
        for link in page.links:
            link_type, anchor_name, _, _ = link
            if link_type == 'internal' and anchor_name not in known_anchors:
                LOGGER.error(
                    'No anchor #%s for internal URI reference',
                    anchor_name)
                continue
            # External link, or internal link to an existing anchor
            page_links.append(link)
        yield page_links, page_anchors
|
@ -140,8 +140,7 @@ def color(string):
|
||||
|
||||
def transform(transform_string, font_size, normalized_diagonal):
|
||||
"""Get a matrix corresponding to the transform string."""
|
||||
# TODO: merge with Page._gather_links_and_bookmarks and
|
||||
# css.validation.properties.transform
|
||||
# TODO: merge with gather_anchors and css.validation.properties.transform
|
||||
transformations = re.findall(
|
||||
r'(\w+) ?\( ?(.*?) ?\)', normalize(transform_string))
|
||||
matrix = Matrix()
|
||||
|
@ -13,8 +13,10 @@ from ..logger import LOGGER
|
||||
from ..urls import FILESYSTEM_ENCODING, fetch
|
||||
from .constants import (
|
||||
CAPS_KEYS, EAST_ASIAN_KEYS, FONTCONFIG_STRETCH, FONTCONFIG_STYLE,
|
||||
FONTCONFIG_WEIGHT, LIGATURE_KEYS, NUMERIC_KEYS)
|
||||
from .ffi import ffi, fontconfig, gobject, pangoft2
|
||||
FONTCONFIG_WEIGHT, LIGATURE_KEYS, NUMERIC_KEYS, PANGO_STRETCH, PANGO_STYLE)
|
||||
from .ffi import (
|
||||
ffi, fontconfig, gobject, pango, pangoft2, unicode_to_char_p,
|
||||
units_from_double)
|
||||
|
||||
|
||||
def _check_font_configuration(font_config): # pragma: no cover
|
||||
@ -326,3 +328,27 @@ def font_features(font_kerning='normal', font_variant_ligatures='normal',
|
||||
features.update(dict(font_feature_settings))
|
||||
|
||||
return features
|
||||
|
||||
|
||||
def get_font_description(style, font_size=None):
    """Create a Pango font description from font-related style values.

    :param style:
        Mapping providing the ``font_family``, ``font_style``,
        ``font_stretch``, ``font_weight`` and ``font_variation_settings``
        values.
    :param font_size:
        Size given to Pango as an absolute size after conversion with
        ``units_from_double``. When ``None``, no size is set.
    :returns:
        The new font description, wrapped with ``ffi.gc`` so that
        ``pango_font_description_free`` is called on it when collected.

    """
    description = ffi.gc(
        pango.pango_font_description_new(),
        pango.pango_font_description_free)

    # NOTE(review): ``family`` is not used afterwards; presumably it keeps
    # the buffer behind ``family_p`` alive -- confirm before removing it.
    families = ','.join(style['font_family'])
    family_p, family = unicode_to_char_p(families)
    pango.pango_font_description_set_family(description, family_p)

    pango_style = PANGO_STYLE[style['font_style']]
    pango.pango_font_description_set_style(description, pango_style)

    pango_stretch = PANGO_STRETCH[style['font_stretch']]
    pango.pango_font_description_set_stretch(description, pango_stretch)

    pango.pango_font_description_set_weight(
        description, style['font_weight'])

    if font_size is not None:
        absolute_size = units_from_double(font_size)
        pango.pango_font_description_set_absolute_size(
            description, absolute_size)

    variation_settings = style['font_variation_settings']
    if variation_settings != 'normal':
        # Serialize the (key, value) pairs as "key=value,key=value"
        pairs = [f'{key}={value}' for key, value in variation_settings]
        variations = ','.join(pairs).encode()
        pango.pango_font_description_set_variations(description, variations)

    return description
|
||||
|
@ -5,11 +5,11 @@ from math import inf
|
||||
|
||||
import pyphen
|
||||
|
||||
from .constants import LST_TO_ISO, PANGO_STRETCH, PANGO_STYLE, PANGO_WRAP_MODE
|
||||
from .constants import LST_TO_ISO, PANGO_WRAP_MODE
|
||||
from .ffi import (
|
||||
ffi, gobject, pango, pangoft2, unicode_to_char_p, units_from_double,
|
||||
units_to_double)
|
||||
from .fonts import font_features
|
||||
from .fonts import font_features, get_font_description
|
||||
|
||||
|
||||
def line_size(line, style):
|
||||
@ -78,9 +78,6 @@ class Layout:
|
||||
pango.pango_font_map_create_context(font_map),
|
||||
gobject.g_object_unref)
|
||||
pango.pango_context_set_round_glyph_positions(pango_context, False)
|
||||
self.layout = ffi.gc(
|
||||
pango.pango_layout_new(pango_context),
|
||||
gobject.g_object_unref)
|
||||
|
||||
if style['font_language_override'] != 'normal':
|
||||
lang_p, lang = unicode_to_char_p(LST_TO_ISO.get(
|
||||
@ -97,31 +94,17 @@ class Layout:
|
||||
|
||||
assert not isinstance(style['font_family'], str), (
|
||||
'font_family should be a list')
|
||||
self.font = ffi.gc(
|
||||
pango.pango_font_description_new(),
|
||||
pango.pango_font_description_free)
|
||||
family_p, family = unicode_to_char_p(','.join(style['font_family']))
|
||||
pango.pango_font_description_set_family(self.font, family_p)
|
||||
pango.pango_font_description_set_style(
|
||||
self.font, PANGO_STYLE[style['font_style']])
|
||||
pango.pango_font_description_set_stretch(
|
||||
self.font, PANGO_STRETCH[style['font_stretch']])
|
||||
pango.pango_font_description_set_weight(
|
||||
self.font, style['font_weight'])
|
||||
pango.pango_font_description_set_absolute_size(
|
||||
self.font, units_from_double(font_size))
|
||||
if style['font_variation_settings'] != 'normal':
|
||||
string = ','.join(
|
||||
f'{key}={value}' for key, value in
|
||||
style['font_variation_settings']).encode()
|
||||
pango.pango_font_description_set_variations(self.font, string)
|
||||
pango.pango_layout_set_font_description(self.layout, self.font)
|
||||
font_description = get_font_description(style, font_size)
|
||||
self.layout = ffi.gc(
|
||||
pango.pango_layout_new(pango_context),
|
||||
gobject.g_object_unref)
|
||||
pango.pango_layout_set_font_description(self.layout, font_description)
|
||||
|
||||
text_decoration = style['text_decoration_line']
|
||||
if text_decoration != 'none':
|
||||
metrics = ffi.gc(
|
||||
pango.pango_context_get_metrics(
|
||||
pango_context, self.font, self.language),
|
||||
pango_context, font_description, self.language),
|
||||
pango.pango_font_metrics_unref)
|
||||
self.ascent = units_to_double(
|
||||
pango.pango_font_metrics_get_ascent(metrics))
|
||||
@ -236,7 +219,7 @@ class Layout:
|
||||
pango.pango_layout_set_tabs(self.layout, array)
|
||||
|
||||
def deactivate(self):
|
||||
del self.layout, self.font, self.language, self.style
|
||||
del self.layout, self.language, self.style
|
||||
|
||||
def reactivate(self, style):
|
||||
self.setup(self.context, style['font_size'], style)
|
||||
|
Loading…
Reference in New Issue
Block a user