1
1
mirror of https://github.com/Kozea/WeasyPrint.git synced 2024-10-05 08:27:22 +03:00
WeasyPrint/weasyprint/tests/test_pdf.py
2012-08-03 13:00:14 +02:00

277 lines
9.9 KiB
Python

# coding: utf8
"""
weasyprint.tests.test_pdf
-------------------------
Test the PDF parser and the metadata of the document
(bookmarks, links and destinations).
:copyright: Copyright 2011-2012 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
from __future__ import division, unicode_literals
import io
import cairo
from .. import CSS
from .. import pdf
from .testing_utils import (
assert_no_logs, resource_filename, TestPDFDocument, capture_logs)
@assert_no_logs
def test_pdf_parser():
    """Check that ``pdf.PDFFile`` reads back the MediaBox of every page.

    A three-page PDF with a different size for each page is written by cairo
    into an in-memory buffer. The buffer is then parsed with ``pdf.PDFFile``
    and the content of each page's ``MediaBox`` entry is compared with the
    size that was requested for that page.
    """
    fileobj = io.BytesIO()
    # The surface is created with a 1x1 size; set_size() is called with the
    # wanted dimensions before each page is emitted.
    surface = cairo.PDFSurface(fileobj, 1, 1)
    for width, height in [
        (100, 100),
        (200, 10),
        (3.14, 987654321)
    ]:
        surface.set_size(width, height)
        surface.show_page()
    surface.finish()

    # Raw literal: ``\[`` is not a valid string escape, so a non-raw literal
    # triggers an "invalid escape sequence" warning on recent Pythons.
    # The pattern captures what is between the square brackets.
    sizes = [page.get_value('MediaBox', r'\[(.+?)\]').strip()
             for page in pdf.PDFFile(fileobj).pages]
    assert sizes == [b'0 0 100 100', b'0 0 200 10', b'0 0 3.14 987654321']
def get_metadata(html, base_url=resource_filename('<inline HTML>')):
    """Render ``html`` with a fixed page style and gather its metadata.

    A user stylesheet sets the page size to 500pt x 1000pt with 50pt
    margins. The rendered pages are handed to ``pdf.gather_metadata`` and
    its result is returned unchanged.
    """
    page_style = CSS(
        string='@page { size: 500pt 1000pt; margin: 50pt }')
    document = TestPDFDocument(
        html, base_url=base_url, user_stylesheets=[page_style])
    pages = document.render_pages()
    return pdf.gather_metadata(pages)
def get_bookmarks(html, structure_only=False):
    """Return ``(root, bookmarks)`` for ``html``, normalized for comparison.

    With ``structure_only`` set, the ``destination`` and ``label`` keys are
    removed from every bookmark so that only the tree structure remains.
    Otherwise the x and y values of each destination are rounded to six
    decimals. The bookmark dicts are modified in place.
    """
    (root, bookmarks), _links = get_metadata(html)

    if structure_only:
        for entry in bookmarks:
            entry.pop('destination')
            entry.pop('label')
        return root, bookmarks

    for entry in bookmarks:
        # Rounding hides floating point noise
        # (eg. 499.99999999999994 instead of 500)
        page, x, y = entry['destination']
        entry['destination'] = (page, round(x, 6), round(y, 6))
    return root, bookmarks
def get_links(html, **kwargs):
    """Return the links of ``html``, one list per page, with rounded numbers.

    Keyword arguments are forwarded to ``get_metadata``. Each link is a
    ``(type, target, rectangle)`` tuple. The target of an ``'external'``
    link is kept as is; any other target is a ``(page, x, y)`` triple whose
    x and y are rounded to six decimals. Rectangle values are rounded the
    same way.
    """
    def rounded_destination(page, x, y):
        return (page, round(x, 6), round(y, 6))

    def normalized(type_, target, rect):
        if type_ != 'external':
            target = rounded_destination(*target)
        return (type_, target, tuple(round(v, 6) for v in rect))

    _bookmarks, links = get_metadata(html, **kwargs)
    return [
        [
            normalized(type_, target, rect)
            for type_, target, rect in page_links
        ]
        for page_links in links]
@assert_no_logs
def test_bookmarks():
    """Test the structure of the document bookmarks.

    Each case renders a small HTML document through ``get_bookmarks`` and
    compares the returned ``(root, bookmarks)`` pair with hand-written
    expectations.

    Warning: the PDF output of this structure is not tested.
    """
    # NOTE(review): judging from the expectations below, First / Last / Next /
    # Prev / Parent look like 1-based positions in the ``bookmarks`` list,
    # with ``Parent=0`` standing for the root -- confirm against
    # ``pdf.gather_metadata``.

    # Headings of decreasing level (h4, h3, h2) following an h1: all three
    # are expected as direct children of that h1 (Parent=1).
    root, bookmarks = get_bookmarks('''
<h1>a</h1> #
<h4>b</h4> ####
<h3>c</h3> ###
<h2>d</h2> ##
<h1>e</h1> #
''', structure_only=True)
    assert root == dict(Count=5, First=1, Last=5)
    assert bookmarks == [
        dict(Count=3, First=2, Last=4, Next=5, Parent=0, Prev=None),
        dict(Count=0, First=None, Last=None, Next=3, Parent=1, Prev=None),
        dict(Count=0, First=None, Last=None, Next=4, Parent=1, Prev=2),
        dict(Count=0, First=None, Last=None, Next=None, Parent=1, Prev=3),
        dict(Count=0, First=None, Last=None, Next=None, Parent=0, Prev=1)]

    # A document without headings: no bookmark, and a root with only Count.
    root, bookmarks = get_bookmarks('<body>')
    assert root == dict(Count=0)
    assert bookmarks == []

    # Full check, including labels and destinations, on a document that
    # spans two pages (forced page break before "Title 6").
    # The relatively positioned "Title 3" (left: 20pt) is expected at x=70
    # where the other titles are at x=50.
    root, bookmarks = get_bookmarks('''
<style>
* { height: 90pt; margin: 0 0 10pt 0 }
</style>
<h1>Title 1</h1>
<h1>Title 2</h1>
<h2 style="position: relative; left: 20pt">Title 3</h2>
<h2>Title 4</h2>
<h3>Title 5</h3>
<span style="display: block; page-break-before: always"></span>
<h2>Title 6</h2>
<h1>Title 7</h1>
<h2>Title 8</h2>
<h3>Title 9</h3>
<h1>Title 10</h1>
<h2>Title 11</h2>
''')
    assert root == dict(Count=11, First=1, Last=10)
    assert bookmarks == [
        dict(Count=0, First=None, Last=None, Next=2, Parent=0, Prev=None,
             label='Title 1', destination=(0, 50, 950)),
        dict(Count=4, First=3, Last=6, Next=7, Parent=0, Prev=1,
             label='Title 2', destination=(0, 50, 850)),
        dict(Count=0, First=None, Last=None, Next=4, Parent=2, Prev=None,
             label='Title 3', destination=(0, 70, 750)),
        dict(Count=1, First=5, Last=5, Next=6, Parent=2, Prev=3,
             label='Title 4', destination=(0, 50, 650)),
        dict(Count=0, First=None, Last=None, Next=None, Parent=4, Prev=None,
             label='Title 5', destination=(0, 50, 550)),
        dict(Count=0, First=None, Last=None, Next=None, Parent=2, Prev=4,
             label='Title 6', destination=(1, 50, 850)),
        dict(Count=2, First=8, Last=8, Next=10, Parent=0, Prev=2,
             label='Title 7', destination=(1, 50, 750)),
        dict(Count=1, First=9, Last=9, Next=None, Parent=7, Prev=None,
             label='Title 8', destination=(1, 50, 650)),
        dict(Count=0, First=None, Last=None, Next=None, Parent=8, Prev=None,
             label='Title 9', destination=(1, 50, 550)),
        dict(Count=1, First=11, Last=11, Next=None, Parent=0, Prev=7,
             label='Title 10', destination=(1, 50, 450)),
        dict(Count=0, First=None, Last=None, Next=None, Parent=10, Prev=None,
             label='Title 11', destination=(1, 50, 350))]

    # The document starts at h2 and skips heading levels; the text after
    # each heading gives the nesting level that is expected for it.
    root, bookmarks = get_bookmarks('''
<h2>1</h2> level 1
<h4>2</h4> level 2
<h2>3</h2> level 1
<h3>4</h3> level 2
<h4>5</h4> level 3
''', structure_only=True)
    assert root == dict(Count=5, First=1, Last=3)
    assert bookmarks == [
        dict(Count=1, First=2, Last=2, Next=3, Parent=0, Prev=None),
        dict(Count=0, First=None, Last=None, Next=None, Parent=1, Prev=None),
        dict(Count=2, First=4, Last=4, Next=None, Parent=0, Prev=1),
        dict(Count=1, First=5, Last=5, Next=None, Parent=3, Prev=None),
        dict(Count=0, First=None, Last=None, Next=None, Parent=4, Prev=None)]

    # Mixed case: an h1 appears after the document started at h2, and
    # siblings use different tags (h4 and h3 are both "level 2" under the
    # first h2). Again the text after each heading gives the expected level.
    root, bookmarks = get_bookmarks('''
<h2>1</h2> h2 level 1
<h4>2</h4> h4 level 2
<h3>3</h3> h3 level 2
<h5>4</h5> h5 level 3
<h1>5</h1> h1 level 1
<h2>6</h2> h2 level 2
<h2>7</h2> h2 level 2
<h4>8</h4> h4 level 3
<h1>9</h1> h1 level 1
''', structure_only=True)
    assert root == dict(Count=9, First=1, Last=9)
    assert bookmarks == [
        dict(Count=3, First=2, Last=3, Next=5, Parent=0, Prev=None),
        dict(Count=0, First=None, Last=None, Next=3, Parent=1, Prev=None),
        dict(Count=1, First=4, Last=4, Next=None, Parent=1, Prev=2),
        dict(Count=0, First=None, Last=None, Next=None, Parent=3, Prev=None),
        dict(Count=3, First=6, Last=7, Next=9, Parent=0, Prev=1),
        dict(Count=0, First=None, Last=None, Next=7, Parent=5, Prev=None),
        dict(Count=1, First=8, Last=8, Next=None, Parent=5, Prev=6),
        dict(Count=0, First=None, Last=None, Next=None, Parent=7, Prev=None),
        dict(Count=0, First=None, Last=None, Next=None, Parent=0, Prev=5)]
@assert_no_logs
def test_links():
    """Check the links gathered by ``get_links`` for three documents.

    The cases are: a document without any link, a two-page document mixing
    external and internal links, and a relative ``href`` resolved against
    an explicit ``base_url``.
    """
    # No link at all: a single page with an empty list of links.
    assert get_links('<body>') == [[]]

    two_pages_html = '''
<style>
body { margin: 0; font-size: 10pt; line-height: 2 }
p { display: block; height: 90pt; margin: 0 0 10pt 0 }
img { width: 30pt; vertical-align: top }
</style>
<p><a href="http://weasyprint.org"><img src=pattern.png></a></p>
<p style="padding: 0 10pt"><a
href="#lipsum"><img style="border: solid 1pt"
src=pattern.png></a></p>
<p id=hello>Hello, World</p>
<p id=lipsum>
<a style="display: block; page-break-before: always; height: 30pt"
href="#hel%6Co"></a>
</p>
'''
    expected_first_page = [
        # 30pt wide (like the image), 20pt high (like line-height)
        ('external', 'http://weasyprint.org', (50, 950, 80, 930)),
        # The image itself: 30*30pt
        ('external', 'http://weasyprint.org', (50, 950, 80, 920)),
        # 32pt wide (image + 2 * 1pt of border), 20pt high
        ('internal', (1, 50, 950), (60, 850, 92, 830)),
        # The image itself: 32*32pt
        ('internal', (1, 50, 950), (60, 850, 92, 818)),
    ]
    expected_second_page = [
        # 400pt wide (block), 30pt high
        ('internal', (0, 50, 750), (50, 950, 450, 920)),
    ]
    assert get_links(two_pages_html) == [
        expected_first_page, expected_second_page]

    # The relative reference is resolved against the given base URL.
    resolved = get_links(
        '<a href="../lipsum" style="display: block">',
        base_url='http://weasyprint.org/foo/bar/')
    expected_link = (
        'external', 'http://weasyprint.org/foo/lipsum', (50, 950, 450, 950))
    assert resolved == [[expected_link]]
@assert_no_logs
def test_relative_links():
    """Check how URI references are handled when there is no base URI.

    Relative references (``../lipsum``) are expected to produce no link and
    exactly one warning; fragment-only references (``#lipsum``) are expected
    to produce an internal link.
    """
    def links_and_logs(html):
        # Render without a base URI and hand back what was logged meanwhile.
        with capture_logs() as logs:
            return get_links(html, base_url=None), logs

    # Relative URI reference without a base URI: not allowed
    links, logs = links_and_logs(
        '<a href="../lipsum" style="display: block">')
    assert links == [[]]
    assert len(logs) == 1
    assert 'WARNING: Relative URI reference without a base URI' in logs[0]

    links, logs = links_and_logs(
        '<div style="-weasy-link: url(../lipsum)">')
    assert links == [[]]
    assert len(logs) == 1
    assert 'WARNING: Ignored `-weasy-link: url(../lipsum)`' in logs[0]
    assert 'Relative URI reference without a base URI' in logs[0]

    # Internal URI reference without a base URI: OK
    self_link = [[('internal', (0, 50, 950), (50, 950, 450, 950))]]
    for html in (
            '<a href="#lipsum" id="lipsum" style="display: block">',
            '<div style="-weasy-link: url(#lipsum)" id="lipsum">'):
        assert get_links(html, base_url=None) == self_link
@assert_no_logs
def test_missing_links():
    """Check that a link to a missing anchor is dropped with a warning.

    The document holds one link to an existing anchor (``#lipsum``) and one
    to an anchor that does not exist (``#missing``): only the first link is
    expected in the result, along with exactly one logged warning.
    """
    html = '''
<style> a { display: block; height: 15pt; } </style>
<body>
<a href="#lipsum"></a>
<a href="#missing" id="lipsum"></a>
'''
    with capture_logs() as logs:
        links = get_links(html, base_url=None)

    only_valid_link = ('internal', (0, 50, 935), (50, 950, 450, 935))
    assert links == [[only_valid_link]]
    assert len(logs) == 1
    assert 'WARNING: No anchor #missing for internal URI reference' in logs[0]