2011-05-17 13:29:00 +04:00
|
|
|
# coding: utf8
|
|
|
|
|
|
|
|
# WeasyPrint converts web documents (HTML, CSS, ...) to PDF.
|
|
|
|
# Copyright (C) 2011 Simon Sapin
|
|
|
|
#
|
|
|
|
# This program is free software: you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU Affero General Public License as
|
|
|
|
# published by the Free Software Foundation, either version 3 of the
|
|
|
|
# License, or (at your option) any later version.
|
|
|
|
#
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU Affero General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU Affero General Public License
|
|
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
2011-08-16 18:01:50 +04:00
|
|
|
import contextlib
|
2011-05-17 13:29:00 +04:00
|
|
|
|
|
|
|
from attest import Tests, assert_hook
|
|
|
|
|
2011-08-16 18:01:50 +04:00
|
|
|
from ..css.values import get_single_keyword
|
|
|
|
from ..css import validation
|
2011-07-20 17:40:40 +04:00
|
|
|
from ..document import Document
|
2011-06-29 17:01:36 +04:00
|
|
|
from ..formatting_structure import boxes
|
2011-06-29 23:59:29 +04:00
|
|
|
from ..formatting_structure import build
|
2011-05-17 13:29:00 +04:00
|
|
|
|
|
|
|
|
|
|
|
suite = Tests()
|
|
|
|
|
|
|
|
|
2011-05-23 15:59:47 +04:00
|
|
|
def serialize(box_list):
    """
    Turn a list of boxes into nested ``(tag, type name, content)`` tuples
    that are easy to compare in tests.

    ``content`` is the text of a text box, the ``'<replaced>'`` marker for
    a replaced box, and the serialized children of any other box.
    A box whose exact class has no entry in the type name table raises
    ``KeyError``.
    """
    type_names = {
        boxes.TextBox: 'text',
        boxes.LineBox: 'line',
        boxes.BlockBox: 'block',
        boxes.InlineBox: 'inline',
        boxes.InlineBlockBox: 'inline_block',
        boxes.AnonymousBlockBox: 'anon_block',
        boxes.InlineLevelReplacedBox: 'inline_replaced',
    }
    serialized = []
    for box in box_list:
        tag = box.element.tag
        # Looked up by exact class: subclasses do not share a type name.
        type_name = type_names[box.__class__]
        # All concrete boxes are either text, replaced or parent.
        if isinstance(box, boxes.TextBox):
            content = box.text
        elif isinstance(box, boxes.ReplacedBox):
            content = '<replaced>'
        else:
            content = serialize(box.children)
        serialized.append((tag, type_name, content))
    return serialized
|
2011-05-19 17:31:34 +04:00
|
|
|
|
|
|
|
|
|
|
|
def unwrap_html_body(box):
    """
    Check that ``box`` is an ``<html>`` block box whose only child is a
    ``<body>`` block box, and return the children of that ``<body>`` box.

    Both wrappers are always at the root of HTML documents; removing them
    here keeps the expected trees in the tests shorter.
    Raises ``AssertionError`` if the tree does not have that shape.
    """
    html_box = box
    assert html_box.element.tag == 'html'
    assert isinstance(html_box, boxes.BlockBox)
    assert len(html_box.children) == 1

    body_box = html_box.children[0]
    assert isinstance(body_box, boxes.BlockBox)
    assert body_box.element.tag == 'body'

    return body_box.children
|
2011-05-19 17:31:34 +04:00
|
|
|
|
|
|
|
|
2011-05-19 18:03:50 +04:00
|
|
|
def to_lists(box_tree):
    """Strip the <html> and <body> wrappers, then serialize what is left."""
    body_children = unwrap_html_body(box_tree)
    return serialize(body_children)
|
2011-06-29 16:04:42 +04:00
|
|
|
|
2011-05-19 18:03:50 +04:00
|
|
|
|
2011-08-16 18:01:50 +04:00
|
|
|
@contextlib.contextmanager
def monkeypatch_validation(replacement):
    """
    Context manager that swaps ``validation.validate_non_shorthand`` for a
    wrapper around ``replacement``, and puts the real function back on
    exit, even when the body of the ``with`` block raises.

    ``replacement`` is called with the real validator as its first
    argument, followed by the arguments the validator was called with.
    """
    original = validation.validate_non_shorthand

    def wrapper(*args, **kwargs):
        # Hand the real validator over so that the replacement can delegate.
        return replacement(original, *args, **kwargs)

    validation.validate_non_shorthand = wrapper
    try:
        yield
    finally:
        validation.validate_non_shorthand = original
|
|
|
|
|
|
|
|
|
|
|
|
def validate_inline_block(real_non_shorthand, name, values, required=False):
    """
    Stand-in validator that accepts ``display: inline-block`` as is and
    hands every other declaration to ``real_non_shorthand``.

    Returns ``[(name, values)]`` for ``display: inline-block``, and whatever
    ``real_non_shorthand(name, values, required)`` returns otherwise.
    """
    # ``and`` short-circuits: the keyword is only read for ``display``.
    is_inline_block = (
        name == 'display' and get_single_keyword(values) == 'inline-block')
    if not is_inline_block:
        return real_non_shorthand(name, values, required)
    return [(name, values)]
|
|
|
|
|
|
|
|
|
2011-06-29 23:59:29 +04:00
|
|
|
def parse(html_content):
    """
    Build a document from an HTML string and return the box tree that
    ``build.dom_to_box`` makes for its DOM.

    Everything runs with validation patched so that
    ``display: inline-block`` is accepted.
    """
    # TODO: remove this patching when inline-block is validated.
    inline_block_allowed = monkeypatch_validation(validate_inline_block)
    with inline_block_allowed:
        document = Document.from_string(html_content)
        box_tree = build.dom_to_box(document, document.dom)
        return box_tree
|
2011-05-19 17:31:34 +04:00
|
|
|
|
|
|
|
|
2011-05-23 18:14:51 +04:00
|
|
|
def prettify(tree_list):
    """
    Format serialized box trees for printing: one box per line, indented by
    one space per nesting level.

    Text and replaced boxes show the ``repr()`` of their content; any other
    box is followed by the lines of its children.
    """
    leaf_types = ('text', 'inline_replaced')
    rendered = []

    def render(node, depth):
        tag, type_, content = node
        if type_ in leaf_types:
            rendered.append('%s%s %s %r' % (' ' * depth, tag, type_, content))
            return
        rendered.append('%s%s %s' % (' ' * depth, tag, type_))
        for child in content:
            render(child, depth + 1)

    for tree in tree_list:
        render(tree, 0)
    return '\n'.join(rendered)
|
|
|
|
|
|
|
|
|
|
|
|
def assert_tree(box, expected):
    """
    Assert that a box tree serializes to ``expected``; on failure the
    message shows the prettified tree that was actually obtained.

    box: a Box object, starting with <html> and <body> blocks.
    expected: a list of serialized <body> children as returned by to_lists().
    """
    obtained = to_lists(box)
    # The message is only built when the comparison fails.
    assert obtained == expected, 'Got\n' + prettify(obtained)
|
2011-05-19 20:22:06 +04:00
|
|
|
|
|
|
|
|
2011-05-19 17:31:34 +04:00
|
|
|
@suite.test
def test_box_tree():
    """Check the box tree built for block, inline and replaced elements."""
    # A lone paragraph gives a single block box without children.
    assert_tree(parse('<p>'), [('p', 'block', [])])

    html = (
        '\n'
        '<style>\n'
        'span { display: inline-block }\n'
        '</style>\n'
        '<p>Hello <em>World <img src="foo.png"><span>Lipsum</span></em>!</p>\n'
    )
    box = parse(html)

    emphasis = ('em', 'inline', [
        ('em', 'text', 'World '),
        ('img', 'inline_replaced', '<replaced>'),
        ('span', 'inline_block', [('span', 'text', 'Lipsum')]),
    ])
    expected = [
        ('p', 'block', [
            ('p', 'text', 'Hello '),
            emphasis,
            ('p', 'text', '!'),
        ]),
    ]
    assert_tree(box, expected)
|
2011-05-19 17:31:34 +04:00
|
|
|
|
2011-05-19 18:03:50 +04:00
|
|
|
|
2011-08-16 14:00:59 +04:00
|
|
|
@suite.test
def test_html_entities():
    """
    A double quote ends up as a plain ``"`` in the text box whether it is
    written literally, as a named character reference, or as a hexadecimal
    or decimal numeric character reference.
    """
    expected = [
        ('p', 'block', [
            ('p', 'text', '"abc"')])]
    # One spelling of U+0022 per iteration; the same spelling is used on
    # both sides of "abc".
    for quote in ['"', '&quot;', '&#x22;', '&#34;']:
        assert_tree(parse('<p>{}abc{}'.format(quote, quote)), expected)
|
|
|
|
|
|
|
|
|
2011-05-19 18:03:50 +04:00
|
|
|
@suite.test
def test_inline_in_block():
    """
    ``build.inline_in_block`` wraps inline content in line boxes, and in
    anonymous blocks when it sits next to a block-level box.
    """
    source = '<div>Hello, <em>World</em>!\n<p>Lipsum.</p></div>'

    first_line = ('div', 'line', [
        ('div', 'text', 'Hello, '),
        ('em', 'inline', [('em', 'text', 'World')]),
        ('div', 'text', '!\n'),
    ])
    paragraph = ('p', 'block', [
        ('p', 'line', [('p', 'text', 'Lipsum.')]),
    ])
    expected = [
        ('div', 'block', [
            ('div', 'anon_block', [first_line]),
            paragraph,
        ]),
    ]

    # This should be idempotent: doing more than once does not change
    # anything, so one pass and two passes give the same tree.
    for passes in (1, 2):
        box = parse(source)
        for _ in range(passes):
            box = build.inline_in_block(box)
        assert_tree(box, expected)
|
2011-05-19 20:22:06 +04:00
|
|
|
|
2011-05-19 18:03:50 +04:00
|
|
|
|
2011-05-19 20:58:39 +04:00
|
|
|
@suite.test
def test_block_in_inline():
    """
    Check the tree after ``build.inline_in_block`` and then after
    ``build.block_in_inline`` for block-level <span> elements nested in
    inline elements.
    """
    # The whitespace between the first two <span> is spelled out once and
    # shared with the expected trees, instead of depending on how a
    # multi-line literal happens to be indented: no whitespace processing
    # is done at this stage, so it shows up verbatim in a text box.
    between_spans = '\n '

    def span_block(text):
        # A block-level <span> holding a single line of text.
        return ('span', 'block', [('span', 'line', [('span', 'text', text)])])

    def anon_line(children):
        # An anonymous block of <p> wrapping a single line box.
        return ('p', 'anon_block', [('p', 'line', children)])

    box = parse(
        '\n'
        '<style>\n'
        'p { display: inline-block; }\n'
        'span { display: block; }\n'
        '</style>\n'
        '<p>Lorem <em>ipsum <strong>dolor <span>sit</span>' + between_spans +
        '<span>amet,</span></strong><span>consectetur</span></em></p>')

    box = build.inline_in_block(box)
    assert_tree(box, [
        ('body', 'line', [
            ('p', 'inline_block', [
                ('p', 'line', [
                    ('p', 'text', 'Lorem '),
                    ('em', 'inline', [
                        ('em', 'text', 'ipsum '),
                        ('strong', 'inline', [
                            ('strong', 'text', 'dolor '),
                            # The three <span> blocks are still nested in
                            # inline boxes; they get "pulled up" below.
                            span_block('sit'),
                            # No whitespace processing here.
                            ('strong', 'text', between_spans),
                            span_block('amet,')]),
                        span_block('consectetur')])])])])])

    box = build.block_in_inline(box)
    assert_tree(box, [
        ('body', 'line', [
            ('p', 'inline_block', [
                anon_line([
                    ('p', 'text', 'Lorem '),
                    ('em', 'inline', [
                        ('em', 'text', 'ipsum '),
                        ('strong', 'inline', [
                            ('strong', 'text', 'dolor ')])])]),
                span_block('sit'),
                anon_line([
                    ('em', 'inline', [
                        ('strong', 'inline', [
                            # Whitespace processing not done yet.
                            ('strong', 'text', between_spans)])])]),
                span_block('amet,'),
                anon_line([
                    ('em', 'inline', [
                        ('strong', 'inline', [])])]),
                span_block('consectetur'),
                anon_line([
                    ('em', 'inline', [])])])])])
|
2011-05-19 18:03:50 +04:00
|
|
|
|
2011-05-23 15:59:47 +04:00
|
|
|
|
|
|
|
@suite.test
def test_styles():
    """
    Check computed styles on every box of a tree: the color set on <html>
    and the top margin set by the ``*`` rule.
    """
    html = (
        '\n'
        '<style>\n'
        'span { display: block; }\n'
        '* { margin: 42px }\n'
        'html { color: blue }\n'
        '</style>\n'
        '<p>Lorem <em>ipsum <strong>dolor <span>sit</span>\n'
        '<span>amet,</span></strong><span>consectetur</span></em></p>')
    box = parse(html)
    box = build.inline_in_block(box)
    box = build.block_in_inline(box)

    for child in box.descendants():
        # All boxes inherit the color
        assert child.style.color[0].value == 'blue'
        # Only non-anonymous boxes have margins
        expected_margin = 0 if isinstance(child, boxes.AnonymousBox) else 42
        assert child.style.margin_top[0].value == expected_margin
|
2011-05-23 15:59:47 +04:00
|
|
|
|
|
|
|
|
2011-05-23 19:37:37 +04:00
|
|
|
@suite.test
def test_whitespace():
    """
    Check the text of the boxes made by ``build.build_formatting_structure``
    for the ``normal``, ``pre``, ``pre-wrap`` and ``pre-line`` values of
    ``white-space``.
    """
    # TODO: test more cases
    # http://www.w3.org/TR/CSS21/text.html#white-space-model
    #
    # The three <pre> share the same content. It has two spaces before
    # "foo": the expected ``pre`` and ``pre-wrap`` texts below contain two
    # U+00A0, one per preserved space.
    document = Document.from_string(
        '\n'
        '<p>Lorem \t\r\n ipsum\t<strong> dolor </strong>.</p>\n'
        '<pre>\t  foo\n</pre>\n'
        '<pre style="white-space: pre-wrap">\t  foo\n</pre>\n'
        '<pre style="white-space: pre-line">\t  foo\n</pre>\n')
    box = build.build_formatting_structure(document)

    def pre_block(text):
        # A <pre> block holding one line with a single text box.
        return ('pre', 'block', [('pre', 'line', [('pre', 'text', text)])])

    # The whitespace between two block-level elements ends up as a single
    # space in an anonymous block of <body>.
    body_space = ('body', 'anon_block', [
        ('body', 'line', [
            ('body', 'text', ' ')])])

    assert_tree(box, [
        ('p', 'block', [
            ('p', 'line', [
                ('p', 'text', 'Lorem ipsum '),
                ('strong', 'inline', [
                    ('strong', 'text', 'dolor ')]),
                ('p', 'text', '.')])]),
        body_space,
        # pre
        pre_block(u'\t\xA0\xA0foo\n'),
        body_space,
        # pre-wrap
        pre_block(u'\t\xA0\xA0\u200Bfoo\n'),
        body_space,
        # pre-line
        pre_block(u'foo\n')])
|
2011-07-05 13:32:16 +04:00
|
|
|
|
|
|
|
|
|
|
|
@suite.test
def test_page_style():
    """
    Check the margins of page boxes against ``@page`` rules using the
    ``:first``, ``:right`` and ``:left`` selectors.
    """
    document = Document.from_string(
        '\n'
        '<style>\n'
        '@page { margin: 3px }\n'
        '@page :first { margin-top: 20px }\n'
        '@page :right { margin-right: 10px; margin-top: 10px }\n'
        '@page :left { margin-left: 10px; margin-top: 10px }\n'
        '</style>\n')

    def assert_page_margins(page_number, top, right, bottom, left):
        # A fresh page box is built for every page number that is checked.
        page = boxes.PageBox(
            document, boxes.BlockBox(document, document.dom), page_number)
        expected_margins = [
            ('margin_top', top),
            ('margin_right', right),
            ('margin_bottom', bottom),
            ('margin_left', left),
        ]
        for name, expected in expected_margins:
            assert getattr(page.style, name)[0].value == expected

    # odd numbers are :right pages, even are :left. 1 has :first as well
    right_page = dict(top=10, right=10, bottom=3, left=3)
    left_page = dict(top=10, right=3, bottom=3, left=10)

    assert_page_margins(1, top=20, right=10, bottom=3, left=3)
    checks = [
        (2, left_page),
        (3, right_page),
        (4, left_page),
        (45, right_page),
        (122, left_page),
    ]
    for page_number, margins in checks:
        assert_page_margins(page_number, **margins)
|
2011-07-07 16:27:46 +04:00
|
|
|
|
|
|
|
|
|
|
|
@suite.test
|
|
|
|
def test_containing_block():
|
|
|
|
"""Test the boxes containing block."""
|
2011-08-16 19:57:13 +04:00
|
|
|
box = None
|
|
|
|
# box = parse('''
|
|
|
|
# <html>
|
|
|
|
# <style>
|
|
|
|
# body { height: 297mm; width: 210mm }
|
|
|
|
# p { width: 100mm; height: 200mm }
|
|
|
|
# p span { position: absolute }
|
|
|
|
# p em { position: relative }
|
|
|
|
# li { position: fixed }
|
|
|
|
# li span { position: fixed }
|
|
|
|
# </style>
|
|
|
|
# <body>
|
|
|
|
# <p>
|
|
|
|
# Lorem <em>ipsum <strong>dolor <span>sit</span>
|
|
|
|
# <span>amet,</span></strong><span>consectetur</span></em>
|
|
|
|
# </p>
|
|
|
|
# <ul>
|
|
|
|
# <li>Lorem ipsum dolor sit amet</li>
|
|
|
|
# <li>Lorem ipsum <spam>dolor sit amet</span></li>
|
|
|
|
# </ul>
|
|
|
|
# </body>
|
|
|
|
# </html>
|
|
|
|
# ''')
|
|
|
|
# tree = to_lists(box)
|