1
1
mirror of https://github.com/Kozea/WeasyPrint.git synced 2024-10-05 00:21:15 +03:00
WeasyPrint/weasy/tests/test_boxes.py

199 lines
6.3 KiB
Python

# coding: utf8
# WeasyPrint converts web documents (HTML, CSS, ...) to PDF.
# Copyright (C) 2011 Simon Sapin
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import itertools
from attest import Tests, assert_hook
from cssutils.css import PropertyValue
from lxml import html
from . import parse_html
from .. import boxes
from .. import css
# Test collection for this module: the test functions in this file register
# themselves on it through the ``@suite.test`` decorator.
suite = Tests()
def serialize(box):
    """
    Turn a box tree into nested plain tuples that are easy to compare.

    Each box becomes ``(tag, type_name, content)`` where ``tag`` is the tag
    of the element the box was generated for, ``type_name`` is a short
    string naming the exact class of the box, and ``content`` is the text
    for a text box or the list of serialized children for any other box.

    A box whose class is not one of the known classes raises ``KeyError``.
    """
    if isinstance(box, boxes.TextBox):
        payload = box.text
    else:
        payload = list(map(serialize, box.children))
    # The lookup is on the exact class, so subclasses are not matched.
    names_by_class = {
        boxes.BlockLevelBox: 'block',
        boxes.InlineLevelBox: 'inline',
        boxes.TextBox: 'text',
        boxes.AnonymousBlockLevelBox: 'anon_block',
        boxes.LineBox: 'line',
    }
    type_name = names_by_class[box.__class__]
    return box.element.tag, type_name, payload
def unwrap_html_body(box):
    """
    Strip the outer <html> and <body> blocks from a serialized box tree.

    ``box`` is a ``(tag, type, content)`` tuple. It must be an 'html' block
    holding exactly one child, and that child must be a 'body' block;
    otherwise an ``AssertionError`` is raised. The content of the <body>
    block is returned so that further tests do not have to repeat these
    two wrappers.
    """
    root_tag, root_type, root_children = box
    assert root_tag == 'html'
    assert root_type == 'block'
    assert len(root_children) == 1

    body_tag, body_type, body_children = root_children[0]
    assert body_tag == 'body'
    assert body_type == 'block'
    return body_children
def to_lists(box_tree):
    """Serialize a box tree, then strip its <html> and <body> wrappers."""
    serialized = serialize(box_tree)
    return unwrap_html_body(serialized)
def parse(html_content):
    """
    Parse some HTML, apply stylesheets and transform the result to boxes.

    The box tree is returned as-is; it is not serialized here.
    """
    dom = html.document_fromstring(html_content)
    css.annotate_document(dom)
    box_tree = boxes.dom_to_box(dom)
    return box_tree
def diff(tree_1, tree_2):
    """
    Print where two serialized box trees differ. For debugging only.

    Both arguments are single ``(tag, type, content)`` tuples, i.e. one
    element of a to_lists() result, not the whole list. Trees with the same
    tag and type are compared child by child; the first mismatching pair
    found on each branch is printed. Nothing is printed when the trees are
    equal, and nothing is returned.
    """
    # The function is named izip_longest on Python 2 and zip_longest on
    # Python 3; pick whichever this interpreter provides.
    zip_longest = getattr(itertools, 'zip_longest', None)
    if zip_longest is None:
        zip_longest = itertools.izip_longest

    tag_1, type_1, content_1 = tree_1
    tag_2, type_2, content_2 = tree_2
    if (tag_1, type_1) == (tag_2, type_2):
        if type_1 == 'text':
            if content_1 == content_2:
                return
            # Same kind of text box but different text: report it below.
        else:
            # A missing child on either side is replaced by a placeholder
            # that never matches a real box, so it gets reported.
            for child_1, child_2 in zip_longest(
                    content_1, content_2, fillvalue=(None, None, [])):
                diff(child_1, child_2)
            return
    # print(...) with a single argument gives the same output whether it is
    # parsed as the Python 2 statement or the Python 3 function.
    print('Different:')
    print('%s %s' % (' ', tree_1))
    print('%s %s' % (' ', tree_2))
@suite.test
def test_box_tree():
    """Check the box tree built for an empty <p> and for mixed content."""
    empty_paragraph = to_lists(parse('<p>'))
    assert empty_paragraph == [('p', 'block', [])]

    # Text directly inside <p> is tagged 'p'; text inside <em> is tagged 'em'.
    mixed_paragraph = to_lists(parse('<p>Hello <em>World</em>!</p>'))
    wanted = [
        ('p', 'block', [
            ('p', 'text', 'Hello '),
            ('em', 'inline', [
                ('em', 'text', 'World')]),
            ('p', 'text', '!')])]
    assert mixed_paragraph == wanted
@suite.test
def test_inline_in_block():
    """
    Check the tree produced by ``boxes.inline_in_block``.

    The expected tree has the inline content of the <div> inside a line box
    in an anonymous block, next to the <p> block, and the text of the <p>
    inside a line box.
    """
    markup = '<div>Hello, <em>World</em>!\n<p>Lipsum.</p></div>'
    wanted = [
        ('div', 'block', [
            ('div', 'anon_block', [
                ('div', 'line', [
                    ('div', 'text', 'Hello, '),
                    ('em', 'inline', [
                        ('em', 'text', 'World')]),
                    ('div', 'text', '!\n')])]),
            ('p', 'block', [
                ('p', 'line', [
                    ('p', 'text', 'Lipsum.')])])])]

    # This should be idempotent: on a freshly parsed tree, running the
    # transformation twice must give the same result as running it once.
    for passes in (1, 2):
        tree = parse(markup)
        for _ in range(passes):
            boxes.inline_in_block(tree)
        assert to_lists(tree) == wanted
@suite.test
def test_block_in_inline():
    """
    Check ``boxes.inline_in_block`` followed by ``boxes.block_in_inline``.

    The markup nests block-level <span> elements inside inline <em> and
    <strong> elements. The tree is checked once after each transformation.
    """
    # The lines of this literal are part of the parsed document, so they
    # are deliberately not indented with the surrounding code.
    markup = '''<style>
span { display: block; }
em { display: inline; }
strong { display: inline; }
p { display: block; }
html { display: block; }
</style>
<p>Lorem <em>ipsum <strong>dolor <span>sit</span>
<span>amet,</span></strong><span>consectetur</span></em></p>'''
    box = parse(markup)

    boxes.inline_in_block(box)
    # NOTE(review): the whitespace-only 'strong' text below is expected to
    # be the whitespace between the two <span> elements in the markup;
    # confirm that both literals still agree.
    after_inline_in_block = [
        ('p', 'block', [
            ('p', 'line', [
                ('p', 'text', 'Lorem '),
                ('em', 'inline', [
                    ('em', 'text', 'ipsum '),
                    ('strong', 'inline', [
                        ('strong', 'text', 'dolor '),
                        ('span', 'block', [  # This block is "pulled up"
                            ('span', 'line', [
                                ('span', 'text', 'sit')])]),
                        ('strong', 'text', '\n '),
                        ('span', 'block', [  # This block is "pulled up"
                            ('span', 'line', [
                                ('span', 'text', 'amet,')])])]),
                    ('span', 'block', [  # This block is "pulled up"
                        ('span', 'line', [
                            ('span', 'text', 'consectetur')])])])])])]
    assert to_lists(box) == after_inline_in_block

    boxes.block_in_inline(box)
    # The <span> blocks are now direct children of <p>, separated by
    # anonymous blocks holding the inline content that was around them.
    after_block_in_inline = [
        ('p', 'block', [
            ('p', 'anon_block', [
                ('p', 'line', [
                    ('p', 'text', 'Lorem '),
                    ('em', 'inline', [
                        ('em', 'text', 'ipsum '),
                        ('strong', 'inline', [
                            ('strong', 'text', 'dolor ')])])])]),
            ('span', 'block', [
                ('span', 'line', [
                    ('span', 'text', 'sit')])]),
            # TODO: this should disappear
            ('p', 'anon_block', [
                ('p', 'line', [
                    ('strong', 'text', '\n ')])]),
            ('span', 'block', [
                ('span', 'line', [
                    ('span', 'text', 'amet,')])]),
            ('p', 'anon_block', [
                ('p', 'line', [])]),
            ('span', 'block', [
                ('span', 'line', [
                    ('span', 'text', 'consectetur')])])])]
    # Debugging aid: prints the differing sub-trees, if any, before the
    # assertion below reports the failure.
    diff(to_lists(box)[0], after_block_in_inline[0])
    assert to_lists(box) == after_block_in_inline