""" weasyprint.tests.test_api ------------------------- Test the public API. """ import gzip import io import os import sys import unicodedata import zlib from pathlib import Path from urllib.parse import urljoin, uses_relative import py import pytest from PIL import Image from weasyprint import CSS, HTML, __main__, default_url_fetcher from weasyprint.document import resolve_links from weasyprint.urls import path2url from .test_draw import assert_pixels_equal, parse_pixels from .testing_utils import ( FakeHTML, assert_no_logs, capture_logs, http_server, resource_filename) def _test_resource(class_, basename, check, **kwargs): """Common code for testing the HTML and CSS classes.""" absolute_filename = resource_filename(basename) absolute_path = Path(absolute_filename) url = path2url(absolute_filename) check(class_(absolute_filename, **kwargs)) check(class_(absolute_path, **kwargs)) check(class_(guess=absolute_filename, **kwargs)) check(class_(guess=absolute_path, **kwargs)) check(class_(filename=absolute_filename, **kwargs)) check(class_(filename=absolute_path, **kwargs)) check(class_(url, **kwargs)) check(class_(guess=url, **kwargs)) url = path2url(absolute_filename.encode('utf-8')) check(class_(url=url, **kwargs)) with open(absolute_filename, 'rb') as fd: check(class_(fd, **kwargs)) with open(absolute_filename, 'rb') as fd: check(class_(guess=fd, **kwargs)) with open(absolute_filename, 'rb') as fd: check(class_(file_obj=fd, **kwargs)) with open(absolute_filename, 'rb') as fd: content = fd.read() py.path.local(os.path.dirname(__file__)).chdir() relative_filename = os.path.join('resources', basename) relative_path = Path(relative_filename) check(class_(relative_filename, **kwargs)) check(class_(relative_path, **kwargs)) kwargs.pop('base_url', None) check(class_(string=content, base_url=relative_filename, **kwargs)) encoding = kwargs.get('encoding') or 'utf8' check(class_(string=content.decode(encoding), # unicode base_url=relative_filename, **kwargs)) with pytest.raises(TypeError): class_(filename='foo', url='bar') def _check_doc1(html, has_base_url=True): """Check that a parsed HTML document looks like resources/doc1.html""" root = html.etree_element assert root.tag == 'html' assert [child.tag for child in root] == ['head', 'body'] _head, body = root assert [child.tag for child in body] == ['h1', 'p', 'ul', 'div'] h1, p, ul, div = body assert h1.text == 'WeasyPrint test document (with Ünicōde)' if has_base_url: url = urljoin(html.base_url, 'pattern.png') assert url.startswith('file:') assert url.endswith('tests/resources/pattern.png') else: assert html.base_url is None def _run(args, stdin=b''): stdin = io.BytesIO(stdin) stdout = io.BytesIO() try: __main__.HTML = FakeHTML __main__.main(args.split(), stdin=stdin, stdout=stdout) finally: __main__.HTML = HTML return stdout.getvalue() class _fake_file: def __init__(self): self.chunks = [] def write(self, data): self.chunks.append(bytes(data[:])) def getvalue(self): return b''.join(self.chunks) def _png_size(png_bytes): image = Image.open(io.BytesIO(png_bytes)) return image.width, image.height def _round_meta(pages): """Eliminate errors of floating point arithmetic for metadata.""" for page in pages: anchors = page.anchors for anchor_name, (pos_x, pos_y) in anchors.items(): anchors[anchor_name] = round(pos_x, 6), round(pos_y, 6) links = page.links for i, link in enumerate(links): link_type, target, rectangle, download_name = link pos_x, pos_y, width, height = rectangle link = ( link_type, target, (round(pos_x, 6), round(pos_y, 6), round(width, 6), round(height, 6)), download_name) links[i] = link bookmarks = page.bookmarks for i, (level, label, (pos_x, pos_y), state) in enumerate(bookmarks): bookmarks[i] = ( level, label, (round(pos_x, 6), round(pos_y, 6)), state) @assert_no_logs def test_html_parsing(): """Test the constructor for the HTML class.""" _test_resource(FakeHTML, 'doc1.html', _check_doc1) _test_resource(FakeHTML, 'doc1_UTF-16BE.html', _check_doc1, encoding='UTF-16BE') py.path.local(os.path.dirname(__file__)).chdir() filename = os.path.join('resources', 'doc1.html') with open(filename, encoding='utf-8') as fd: string = fd.read() _test_resource(FakeHTML, 'doc1.html', _check_doc1, base_url=filename) _check_doc1(FakeHTML(string=string, base_url=filename)) _check_doc1(FakeHTML(string=string), has_base_url=False) string_with_meta = string.replace( '{css_string}{html_string}', base_url=base_url, media_type='screen' ).write_png() == rotated_png_bytes @assert_no_logs def test_command_line_render(tmpdir): css = b''' @page { margin: 2px; size: 8px; background: #fff } @media screen { img { transform: rotate(-90deg) } } body { margin: 0; font-size: 0 } ''' html = b'
' combined = b'' + html linked = b'' + html py.path.local(resource_filename('')).chdir() # Reference html_obj = FakeHTML(string=combined, base_url='dummy.html') pdf_bytes = html_obj.write_pdf() rotated_pdf_bytes = FakeHTML( string=combined, base_url='dummy.html', media_type='screen').write_pdf() tmpdir.chdir() with open(resource_filename('pattern.png'), 'rb') as pattern_fd: pattern_bytes = pattern_fd.read() tmpdir.join('pattern.png').write_binary(pattern_bytes) tmpdir.join('no_css.html').write_binary(html) tmpdir.join('combined.html').write_binary(combined) tmpdir.join('combined-UTF-16BE.html').write_binary( combined.decode('ascii').encode('UTF-16BE')) tmpdir.join('linked.html').write_binary(linked) tmpdir.join('style.css').write_binary(css) _run('combined.html out2.pdf') assert tmpdir.join('out2.pdf').read_binary() == pdf_bytes _run('combined-UTF-16BE.html out3.pdf --encoding UTF-16BE') assert tmpdir.join('out3.pdf').read_binary() == pdf_bytes _run(tmpdir.join('combined.html').strpath + ' out4.pdf') assert tmpdir.join('out4.pdf').read_binary() == pdf_bytes _run(path2url(tmpdir.join('combined.html').strpath) + ' out5.pdf') assert tmpdir.join('out5.pdf').read_binary() == pdf_bytes _run('linked.html --debug out6.pdf') # test relative URLs assert tmpdir.join('out6.pdf').read_binary() == pdf_bytes _run('combined.html --verbose out7') _run('combined.html --quiet out8') assert tmpdir.join('out7').read_binary() == pdf_bytes assert tmpdir.join('out8').read_binary() == pdf_bytes _run('no_css.html out9.pdf') _run('no_css.html out10.pdf -s style.css') assert tmpdir.join('out9.pdf').read_binary() != pdf_bytes assert tmpdir.join('out10.pdf').read_binary() == pdf_bytes stdout = _run('combined.html -') assert stdout == pdf_bytes _run('- out11.pdf', stdin=combined) assert tmpdir.join('out11.pdf').read_binary() == pdf_bytes stdout = _run('- -', stdin=combined) assert stdout == pdf_bytes _run('combined.html out13.pdf --media-type screen') _run('combined.html out12.pdf -m screen') _run('linked.html out14.pdf -m screen') assert tmpdir.join('out12.pdf').read_binary() == rotated_pdf_bytes assert tmpdir.join('out13.pdf').read_binary() == rotated_pdf_bytes assert tmpdir.join('out14.pdf').read_binary() == rotated_pdf_bytes stdout = _run('combined.html -') assert stdout.count(b'attachment') == 0 stdout = _run('combined.html -') assert stdout.count(b'attachment') == 0 stdout = _run('-a pattern.png combined.html -') assert stdout.count(b'attachment') == 1 stdout = _run('-a style.css -a pattern.png combined.html -') assert stdout.count(b'attachment') == 2 os.mkdir('subdirectory') py.path.local('subdirectory').chdir() with capture_logs() as logs: stdout = _run('- -', stdin=combined) assert len(logs) == 1 assert logs[0].startswith('ERROR: Failed to load image') assert stdout.startswith(b'%PDF') with capture_logs() as logs: stdout = _run('--base-url= - -', stdin=combined) assert len(logs) == 1 assert logs[0].startswith( 'ERROR: Relative URI reference without a base URI') assert stdout.startswith(b'%PDF') stdout = _run('--base-url .. - -', stdin=combined) assert stdout == pdf_bytes with pytest.raises(SystemExit): _run('--info') with pytest.raises(SystemExit): _run('--version') @assert_no_logs def test_unicode_filenames(tmpdir): """Test non-ASCII filenames both in Unicode or bytes form.""" # Replicate pattern.png in CSS so that base_url does not matter. html = b''' ''' png_bytes = FakeHTML(string=html).write_png() check_png_pattern(png_bytes) unicode_filename = 'Unicödé' if sys.platform.startswith('darwin'): # pragma: no cover unicode_filename = unicodedata.normalize('NFD', unicode_filename) tmpdir.chdir() tmpdir.join(unicode_filename).write(html) bytes_file, = tmpdir.listdir() assert bytes_file.basename == unicode_filename assert FakeHTML(unicode_filename).write_png() == png_bytes assert FakeHTML(bytes_file.strpath).write_png() == png_bytes os.remove(unicode_filename) assert tmpdir.listdir() == [] FakeHTML(string=html).write_png(unicode_filename) assert bytes_file.read_binary() == png_bytes @assert_no_logs def test_low_level_api(): html = FakeHTML(string='') css = CSS(string=''' @page { margin: 2px; size: 8px; background: #fff } html { background: #00f; } body { background: #f00; width: 1px; height: 1px } ''') pdf_bytes = html.write_pdf(stylesheets=[css]) assert pdf_bytes.startswith(b'%PDF') # TODO: check PDF content? How? # assert html.render([css]).write_pdf() == pdf_bytes png_bytes = html.write_png(stylesheets=[css]) document = html.render([css]) page, = document.pages assert page.width == 8 assert page.height == 8 assert document.write_png() == png_bytes assert document.copy([page]).write_png() == png_bytes document = html.render([css]) page, = document.pages assert (page.width, page.height) == (8, 8) png_bytes = document.write_png(resolution=192) check_png_pattern(png_bytes, x2=True) document = html.render([css]) page, = document.pages assert (page.width, page.height) == (8, 8) # A resolution that is not multiple of 96: assert _png_size(document.write_png(resolution=145.2)) == (12, 12) document = FakeHTML(string=''' ''').render() page_1, page_2 = document.pages assert (page_1.width, page_1.height) == (5, 10) assert (page_2.width, page_2.height) == (6, 4) result = document.write_png() # (Max of both widths, Sum of both heights) assert _png_size(result) == (6, 14) assert document.copy([page_1, page_2]).write_png() == result assert _png_size(document.copy([page_1]).write_png()) == (5, 10) assert _png_size(document.copy([page_2]).write_png()) == (6, 4) @pytest.mark.parametrize('html, expected_by_page, round_', ( ('''depth 1
depth 2
depth 1
depth 2
depth 3
''', [[ (2, 'A', (0, 0), 'open'), (4, 'B', (0, 20), 'open'), (2, 'C', (0, 40), 'open'), (3, 'D', (0, 60), 'open'), (4, 'E', (0, 80), 'open'), ]], False), ('''h2 depth 1
h4 depth 2
h3 depth 2
h5 depth 3
h1 depth 1
h2 depth 2
h2 depth 2
h4 depth 3
h1 depth 1
''', [[ (2, 'A', (0, 0), 'open'), (4, 'B', (0, 20), 'open'), (3, 'C', (0, 40), 'open'), (5, 'D', (0, 60), 'open'), (1, 'E', (0, 70), 'open'), (2, 'F', (0, 90), 'open'), (2, 'G', (0, 110), 'open'), (4, 'H', (0, 130), 'open'), (1, 'I', (0, 150), 'open'), ]], False), ('Hello, World
''', [ [ ('external', 'http://weasyprint.org', (0, 0, 30, 20), None), ('external', 'http://weasyprint.org', (0, 0, 30, 30), None), ('internal', 'lipsum', (10, 100, 42, 120), None), ('internal', 'lipsum', (10, 100, 42, 132), None) ], [('internal', 'hello', (0, 0, 200, 30), None)], ], [ {'hello': (0, 200)}, {'lipsum': (0, 0)} ], [ ( [ ('external', 'http://weasyprint.org', (0, 0, 30, 20), None), ('external', 'http://weasyprint.org', (0, 0, 30, 30), None), ('internal', 'lipsum', (10, 100, 42, 120), None), ('internal', 'lipsum', (10, 100, 42, 132), None) ], [('hello', 0, 200)], ), ( [ ('internal', 'hello', (0, 0, 200, 30), None) ], [('lipsum', 0, 0)]), ]) assert_links( ''' ''', [[('external', 'http://weasyprint.org/foo/lipsum/%C3%A9_%E9', (5, 10, 195, 10), None)]], [{}], [([('external', 'http://weasyprint.org/foo/lipsum/%C3%A9_%E9', (5, 10, 195, 10), None)], [])], base_url='http://weasyprint.org/foo/bar/') assert_links( '''