1
1
mirror of https://github.com/Kozea/WeasyPrint.git synced 2024-10-05 16:37:47 +03:00

Clean PDF tests

This commit is contained in:
Guillaume Ayoub 2018-03-15 00:26:58 +01:00
parent 8eb5183bea
commit 03b47f6a75

View File

@ -15,6 +15,7 @@ import os
import zlib
import cairocffi
import pytest
from pdfrw import PdfReader
from .. import Attachment
@ -29,42 +30,35 @@ RIGHT = 595
@assert_no_logs
def test_pdf_parser():
fileobj = io.BytesIO()
surface = cairocffi.PDFSurface(fileobj, 1, 1)
for width, height in [
@pytest.mark.parametrize('width, height', (
(100, 100),
(200, 10),
(3.14, 987654321)
]:
(3.14, 987654321),
))
def test_pdf_parser(width, height):
fileobj = io.BytesIO()
surface = cairocffi.PDFSurface(fileobj, 1, 1)
surface.set_size(width, height)
surface.show_page()
surface.finish()
fileobj.seek(0)
sizes = [page.MediaBox for page in PdfReader(fileobj).Root.Pages.Kids]
assert sizes == [
['0', '0', '100', '100'],
['0', '0', '200', '10'],
['0', '0', '3.14', '987654321']
]
surface, = [page.MediaBox for page in PdfReader(fileobj).Root.Pages.Kids]
assert surface == ['0', '0', str(width), str(height)]
@assert_no_logs
def test_page_size():
pdf_bytes = FakeHTML(string='<style>@page{size:3in 4in').write_pdf()
@pytest.mark.parametrize('zoom', (1, 1.5, 0.5))
def test_page_size_zoom(zoom):
pdf_bytes = FakeHTML(
string='<style>@page{size:3in 4in').write_pdf(zoom=zoom)
pdf = PdfReader(fdata=pdf_bytes)
assert pdf.Root.Pages.Kids[0].MediaBox == ['0', '0', '216', '288']
pdf_bytes = FakeHTML(string='<style>@page{size:3in 4in').write_pdf(
zoom=1.5)
pdf = PdfReader(fdata=pdf_bytes)
assert pdf.Root.Pages.Kids[0].MediaBox == ['0', '0', '324', '432']
assert pdf.Root.Pages.Kids[0].MediaBox == [
'0', '0', str(int(216 * zoom)), str(int(288 * zoom))]
@assert_no_logs
def test_bookmarks():
"""Test the structure of the document bookmarks."""
def test_bookmarks_1():
pdf_bytes = FakeHTML(string='''
<h1>a</h1> #
<h4>b</h4> ####
@ -87,13 +81,22 @@ def test_bookmarks():
assert outlines.First.Next.Title == '(e)'
assert outlines.Last.Title == '(e)'
@assert_no_logs
def test_bookmarks_2():
    """A document without any heading must not produce a PDF outline."""
    document = FakeHTML(string='<body>')
    reader = PdfReader(fdata=document.write_pdf())
    # No heading means no bookmark, so the catalog has no /Outlines entry.
    assert reader.Root.Outlines is None
@assert_no_logs
def test_bookmarks_3():
    """Non-ASCII heading text must survive as the outline entry's title."""
    rendered = FakeHTML(string='<h1>a nbsp…</h1>').write_pdf()
    first_entry = PdfReader(fdata=rendered).Root.Outlines.First
    # The title is decoded before comparison with the heading's text.
    assert first_entry.Title.decode() == 'a nbsp…'
@assert_no_logs
def test_bookmarks_4():
pdf_bytes = FakeHTML(string='''
<style>
* { height: 90pt; margin: 0 0 10pt 0 }
@ -147,6 +150,9 @@ def test_bookmarks():
assert outlines.Last.First.Title == '(Title 11)'
assert outlines.Last.Last.Title == '(Title 11)'
@assert_no_logs
def test_bookmarks_5():
pdf_bytes = FakeHTML(string='''
<h2>1</h2> level 1
<h4>2</h4> level 2
@ -167,6 +173,9 @@ def test_bookmarks():
assert outlines.Last.First.Title == '(4)'
assert outlines.Last.First.First.Title == '(5)'
@assert_no_logs
def test_bookmarks_6():
pdf_bytes = FakeHTML(string='''
<h2>1</h2> h2 level 1
<h4>2</h4> h4 level 2
@ -199,6 +208,9 @@ def test_bookmarks():
assert outlines.First.Next.First.Next.First.Title == '(8)'
assert outlines.Last.Title == '(9)'
@assert_no_logs
def test_bookmarks_7():
# Reference for the next test. zoom=1
pdf_bytes = FakeHTML(string='<h2>a</h2>').write_pdf()
outlines = PdfReader(fdata=pdf_bytes).Root.Outlines
@ -286,7 +298,7 @@ def test_links():
@assert_no_logs
def test_relative_links():
def test_relative_links_relative():
# Relative URI reference without a base URI: allowed for anchors
pdf_bytes = FakeHTML(
string='<a href="../lipsum" style="display: block">',
@ -295,6 +307,9 @@ def test_relative_links():
assert link.A == {'/URI': '(../lipsum)', '/S': '/URI', '/Type': '/Action'}
assert [round(float(value)) for value in link.Rect] == [0, TOP, RIGHT, TOP]
@assert_no_logs
def test_relative_links_links():
# Relative URI reference without a base URI: not supported for -weasy-link
with capture_logs() as logs:
pdf_bytes = FakeHTML(
@ -305,6 +320,9 @@ def test_relative_links():
assert 'WARNING: Ignored `-weasy-link: url("../lipsum")`' in logs[0]
assert 'Relative URI reference without a base URI' in logs[0]
@assert_no_logs
def test_relative_links_internal():
# Internal URI reference without a base URI: OK
pdf_bytes = FakeHTML(
string='<a href="#lipsum" id="lipsum" style="display: block">',
@ -316,6 +334,9 @@ def test_relative_links():
assert round(float(link.A.D[3])) == TOP
assert [round(float(value)) for value in link.Rect] == [0, TOP, RIGHT, TOP]
@assert_no_logs
def test_relative_links_anchors():
pdf_bytes = FakeHTML(
string='<div style="-weasy-link: url(#lipsum)" id="lipsum">',
base_url=None).write_pdf()
@ -331,8 +352,7 @@ def test_relative_links():
def test_missing_links():
with capture_logs() as logs:
pdf_bytes = FakeHTML(string='''
<style> a { display: block; height: 15pt; } </style>
<body>
<style> a { display: block; height: 15pt } </style>
<a href="#lipsum"></a>
<a href="#missing" id="lipsum"></a>
''', base_url=None).write_pdf()
@ -348,13 +368,18 @@ def test_missing_links():
@assert_no_logs
def test_jpeg():
def render(html):
return FakeHTML(base_url=resource_filename('dummy.html'),
string=html).write_pdf()
assert b'/Filter /DCTDecode' not in render('<img src="pattern.gif">')
def test_embed_gif():
assert b'/Filter /DCTDecode' not in FakeHTML(
base_url=resource_filename('dummy.html'),
string='<img src="pattern.gif">').write_pdf()
@assert_no_logs
def test_embed_jpeg():
# JPEG-encoded image, embedded in PDF:
assert b'/Filter /DCTDecode' in render('<img src="blue.jpg">')
assert b'/Filter /DCTDecode' in FakeHTML(
base_url=resource_filename('dummy.html'),
string='<img src="blue.jpg">').write_pdf()
@assert_no_logs
@ -381,7 +406,7 @@ def test_document_info():
@assert_no_logs
def test_embedded_files(tmpdir):
def test_embedded_files_attachments(tmpdir):
absolute_tmp_file = tmpdir.join('some_file.txt')
adata = b'12345678'
with open(absolute_tmp_file, 'wb') as afile:
@ -454,12 +479,13 @@ def test_embedded_files(tmpdir):
assert embedded[11].EF.F.Params.CheckSum == (
'<{}>'.format(hashlib.md5(b'file like obj').hexdigest()))
@assert_no_logs
def test_attachments_data():
pdf_bytes = FakeHTML(string='''
<title>Test document 2</title>
<meta charset="utf-8">
<link
rel="attachment"
href="data:,some data">
<link rel="attachment" href="data:,some data">
''').write_pdf()
pdf = PdfReader(fdata=pdf_bytes)
embedded = pdf.Root.Names.EmbeddedFiles.Names
@ -467,6 +493,9 @@ def test_embedded_files(tmpdir):
assert embedded[1].EF.F.Params.CheckSum == (
'<{}>'.format(hashlib.md5(b'some data').hexdigest()))
@assert_no_logs
def test_attachments_none():
pdf_bytes = FakeHTML(string='''
<title>Test document 3</title>
<meta charset="utf-8">
@ -476,6 +505,9 @@ def test_embedded_files(tmpdir):
assert pdf.Root.Names is None
assert pdf.Root.Outlines is not None
@assert_no_logs
def test_attachments_none_empty():
pdf_bytes = FakeHTML(string='''
<title>Test document 4</title>
<meta charset="utf-8">
@ -486,7 +518,7 @@ def test_embedded_files(tmpdir):
@assert_no_logs
def test_annotation_files():
def test_annotations():
pdf_bytes = FakeHTML(string='''
<title>Test document</title>
<meta charset="utf-8">
@ -501,30 +533,24 @@ def test_annotation_files():
assert b'/EmbeddedFiles' not in pdf_bytes
@pytest.mark.parametrize('style, media, bleed, trim', (
('bleed: 30pt; size: 10pt',
['0', '0', '70', '70'],
['20', '20', '50', '50'],
['30', '30', '40', '40']),
('bleed: 15pt 3pt 6pt 18pt; size: 12pt 15pt',
['0', '0', '33', '36'],
['8', '5', '33', '36'],
['18', '15', '30', '30']),
))
@assert_no_logs
def test_bleed():
def test_bleed(style, media, bleed, trim):
pdf_bytes = FakeHTML(string='''
<title>Test document</title>
<style>
@page { bleed: 30pt; size: 10pt }
</style>
<style>@page { %s }</style>
<body>test
''').write_pdf()
''' % style).write_pdf()
pdf = PdfReader(fdata=pdf_bytes)
assert pdf.Root.Pages.Kids[0].MediaBox == ['0', '0', '70', '70']
assert pdf.Root.Pages.Kids[0].BleedBox == ['20', '20', '50', '50']
assert pdf.Root.Pages.Kids[0].TrimBox == ['30', '30', '40', '40']
pdf_bytes = FakeHTML(string='''
<title>Test document</title>
<style>
@page { bleed: 15pt 3pt 6pt 18pt; size: 12pt 15pt }
</style>
<body>test
''').write_pdf()
pdf = PdfReader(fdata=pdf_bytes)
assert pdf.Root.Pages.Kids[0].MediaBox == ['0', '0', '33', '36']
assert pdf.Root.Pages.Kids[0].BleedBox == ['8', '5', '33', '36']
assert pdf.Root.Pages.Kids[0].TrimBox == ['18', '15', '30', '30']
assert pdf.Root.Pages.Kids[0].MediaBox == media
assert pdf.Root.Pages.Kids[0].BleedBox == bleed
assert pdf.Root.Pages.Kids[0].TrimBox == trim