1
1
mirror of https://github.com/Kozea/WeasyPrint.git synced 2024-10-05 08:27:22 +03:00

Clean PDF tests

This commit is contained in:
Guillaume Ayoub 2018-03-15 00:26:58 +01:00
parent 8eb5183bea
commit 03b47f6a75

View File

@ -15,6 +15,7 @@ import os
import zlib
import cairocffi
import pytest
from pdfrw import PdfReader
from .. import Attachment
@ -29,48 +30,41 @@ RIGHT = 595
@assert_no_logs
def test_pdf_parser():
@pytest.mark.parametrize('width, height', (
(100, 100),
(200, 10),
(3.14, 987654321),
))
def test_pdf_parser(width, height):
fileobj = io.BytesIO()
surface = cairocffi.PDFSurface(fileobj, 1, 1)
for width, height in [
(100, 100),
(200, 10),
(3.14, 987654321)
]:
surface.set_size(width, height)
surface.show_page()
surface.set_size(width, height)
surface.show_page()
surface.finish()
fileobj.seek(0)
sizes = [page.MediaBox for page in PdfReader(fileobj).Root.Pages.Kids]
assert sizes == [
['0', '0', '100', '100'],
['0', '0', '200', '10'],
['0', '0', '3.14', '987654321']
]
surface, = [page.MediaBox for page in PdfReader(fileobj).Root.Pages.Kids]
assert surface == ['0', '0', str(width), str(height)]
@assert_no_logs
def test_page_size():
pdf_bytes = FakeHTML(string='<style>@page{size:3in 4in').write_pdf()
@pytest.mark.parametrize('zoom', (1, 1.5, 0.5))
def test_page_size_zoom(zoom):
pdf_bytes = FakeHTML(
string='<style>@page{size:3in 4in').write_pdf(zoom=zoom)
pdf = PdfReader(fdata=pdf_bytes)
assert pdf.Root.Pages.Kids[0].MediaBox == ['0', '0', '216', '288']
pdf_bytes = FakeHTML(string='<style>@page{size:3in 4in').write_pdf(
zoom=1.5)
pdf = PdfReader(fdata=pdf_bytes)
assert pdf.Root.Pages.Kids[0].MediaBox == ['0', '0', '324', '432']
assert pdf.Root.Pages.Kids[0].MediaBox == [
'0', '0', str(int(216 * zoom)), str(int(288 * zoom))]
@assert_no_logs
def test_bookmarks():
"""Test the structure of the document bookmarks."""
def test_bookmarks_1():
pdf_bytes = FakeHTML(string='''
<h1>a</h1> #
<h4>b</h4> ####
<h3>c</h3> ###
<h2>d</h2> ##
<h1>e</h1> #
<h1>a</h1> #
<h4>b</h4> ####
<h3>c</h3> ###
<h2>d</h2> ##
<h1>e</h1> #
''').write_pdf()
outlines = PdfReader(fdata=pdf_bytes).Root.Outlines
# a
@ -87,29 +81,38 @@ def test_bookmarks():
assert outlines.First.Next.Title == '(e)'
assert outlines.Last.Title == '(e)'
@assert_no_logs
def test_bookmarks_2():
pdf_bytes = FakeHTML(string='<body>').write_pdf()
assert PdfReader(fdata=pdf_bytes).Root.Outlines is None
@assert_no_logs
def test_bookmarks_3():
pdf_bytes = FakeHTML(string='<h1>a nbsp…</h1>').write_pdf()
outlines = PdfReader(fdata=pdf_bytes).Root.Outlines
assert outlines.First.Title.decode() == 'a nbsp…'
@assert_no_logs
def test_bookmarks_4():
pdf_bytes = FakeHTML(string='''
<style>
* { height: 90pt; margin: 0 0 10pt 0 }
</style>
<h1>Title 1</h1>
<h1>Title 2</h1>
<h2 style="position: relative; left: 20pt">Title 3</h2>
<h2>Title 4</h2>
<h3>Title 5</h3>
<span style="display: block; page-break-before: always"></span>
<h2>Title 6</h2>
<h1>Title 7</h1>
<h2>Title 8</h2>
<h3>Title 9</h3>
<h1>Title 10</h1>
<h2>Title 11</h2>
<style>
* { height: 90pt; margin: 0 0 10pt 0 }
</style>
<h1>Title 1</h1>
<h1>Title 2</h1>
<h2 style="position: relative; left: 20pt">Title 3</h2>
<h2>Title 4</h2>
<h3>Title 5</h3>
<span style="display: block; page-break-before: always"></span>
<h2>Title 6</h2>
<h1>Title 7</h1>
<h2>Title 8</h2>
<h3>Title 9</h3>
<h1>Title 10</h1>
<h2>Title 11</h2>
''').write_pdf()
outlines = PdfReader(fdata=pdf_bytes).Root.Outlines
# 1
@ -147,12 +150,15 @@ def test_bookmarks():
assert outlines.Last.First.Title == '(Title 11)'
assert outlines.Last.Last.Title == '(Title 11)'
@assert_no_logs
def test_bookmarks_5():
pdf_bytes = FakeHTML(string='''
<h2>1</h2> level 1
<h4>2</h4> level 2
<h2>3</h2> level 1
<h3>4</h3> level 2
<h4>5</h4> level 3
<h2>1</h2> level 1
<h4>2</h4> level 2
<h2>3</h2> level 1
<h3>4</h3> level 2
<h4>5</h4> level 3
''').write_pdf()
outlines = PdfReader(fdata=pdf_bytes).Root.Outlines
# 1
@ -167,16 +173,19 @@ def test_bookmarks():
assert outlines.Last.First.Title == '(4)'
assert outlines.Last.First.First.Title == '(5)'
@assert_no_logs
def test_bookmarks_6():
pdf_bytes = FakeHTML(string='''
<h2>1</h2> h2 level 1
<h4>2</h4> h4 level 2
<h3>3</h3> h3 level 2
<h5>4</h5> h5 level 3
<h1>5</h1> h1 level 1
<h2>6</h2> h2 level 2
<h2>7</h2> h2 level 2
<h4>8</h4> h4 level 3
<h1>9</h1> h1 level 1
<h2>1</h2> h2 level 1
<h4>2</h4> h4 level 2
<h3>3</h3> h3 level 2
<h5>4</h5> h5 level 3
<h1>5</h1> h1 level 1
<h2>6</h2> h2 level 2
<h2>7</h2> h2 level 2
<h4>8</h4> h4 level 3
<h1>9</h1> h1 level 1
''').write_pdf()
# 1
# |_ 2
@ -199,6 +208,9 @@ def test_bookmarks():
assert outlines.First.Next.First.Next.First.Title == '(8)'
assert outlines.Last.Title == '(9)'
@assert_no_logs
def test_bookmarks_7():
# Reference for the next test. zoom=1
pdf_bytes = FakeHTML(string='<h2>a</h2>').write_pdf()
outlines = PdfReader(fdata=pdf_bytes).Root.Outlines
@ -217,20 +229,20 @@ def test_links():
assert PdfReader(fdata=pdf_bytes).Root.Pages.Kids[0].Annots is None
pdf_bytes = FakeHTML(string='''
<style>
body { margin: 0; font-size: 10pt; line-height: 2 }
p { display: block; height: 90pt; margin: 0 0 10pt 0 }
img { width: 30pt; vertical-align: top }
</style>
<p><a href="http://weasyprint.org"><img src=pattern.png></a></p>
<p style="padding: 0 10pt"><a
href="#lipsum"><img style="border: solid 1pt"
src=pattern.png></a></p>
<p id=hello>Hello, World</p>
<p id=lipsum>
<a style="display: block; page-break-before: always; height: 30pt"
href="#hel%6Co"></a>
</p>
<style>
body { margin: 0; font-size: 10pt; line-height: 2 }
p { display: block; height: 90pt; margin: 0 0 10pt 0 }
img { width: 30pt; vertical-align: top }
</style>
<p><a href="http://weasyprint.org"><img src=pattern.png></a></p>
<p style="padding: 0 10pt"><a
href="#lipsum"><img style="border: solid 1pt"
src=pattern.png></a></p>
<p id=hello>Hello, World</p>
<p id=lipsum>
<a style="display: block; page-break-before: always; height: 30pt"
href="#hel%6Co"></a>
</p>
''', base_url=resource_filename('<inline HTML>')).write_pdf()
links = [
annot for page in PdfReader(fdata=pdf_bytes).Root.Pages.Kids
@ -286,7 +298,7 @@ def test_links():
@assert_no_logs
def test_relative_links():
def test_relative_links_relative():
# Relative URI reference without a base URI: allowed for anchors
pdf_bytes = FakeHTML(
string='<a href="../lipsum" style="display: block">',
@ -295,6 +307,9 @@ def test_relative_links():
assert link.A == {'/URI': '(../lipsum)', '/S': '/URI', '/Type': '/Action'}
assert [round(float(value)) for value in link.Rect] == [0, TOP, RIGHT, TOP]
@assert_no_logs
def test_relative_links_links():
# Relative URI reference without a base URI: not supported for -weasy-link
with capture_logs() as logs:
pdf_bytes = FakeHTML(
@ -305,6 +320,9 @@ def test_relative_links():
assert 'WARNING: Ignored `-weasy-link: url("../lipsum")`' in logs[0]
assert 'Relative URI reference without a base URI' in logs[0]
@assert_no_logs
def test_relative_links_internal():
# Internal URI reference without a base URI: OK
pdf_bytes = FakeHTML(
string='<a href="#lipsum" id="lipsum" style="display: block">',
@ -316,6 +334,9 @@ def test_relative_links():
assert round(float(link.A.D[3])) == TOP
assert [round(float(value)) for value in link.Rect] == [0, TOP, RIGHT, TOP]
@assert_no_logs
def test_relative_links_anchors():
pdf_bytes = FakeHTML(
string='<div style="-weasy-link: url(#lipsum)" id="lipsum">',
base_url=None).write_pdf()
@ -331,10 +352,9 @@ def test_relative_links():
def test_missing_links():
with capture_logs() as logs:
pdf_bytes = FakeHTML(string='''
<style> a { display: block; height: 15pt; } </style>
<body>
<a href="#lipsum"></a>
<a href="#missing" id="lipsum"></a>
<style> a { display: block; height: 15pt } </style>
<a href="#lipsum"></a>
<a href="#missing" id="lipsum"></a>
''', base_url=None).write_pdf()
link, = PdfReader(fdata=pdf_bytes).Root.Pages.Kids[0].Annots
assert link.A.S == '/GoTo'
@ -348,27 +368,32 @@ def test_missing_links():
@assert_no_logs
def test_jpeg():
def render(html):
return FakeHTML(base_url=resource_filename('dummy.html'),
string=html).write_pdf()
assert b'/Filter /DCTDecode' not in render('<img src="pattern.gif">')
def test_embed_gif():
assert b'/Filter /DCTDecode' not in FakeHTML(
base_url=resource_filename('dummy.html'),
string='<img src="pattern.gif">').write_pdf()
@assert_no_logs
def test_embed_jpeg():
# JPEG-encoded image, embedded in PDF:
assert b'/Filter /DCTDecode' in render('<img src="blue.jpg">')
assert b'/Filter /DCTDecode' in FakeHTML(
base_url=resource_filename('dummy.html'),
string='<img src="blue.jpg">').write_pdf()
@assert_no_logs
def test_document_info():
pdf_bytes = FakeHTML(string='''
<meta name=author content="I Me &amp; Myself">
<title>Test document</title>
<h1>Another title</h1>
<meta name=generator content="Human after all">
<meta name=keywords content="html ,\tcss,
pdf,css">
<meta name=description content="Blah… ">
<meta name=dcterms.created content=2011-04>
<meta name=dcterms.modified content=2013-07-21T23:46+01:00>
<meta name=author content="I Me &amp; Myself">
<title>Test document</title>
<h1>Another title</h1>
<meta name=generator content="Human after all">
<meta name=keywords content="html ,\tcss,
pdf,css">
<meta name=description content="Blah… ">
<meta name=dcterms.created content=2011-04>
<meta name=dcterms.modified content=2013-07-21T23:46+01:00>
''').write_pdf()
info = PdfReader(fdata=pdf_bytes).Info
assert info.Author.decode() == 'I Me & Myself'
@ -381,7 +406,7 @@ def test_document_info():
@assert_no_logs
def test_embedded_files(tmpdir):
def test_embedded_files_attachments(tmpdir):
absolute_tmp_file = tmpdir.join('some_file.txt')
adata = b'12345678'
with open(absolute_tmp_file, 'wb') as afile:
@ -396,16 +421,16 @@ def test_embedded_files(tmpdir):
pdf_bytes = FakeHTML(
string='''
<title>Test document</title>
<meta charset="utf-8">
<link
rel="attachment"
title="some file attachment äöü"
href="data:,hi%20there">
<link rel="attachment" href="{0}">
<link rel="attachment" href="{1}">
<h1>Heading 1</h1>
<h2>Heading 2</h2>
<title>Test document</title>
<meta charset="utf-8">
<link
rel="attachment"
title="some file attachment äöü"
href="data:,hi%20there">
<link rel="attachment" href="{0}">
<link rel="attachment" href="{1}">
<h1>Heading 1</h1>
<h2>Heading 2</h2>
'''.format(absolute_url, os.path.basename(relative_tmp_file)),
base_url=tmpdir.strpath,
).write_pdf(
@ -454,12 +479,13 @@ def test_embedded_files(tmpdir):
assert embedded[11].EF.F.Params.CheckSum == (
'<{}>'.format(hashlib.md5(b'file like obj').hexdigest()))
@assert_no_logs
def test_attachments_data():
pdf_bytes = FakeHTML(string='''
<title>Test document 2</title>
<meta charset="utf-8">
<link
rel="attachment"
href="data:,some data">
<title>Test document 2</title>
<meta charset="utf-8">
<link rel="attachment" href="data:,some data">
''').write_pdf()
pdf = PdfReader(fdata=pdf_bytes)
embedded = pdf.Root.Names.EmbeddedFiles.Names
@ -467,18 +493,24 @@ def test_embedded_files(tmpdir):
assert embedded[1].EF.F.Params.CheckSum == (
'<{}>'.format(hashlib.md5(b'some data').hexdigest()))
@assert_no_logs
def test_attachments_none():
pdf_bytes = FakeHTML(string='''
<title>Test document 3</title>
<meta charset="utf-8">
<h1>Heading</h1>
<title>Test document 3</title>
<meta charset="utf-8">
<h1>Heading</h1>
''').write_pdf()
pdf = PdfReader(fdata=pdf_bytes)
assert pdf.Root.Names is None
assert pdf.Root.Outlines is not None
@assert_no_logs
def test_attachments_none_empty():
pdf_bytes = FakeHTML(string='''
<title>Test document 4</title>
<meta charset="utf-8">
<title>Test document 4</title>
<meta charset="utf-8">
''').write_pdf()
pdf = PdfReader(fdata=pdf_bytes)
assert pdf.Root.Names is None
@ -486,14 +518,14 @@ def test_embedded_files(tmpdir):
@assert_no_logs
def test_annotation_files():
def test_annotations():
pdf_bytes = FakeHTML(string='''
<title>Test document</title>
<meta charset="utf-8">
<a
rel="attachment"
href="data:,some data"
download>A link that lets you download an attachment</a>
<title>Test document</title>
<meta charset="utf-8">
<a
rel="attachment"
href="data:,some data"
download>A link that lets you download an attachment</a>
''').write_pdf()
assert hashlib.md5(b'some data').hexdigest().encode('ascii') in pdf_bytes
@ -501,30 +533,24 @@ def test_annotation_files():
assert b'/EmbeddedFiles' not in pdf_bytes
@pytest.mark.parametrize('style, media, bleed, trim', (
('bleed: 30pt; size: 10pt',
['0', '0', '70', '70'],
['20', '20', '50', '50'],
['30', '30', '40', '40']),
('bleed: 15pt 3pt 6pt 18pt; size: 12pt 15pt',
['0', '0', '33', '36'],
['8', '5', '33', '36'],
['18', '15', '30', '30']),
))
@assert_no_logs
def test_bleed():
def test_bleed(style, media, bleed, trim):
pdf_bytes = FakeHTML(string='''
<title>Test document</title>
<style>
@page { bleed: 30pt; size: 10pt }
</style>
<body>test
''').write_pdf()
<title>Test document</title>
<style>@page { %s }</style>
<body>test
''' % style).write_pdf()
pdf = PdfReader(fdata=pdf_bytes)
assert pdf.Root.Pages.Kids[0].MediaBox == ['0', '0', '70', '70']
assert pdf.Root.Pages.Kids[0].BleedBox == ['20', '20', '50', '50']
assert pdf.Root.Pages.Kids[0].TrimBox == ['30', '30', '40', '40']
pdf_bytes = FakeHTML(string='''
<title>Test document</title>
<style>
@page { bleed: 15pt 3pt 6pt 18pt; size: 12pt 15pt }
</style>
<body>test
''').write_pdf()
pdf = PdfReader(fdata=pdf_bytes)
assert pdf.Root.Pages.Kids[0].MediaBox == ['0', '0', '33', '36']
assert pdf.Root.Pages.Kids[0].BleedBox == ['8', '5', '33', '36']
assert pdf.Root.Pages.Kids[0].TrimBox == ['18', '15', '30', '30']
assert pdf.Root.Pages.Kids[0].MediaBox == media
assert pdf.Root.Pages.Kids[0].BleedBox == bleed
assert pdf.Root.Pages.Kids[0].TrimBox == trim