mirror of
https://github.com/Kozea/WeasyPrint.git
synced 2024-10-05 08:27:22 +03:00
Clean PDF tests
This commit is contained in:
parent
8eb5183bea
commit
03b47f6a75
@ -15,6 +15,7 @@ import os
|
||||
import zlib
|
||||
|
||||
import cairocffi
|
||||
import pytest
|
||||
from pdfrw import PdfReader
|
||||
|
||||
from .. import Attachment
|
||||
@ -29,48 +30,41 @@ RIGHT = 595
|
||||
|
||||
|
||||
@assert_no_logs
|
||||
def test_pdf_parser():
|
||||
@pytest.mark.parametrize('width, height', (
|
||||
(100, 100),
|
||||
(200, 10),
|
||||
(3.14, 987654321),
|
||||
))
|
||||
def test_pdf_parser(width, height):
|
||||
fileobj = io.BytesIO()
|
||||
surface = cairocffi.PDFSurface(fileobj, 1, 1)
|
||||
for width, height in [
|
||||
(100, 100),
|
||||
(200, 10),
|
||||
(3.14, 987654321)
|
||||
]:
|
||||
surface.set_size(width, height)
|
||||
surface.show_page()
|
||||
surface.set_size(width, height)
|
||||
surface.show_page()
|
||||
surface.finish()
|
||||
|
||||
fileobj.seek(0)
|
||||
sizes = [page.MediaBox for page in PdfReader(fileobj).Root.Pages.Kids]
|
||||
assert sizes == [
|
||||
['0', '0', '100', '100'],
|
||||
['0', '0', '200', '10'],
|
||||
['0', '0', '3.14', '987654321']
|
||||
]
|
||||
surface, = [page.MediaBox for page in PdfReader(fileobj).Root.Pages.Kids]
|
||||
assert surface == ['0', '0', str(width), str(height)]
|
||||
|
||||
|
||||
@assert_no_logs
|
||||
def test_page_size():
|
||||
pdf_bytes = FakeHTML(string='<style>@page{size:3in 4in').write_pdf()
|
||||
@pytest.mark.parametrize('zoom', (1, 1.5, 0.5))
|
||||
def test_page_size_zoom(zoom):
|
||||
pdf_bytes = FakeHTML(
|
||||
string='<style>@page{size:3in 4in').write_pdf(zoom=zoom)
|
||||
pdf = PdfReader(fdata=pdf_bytes)
|
||||
assert pdf.Root.Pages.Kids[0].MediaBox == ['0', '0', '216', '288']
|
||||
|
||||
pdf_bytes = FakeHTML(string='<style>@page{size:3in 4in').write_pdf(
|
||||
zoom=1.5)
|
||||
pdf = PdfReader(fdata=pdf_bytes)
|
||||
assert pdf.Root.Pages.Kids[0].MediaBox == ['0', '0', '324', '432']
|
||||
assert pdf.Root.Pages.Kids[0].MediaBox == [
|
||||
'0', '0', str(int(216 * zoom)), str(int(288 * zoom))]
|
||||
|
||||
|
||||
@assert_no_logs
|
||||
def test_bookmarks():
|
||||
"""Test the structure of the document bookmarks."""
|
||||
def test_bookmarks_1():
|
||||
pdf_bytes = FakeHTML(string='''
|
||||
<h1>a</h1> #
|
||||
<h4>b</h4> ####
|
||||
<h3>c</h3> ###
|
||||
<h2>d</h2> ##
|
||||
<h1>e</h1> #
|
||||
<h1>a</h1> #
|
||||
<h4>b</h4> ####
|
||||
<h3>c</h3> ###
|
||||
<h2>d</h2> ##
|
||||
<h1>e</h1> #
|
||||
''').write_pdf()
|
||||
outlines = PdfReader(fdata=pdf_bytes).Root.Outlines
|
||||
# a
|
||||
@ -87,29 +81,38 @@ def test_bookmarks():
|
||||
assert outlines.First.Next.Title == '(e)'
|
||||
assert outlines.Last.Title == '(e)'
|
||||
|
||||
|
||||
@assert_no_logs
|
||||
def test_bookmarks_2():
|
||||
pdf_bytes = FakeHTML(string='<body>').write_pdf()
|
||||
assert PdfReader(fdata=pdf_bytes).Root.Outlines is None
|
||||
|
||||
|
||||
@assert_no_logs
|
||||
def test_bookmarks_3():
|
||||
pdf_bytes = FakeHTML(string='<h1>a nbsp…</h1>').write_pdf()
|
||||
outlines = PdfReader(fdata=pdf_bytes).Root.Outlines
|
||||
assert outlines.First.Title.decode() == 'a nbsp…'
|
||||
|
||||
|
||||
@assert_no_logs
|
||||
def test_bookmarks_4():
|
||||
pdf_bytes = FakeHTML(string='''
|
||||
<style>
|
||||
* { height: 90pt; margin: 0 0 10pt 0 }
|
||||
</style>
|
||||
<h1>Title 1</h1>
|
||||
<h1>Title 2</h1>
|
||||
<h2 style="position: relative; left: 20pt">Title 3</h2>
|
||||
<h2>Title 4</h2>
|
||||
<h3>Title 5</h3>
|
||||
<span style="display: block; page-break-before: always"></span>
|
||||
<h2>Title 6</h2>
|
||||
<h1>Title 7</h1>
|
||||
<h2>Title 8</h2>
|
||||
<h3>Title 9</h3>
|
||||
<h1>Title 10</h1>
|
||||
<h2>Title 11</h2>
|
||||
<style>
|
||||
* { height: 90pt; margin: 0 0 10pt 0 }
|
||||
</style>
|
||||
<h1>Title 1</h1>
|
||||
<h1>Title 2</h1>
|
||||
<h2 style="position: relative; left: 20pt">Title 3</h2>
|
||||
<h2>Title 4</h2>
|
||||
<h3>Title 5</h3>
|
||||
<span style="display: block; page-break-before: always"></span>
|
||||
<h2>Title 6</h2>
|
||||
<h1>Title 7</h1>
|
||||
<h2>Title 8</h2>
|
||||
<h3>Title 9</h3>
|
||||
<h1>Title 10</h1>
|
||||
<h2>Title 11</h2>
|
||||
''').write_pdf()
|
||||
outlines = PdfReader(fdata=pdf_bytes).Root.Outlines
|
||||
# 1
|
||||
@ -147,12 +150,15 @@ def test_bookmarks():
|
||||
assert outlines.Last.First.Title == '(Title 11)'
|
||||
assert outlines.Last.Last.Title == '(Title 11)'
|
||||
|
||||
|
||||
@assert_no_logs
|
||||
def test_bookmarks_5():
|
||||
pdf_bytes = FakeHTML(string='''
|
||||
<h2>1</h2> level 1
|
||||
<h4>2</h4> level 2
|
||||
<h2>3</h2> level 1
|
||||
<h3>4</h3> level 2
|
||||
<h4>5</h4> level 3
|
||||
<h2>1</h2> level 1
|
||||
<h4>2</h4> level 2
|
||||
<h2>3</h2> level 1
|
||||
<h3>4</h3> level 2
|
||||
<h4>5</h4> level 3
|
||||
''').write_pdf()
|
||||
outlines = PdfReader(fdata=pdf_bytes).Root.Outlines
|
||||
# 1
|
||||
@ -167,16 +173,19 @@ def test_bookmarks():
|
||||
assert outlines.Last.First.Title == '(4)'
|
||||
assert outlines.Last.First.First.Title == '(5)'
|
||||
|
||||
|
||||
@assert_no_logs
|
||||
def test_bookmarks_6():
|
||||
pdf_bytes = FakeHTML(string='''
|
||||
<h2>1</h2> h2 level 1
|
||||
<h4>2</h4> h4 level 2
|
||||
<h3>3</h3> h3 level 2
|
||||
<h5>4</h5> h5 level 3
|
||||
<h1>5</h1> h1 level 1
|
||||
<h2>6</h2> h2 level 2
|
||||
<h2>7</h2> h2 level 2
|
||||
<h4>8</h4> h4 level 3
|
||||
<h1>9</h1> h1 level 1
|
||||
<h2>1</h2> h2 level 1
|
||||
<h4>2</h4> h4 level 2
|
||||
<h3>3</h3> h3 level 2
|
||||
<h5>4</h5> h5 level 3
|
||||
<h1>5</h1> h1 level 1
|
||||
<h2>6</h2> h2 level 2
|
||||
<h2>7</h2> h2 level 2
|
||||
<h4>8</h4> h4 level 3
|
||||
<h1>9</h1> h1 level 1
|
||||
''').write_pdf()
|
||||
# 1
|
||||
# |_ 2
|
||||
@ -199,6 +208,9 @@ def test_bookmarks():
|
||||
assert outlines.First.Next.First.Next.First.Title == '(8)'
|
||||
assert outlines.Last.Title == '(9)'
|
||||
|
||||
|
||||
@assert_no_logs
|
||||
def test_bookmarks_7():
|
||||
# Reference for the next test. zoom=1
|
||||
pdf_bytes = FakeHTML(string='<h2>a</h2>').write_pdf()
|
||||
outlines = PdfReader(fdata=pdf_bytes).Root.Outlines
|
||||
@ -217,20 +229,20 @@ def test_links():
|
||||
assert PdfReader(fdata=pdf_bytes).Root.Pages.Kids[0].Annots is None
|
||||
|
||||
pdf_bytes = FakeHTML(string='''
|
||||
<style>
|
||||
body { margin: 0; font-size: 10pt; line-height: 2 }
|
||||
p { display: block; height: 90pt; margin: 0 0 10pt 0 }
|
||||
img { width: 30pt; vertical-align: top }
|
||||
</style>
|
||||
<p><a href="http://weasyprint.org"><img src=pattern.png></a></p>
|
||||
<p style="padding: 0 10pt"><a
|
||||
href="#lipsum"><img style="border: solid 1pt"
|
||||
src=pattern.png></a></p>
|
||||
<p id=hello>Hello, World</p>
|
||||
<p id=lipsum>
|
||||
<a style="display: block; page-break-before: always; height: 30pt"
|
||||
href="#hel%6Co"></a>
|
||||
</p>
|
||||
<style>
|
||||
body { margin: 0; font-size: 10pt; line-height: 2 }
|
||||
p { display: block; height: 90pt; margin: 0 0 10pt 0 }
|
||||
img { width: 30pt; vertical-align: top }
|
||||
</style>
|
||||
<p><a href="http://weasyprint.org"><img src=pattern.png></a></p>
|
||||
<p style="padding: 0 10pt"><a
|
||||
href="#lipsum"><img style="border: solid 1pt"
|
||||
src=pattern.png></a></p>
|
||||
<p id=hello>Hello, World</p>
|
||||
<p id=lipsum>
|
||||
<a style="display: block; page-break-before: always; height: 30pt"
|
||||
href="#hel%6Co"></a>
|
||||
</p>
|
||||
''', base_url=resource_filename('<inline HTML>')).write_pdf()
|
||||
links = [
|
||||
annot for page in PdfReader(fdata=pdf_bytes).Root.Pages.Kids
|
||||
@ -286,7 +298,7 @@ def test_links():
|
||||
|
||||
|
||||
@assert_no_logs
|
||||
def test_relative_links():
|
||||
def test_relative_links_relative():
|
||||
# Relative URI reference without a base URI: allowed for anchors
|
||||
pdf_bytes = FakeHTML(
|
||||
string='<a href="../lipsum" style="display: block">',
|
||||
@ -295,6 +307,9 @@ def test_relative_links():
|
||||
assert link.A == {'/URI': '(../lipsum)', '/S': '/URI', '/Type': '/Action'}
|
||||
assert [round(float(value)) for value in link.Rect] == [0, TOP, RIGHT, TOP]
|
||||
|
||||
|
||||
@assert_no_logs
|
||||
def test_relative_links_links():
|
||||
# Relative URI reference without a base URI: not supported for -weasy-link
|
||||
with capture_logs() as logs:
|
||||
pdf_bytes = FakeHTML(
|
||||
@ -305,6 +320,9 @@ def test_relative_links():
|
||||
assert 'WARNING: Ignored `-weasy-link: url("../lipsum")`' in logs[0]
|
||||
assert 'Relative URI reference without a base URI' in logs[0]
|
||||
|
||||
|
||||
@assert_no_logs
|
||||
def test_relative_links_internal():
|
||||
# Internal URI reference without a base URI: OK
|
||||
pdf_bytes = FakeHTML(
|
||||
string='<a href="#lipsum" id="lipsum" style="display: block">',
|
||||
@ -316,6 +334,9 @@ def test_relative_links():
|
||||
assert round(float(link.A.D[3])) == TOP
|
||||
assert [round(float(value)) for value in link.Rect] == [0, TOP, RIGHT, TOP]
|
||||
|
||||
|
||||
@assert_no_logs
|
||||
def test_relative_links_anchors():
|
||||
pdf_bytes = FakeHTML(
|
||||
string='<div style="-weasy-link: url(#lipsum)" id="lipsum">',
|
||||
base_url=None).write_pdf()
|
||||
@ -331,10 +352,9 @@ def test_relative_links():
|
||||
def test_missing_links():
|
||||
with capture_logs() as logs:
|
||||
pdf_bytes = FakeHTML(string='''
|
||||
<style> a { display: block; height: 15pt; } </style>
|
||||
<body>
|
||||
<a href="#lipsum"></a>
|
||||
<a href="#missing" id="lipsum"></a>
|
||||
<style> a { display: block; height: 15pt } </style>
|
||||
<a href="#lipsum"></a>
|
||||
<a href="#missing" id="lipsum"></a>
|
||||
''', base_url=None).write_pdf()
|
||||
link, = PdfReader(fdata=pdf_bytes).Root.Pages.Kids[0].Annots
|
||||
assert link.A.S == '/GoTo'
|
||||
@ -348,27 +368,32 @@ def test_missing_links():
|
||||
|
||||
|
||||
@assert_no_logs
|
||||
def test_jpeg():
|
||||
def render(html):
|
||||
return FakeHTML(base_url=resource_filename('dummy.html'),
|
||||
string=html).write_pdf()
|
||||
assert b'/Filter /DCTDecode' not in render('<img src="pattern.gif">')
|
||||
def test_embed_gif():
|
||||
assert b'/Filter /DCTDecode' not in FakeHTML(
|
||||
base_url=resource_filename('dummy.html'),
|
||||
string='<img src="pattern.gif">').write_pdf()
|
||||
|
||||
|
||||
@assert_no_logs
|
||||
def test_embed_jpeg():
|
||||
# JPEG-encoded image, embedded in PDF:
|
||||
assert b'/Filter /DCTDecode' in render('<img src="blue.jpg">')
|
||||
assert b'/Filter /DCTDecode' in FakeHTML(
|
||||
base_url=resource_filename('dummy.html'),
|
||||
string='<img src="blue.jpg">').write_pdf()
|
||||
|
||||
|
||||
@assert_no_logs
|
||||
def test_document_info():
|
||||
pdf_bytes = FakeHTML(string='''
|
||||
<meta name=author content="I Me & Myself">
|
||||
<title>Test document</title>
|
||||
<h1>Another title</h1>
|
||||
<meta name=generator content="Human after all">
|
||||
<meta name=keywords content="html ,\tcss,
|
||||
pdf,css">
|
||||
<meta name=description content="Blah… ">
|
||||
<meta name=dcterms.created content=2011-04>
|
||||
<meta name=dcterms.modified content=2013-07-21T23:46+01:00>
|
||||
<meta name=author content="I Me & Myself">
|
||||
<title>Test document</title>
|
||||
<h1>Another title</h1>
|
||||
<meta name=generator content="Human after all">
|
||||
<meta name=keywords content="html ,\tcss,
|
||||
pdf,css">
|
||||
<meta name=description content="Blah… ">
|
||||
<meta name=dcterms.created content=2011-04>
|
||||
<meta name=dcterms.modified content=2013-07-21T23:46+01:00>
|
||||
''').write_pdf()
|
||||
info = PdfReader(fdata=pdf_bytes).Info
|
||||
assert info.Author.decode() == 'I Me & Myself'
|
||||
@ -381,7 +406,7 @@ def test_document_info():
|
||||
|
||||
|
||||
@assert_no_logs
|
||||
def test_embedded_files(tmpdir):
|
||||
def test_embedded_files_attachments(tmpdir):
|
||||
absolute_tmp_file = tmpdir.join('some_file.txt')
|
||||
adata = b'12345678'
|
||||
with open(absolute_tmp_file, 'wb') as afile:
|
||||
@ -396,16 +421,16 @@ def test_embedded_files(tmpdir):
|
||||
|
||||
pdf_bytes = FakeHTML(
|
||||
string='''
|
||||
<title>Test document</title>
|
||||
<meta charset="utf-8">
|
||||
<link
|
||||
rel="attachment"
|
||||
title="some file attachment äöü"
|
||||
href="data:,hi%20there">
|
||||
<link rel="attachment" href="{0}">
|
||||
<link rel="attachment" href="{1}">
|
||||
<h1>Heading 1</h1>
|
||||
<h2>Heading 2</h2>
|
||||
<title>Test document</title>
|
||||
<meta charset="utf-8">
|
||||
<link
|
||||
rel="attachment"
|
||||
title="some file attachment äöü"
|
||||
href="data:,hi%20there">
|
||||
<link rel="attachment" href="{0}">
|
||||
<link rel="attachment" href="{1}">
|
||||
<h1>Heading 1</h1>
|
||||
<h2>Heading 2</h2>
|
||||
'''.format(absolute_url, os.path.basename(relative_tmp_file)),
|
||||
base_url=tmpdir.strpath,
|
||||
).write_pdf(
|
||||
@ -454,12 +479,13 @@ def test_embedded_files(tmpdir):
|
||||
assert embedded[11].EF.F.Params.CheckSum == (
|
||||
'<{}>'.format(hashlib.md5(b'file like obj').hexdigest()))
|
||||
|
||||
|
||||
@assert_no_logs
|
||||
def test_attachments_data():
|
||||
pdf_bytes = FakeHTML(string='''
|
||||
<title>Test document 2</title>
|
||||
<meta charset="utf-8">
|
||||
<link
|
||||
rel="attachment"
|
||||
href="data:,some data">
|
||||
<title>Test document 2</title>
|
||||
<meta charset="utf-8">
|
||||
<link rel="attachment" href="data:,some data">
|
||||
''').write_pdf()
|
||||
pdf = PdfReader(fdata=pdf_bytes)
|
||||
embedded = pdf.Root.Names.EmbeddedFiles.Names
|
||||
@ -467,18 +493,24 @@ def test_embedded_files(tmpdir):
|
||||
assert embedded[1].EF.F.Params.CheckSum == (
|
||||
'<{}>'.format(hashlib.md5(b'some data').hexdigest()))
|
||||
|
||||
|
||||
@assert_no_logs
|
||||
def test_attachments_none():
|
||||
pdf_bytes = FakeHTML(string='''
|
||||
<title>Test document 3</title>
|
||||
<meta charset="utf-8">
|
||||
<h1>Heading</h1>
|
||||
<title>Test document 3</title>
|
||||
<meta charset="utf-8">
|
||||
<h1>Heading</h1>
|
||||
''').write_pdf()
|
||||
pdf = PdfReader(fdata=pdf_bytes)
|
||||
assert pdf.Root.Names is None
|
||||
assert pdf.Root.Outlines is not None
|
||||
|
||||
|
||||
@assert_no_logs
|
||||
def test_attachments_none_empty():
|
||||
pdf_bytes = FakeHTML(string='''
|
||||
<title>Test document 4</title>
|
||||
<meta charset="utf-8">
|
||||
<title>Test document 4</title>
|
||||
<meta charset="utf-8">
|
||||
''').write_pdf()
|
||||
pdf = PdfReader(fdata=pdf_bytes)
|
||||
assert pdf.Root.Names is None
|
||||
@ -486,14 +518,14 @@ def test_embedded_files(tmpdir):
|
||||
|
||||
|
||||
@assert_no_logs
|
||||
def test_annotation_files():
|
||||
def test_annotations():
|
||||
pdf_bytes = FakeHTML(string='''
|
||||
<title>Test document</title>
|
||||
<meta charset="utf-8">
|
||||
<a
|
||||
rel="attachment"
|
||||
href="data:,some data"
|
||||
download>A link that lets you download an attachment</a>
|
||||
<title>Test document</title>
|
||||
<meta charset="utf-8">
|
||||
<a
|
||||
rel="attachment"
|
||||
href="data:,some data"
|
||||
download>A link that lets you download an attachment</a>
|
||||
''').write_pdf()
|
||||
|
||||
assert hashlib.md5(b'some data').hexdigest().encode('ascii') in pdf_bytes
|
||||
@ -501,30 +533,24 @@ def test_annotation_files():
|
||||
assert b'/EmbeddedFiles' not in pdf_bytes
|
||||
|
||||
|
||||
@pytest.mark.parametrize('style, media, bleed, trim', (
|
||||
('bleed: 30pt; size: 10pt',
|
||||
['0', '0', '70', '70'],
|
||||
['20', '20', '50', '50'],
|
||||
['30', '30', '40', '40']),
|
||||
('bleed: 15pt 3pt 6pt 18pt; size: 12pt 15pt',
|
||||
['0', '0', '33', '36'],
|
||||
['8', '5', '33', '36'],
|
||||
['18', '15', '30', '30']),
|
||||
))
|
||||
@assert_no_logs
|
||||
def test_bleed():
|
||||
def test_bleed(style, media, bleed, trim):
|
||||
pdf_bytes = FakeHTML(string='''
|
||||
<title>Test document</title>
|
||||
<style>
|
||||
@page { bleed: 30pt; size: 10pt }
|
||||
</style>
|
||||
<body>test
|
||||
''').write_pdf()
|
||||
|
||||
<title>Test document</title>
|
||||
<style>@page { %s }</style>
|
||||
<body>test
|
||||
''' % style).write_pdf()
|
||||
pdf = PdfReader(fdata=pdf_bytes)
|
||||
assert pdf.Root.Pages.Kids[0].MediaBox == ['0', '0', '70', '70']
|
||||
assert pdf.Root.Pages.Kids[0].BleedBox == ['20', '20', '50', '50']
|
||||
assert pdf.Root.Pages.Kids[0].TrimBox == ['30', '30', '40', '40']
|
||||
|
||||
pdf_bytes = FakeHTML(string='''
|
||||
<title>Test document</title>
|
||||
<style>
|
||||
@page { bleed: 15pt 3pt 6pt 18pt; size: 12pt 15pt }
|
||||
</style>
|
||||
<body>test
|
||||
''').write_pdf()
|
||||
|
||||
pdf = PdfReader(fdata=pdf_bytes)
|
||||
assert pdf.Root.Pages.Kids[0].MediaBox == ['0', '0', '33', '36']
|
||||
assert pdf.Root.Pages.Kids[0].BleedBox == ['8', '5', '33', '36']
|
||||
assert pdf.Root.Pages.Kids[0].TrimBox == ['18', '15', '30', '30']
|
||||
assert pdf.Root.Pages.Kids[0].MediaBox == media
|
||||
assert pdf.Root.Pages.Kids[0].BleedBox == bleed
|
||||
assert pdf.Root.Pages.Kids[0].TrimBox == trim
|
||||
|
Loading…
Reference in New Issue
Block a user