1
1
mirror of https://github.com/Kozea/WeasyPrint.git synced 2024-10-05 08:27:22 +03:00
WeasyPrint/weasyprint/tests/test_pdf.py
Guillaume Ayoub b40ad461be Remove repeated copyright assignments in files
Everything is replaced by a more explicit message in the COPYRIGHT file.

Inspired by rust-lang/rust#43498 and rust-lang/rust#57108.
2020-01-02 14:10:09 +01:00

746 lines
25 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
weasyprint.tests.test_pdf
-------------------------
Test PDF-related code, including metadata, bookmarks and hyperlinks.
"""
import hashlib
import io
import os
import re
import cairocffi
import pytest
from .. import Attachment, pdf
from ..urls import path2url
from .testing_utils import (
FakeHTML, assert_no_logs, capture_logs, requires, resource_filename)
# Height of the default A4 page: 297mm ~= 842pt.
# Used below as the y coordinate of the top edge of the page.
TOP = 842
# Width of the default A4 page: 210mm ~= 595pt.
# Used below as the x coordinate of the right edge of the page.
RIGHT = 595
def assert_rect_almost_equal(rect, values):
    """Assert that the numbers of a PDF rect string match ``values``.

    ``rect`` is the text found in the PDF, given as ``str`` or ASCII
    ``bytes``, optionally surrounded by spaces and square brackets. Its
    leading numbers are compared one by one with the items of ``values``.

    We avoid rounding errors by allowing a delta of 1, as both WeasyPrint and
    cairo round coordinates in unpredictable ways.

    Raises ``AssertionError`` when a number differs by more than 1 from the
    expected one, or when ``rect`` holds fewer numbers than ``values``.

    """
    if isinstance(rect, bytes):
        rect = rect.decode('ascii')
    expected = tuple(values)
    found = rect.strip(' []').split()
    # zip() stops at the shortest input: without this check an empty or
    # truncated rect would pass without a single comparison being made.
    assert len(found) >= len(expected), (
        'expected {} values in rect, got {!r}'.format(len(expected), rect))
    for text, number in zip(found, expected):
        # float() handles both integer and decimal PDF numbers.
        assert abs(float(text) - number) <= 1
@assert_no_logs
@pytest.mark.parametrize('width, height', (
    (100, 100),
    (200, 10),
    (3.14, 987654321),
))
def test_pdf_parser(width, height):
    """Check that the PDF parser reads back the page size given to cairo."""
    buffer = io.BytesIO()
    # The 1x1 size is only a placeholder, replaced by set_size() below.
    cairo_surface = cairocffi.PDFSurface(buffer, 1, 1)
    cairo_surface.set_size(width, height)
    cairo_surface.show_page()
    cairo_surface.finish()

    media_boxes = []
    for page in pdf.PDFFile(buffer).pages:
        media_boxes.append(page.get_value('MediaBox', r'\[(.+?)\]').strip())
    expected_box = '0 0 {} {}'.format(width, height).encode('ascii')
    assert media_boxes == [expected_box]
@assert_no_logs
@pytest.mark.parametrize('zoom', (1, 1.5, 0.5))
def test_page_size_zoom(zoom):
    """Check that the MediaBox of a 3in x 4in page follows ``zoom``."""
    document = FakeHTML(string='<style>@page{size:3in 4in')
    rendered = document.write_pdf(zoom=zoom)
    # 216 and 288 are the page width and height in points before zooming.
    zoomed_width = int(216 * zoom)
    zoomed_height = int(288 * zoom)
    media_box = '/MediaBox [ 0 0 {} {} ]'.format(zoomed_width, zoomed_height)
    assert media_box.encode('ascii') in rendered
@assert_no_logs
@requires('cairo', (1, 15, 4))
def test_bookmarks_1():
    """Check the outline tree built from headings of mixed levels."""
    markup = (
        '\n'
        '<h1>a</h1> #\n'
        '<h4>b</h4> ####\n'
        '<h3>c</h3> ###\n'
        '<h2>d</h2> ##\n'
        '<h1>e</h1> #\n'
    )
    buffer = io.BytesIO()
    FakeHTML(string=markup).write_pdf(target=buffer)
    # Expected outline tree:
    # a
    # |_ b
    # |_ c
    # L_ d
    # e
    parsed = pdf.PDFFile(buffer)

    def title(item):
        return item.get_value('Title', '(.*)')

    def first_child(item):
        return item.get_indirect_dict('First', parsed)

    def next_sibling(item):
        return item.get_indirect_dict('Next', parsed)

    root = parsed.catalog.get_indirect_dict('Outlines', parsed)
    assert root.get_type() == 'Outlines'
    assert root.get_value('Count', '(.*)') == b'-5'
    item_a = first_child(root)
    assert title(item_a) == b'(a)'
    item_b = first_child(item_a)
    assert title(item_b) == b'(b)'
    item_c = next_sibling(item_b)
    assert title(item_c) == b'(c)'
    item_d = next_sibling(item_c)
    assert title(item_d) == b'(d)'
    item_e = next_sibling(item_a)
    assert title(item_e) == b'(e)'
@assert_no_logs
def test_bookmarks_2():
    """Check that looking up outlines fails for a heading-less document."""
    buffer = io.BytesIO()
    document = FakeHTML(string='<body>')
    document.write_pdf(target=buffer)
    parsed = pdf.PDFFile(buffer)
    catalog = parsed.catalog
    # The lookup is expected to raise AttributeError for this document.
    with pytest.raises(AttributeError):
        catalog.get_indirect_dict('Outlines', parsed)
@assert_no_logs
@requires('cairo', (1, 15, 4))
def test_bookmarks_3():
    """Check the encoding of a bookmark title with non-ASCII characters.

    The heading text is "a", a no-break space (U+00A0), "nbsp" and an
    ellipsis (U+2026).

    """
    fileobj = io.BytesIO()
    # The no-break space is written as an escape sequence: a literal U+00A0
    # looks like a plain space in editors and is easily lost when the file
    # is reformatted, which would silently change what this test checks.
    FakeHTML(string='<h1>a\xa0nbsp…</h1>').write_pdf(target=fileobj)
    pdf_file = pdf.PDFFile(fileobj)
    outlines = pdf_file.catalog.get_indirect_dict('Outlines', pdf_file)
    o1 = outlines.get_indirect_dict('First', pdf_file)
    # Expected hex string: FEFF, then one 4-digit group per character
    # (0061 "a", 00A0 no-break space, 006E 0062 0073 0070 "nbsp", 2026 "…").
    assert (
        o1.get_value('Title', '(.*)') == b'<FEFF006100A0006E0062007300702026>')
@assert_no_logs
@requires('cairo', (1, 15, 4))
def test_bookmarks_4():
    """Check titles and counts of a nested outline spanning a page break."""
    markup = (
        '\n'
        '<style>\n'
        '* { height: 90pt; margin: 0 0 10pt 0 }\n'
        '</style>\n'
        '<h1>Title 1</h1>\n'
        '<h1>Title 2</h1>\n'
        '<h2 style="position: relative; left: 20pt">Title 3</h2>\n'
        '<h2>Title 4</h2>\n'
        '<h3>Title 5</h3>\n'
        '<span style="display: block; page-break-before: always"></span>\n'
        '<h2>Title 6</h2>\n'
        '<h1>Title 7</h1>\n'
        '<h2>Title 8</h2>\n'
        '<h3>Title 9</h3>\n'
        '<h1>Title 10</h1>\n'
        '<h2>Title 11</h2>\n'
    )
    buffer = io.BytesIO()
    FakeHTML(string=markup).write_pdf(target=buffer)
    # Expected outline tree:
    # 1
    # 2
    # |_ 3
    # |_ 4
    # |  L_ 5
    # L_ 6
    # 7
    # L_ 8
    #    L_ 9
    # 10
    # L_ 11
    parsed = pdf.PDFFile(buffer)

    def title(item):
        return item.get_value('Title', '(.*)')

    def count(item):
        return item.get_value('Count', '(.*)')

    def first_child(item):
        return item.get_indirect_dict('First', parsed)

    def next_sibling(item):
        return item.get_indirect_dict('Next', parsed)

    root = parsed.catalog.get_indirect_dict('Outlines', parsed)
    assert root.get_type() == 'Outlines'
    assert count(root) == b'-11'
    item_1 = first_child(root)
    assert title(item_1) == b'(Title 1)'
    item_2 = next_sibling(item_1)
    assert title(item_2) == b'(Title 2)'
    assert count(item_2) == b'4'
    item_3 = first_child(item_2)
    assert title(item_3) == b'(Title 3)'
    item_4 = next_sibling(item_3)
    assert title(item_4) == b'(Title 4)'
    assert count(item_4) == b'1'
    item_5 = first_child(item_4)
    assert title(item_5) == b'(Title 5)'
    item_6 = next_sibling(item_4)
    assert title(item_6) == b'(Title 6)'
    item_7 = next_sibling(item_2)
    assert title(item_7) == b'(Title 7)'
    assert count(item_7) == b'2'
    item_8 = first_child(item_7)
    assert title(item_8) == b'(Title 8)'
    assert count(item_8) == b'1'
    item_9 = first_child(item_8)
    assert title(item_9) == b'(Title 9)'
    item_10 = next_sibling(item_7)
    assert title(item_10) == b'(Title 10)'
    assert count(item_10) == b'1'
    item_11 = first_child(item_10)
    assert title(item_11) == b'(Title 11)'
@assert_no_logs
@requires('cairo', (1, 15, 4))
def test_bookmarks_5():
    """Check the outline tree when heading levels are skipped."""
    markup = (
        '\n'
        '<h2>1</h2> level 1\n'
        '<h4>2</h4> level 2\n'
        '<h2>3</h2> level 1\n'
        '<h3>4</h3> level 2\n'
        '<h4>5</h4> level 3\n'
    )
    buffer = io.BytesIO()
    FakeHTML(string=markup).write_pdf(target=buffer)
    # Expected outline tree:
    # 1
    # L_ 2
    # 3
    # L_ 4
    #    L_ 5
    parsed = pdf.PDFFile(buffer)

    def title(item):
        return item.get_value('Title', '(.*)')

    def first_child(item):
        return item.get_indirect_dict('First', parsed)

    def next_sibling(item):
        return item.get_indirect_dict('Next', parsed)

    root = parsed.catalog.get_indirect_dict('Outlines', parsed)
    assert root.get_type() == 'Outlines'
    assert root.get_value('Count', '(.*)') == b'-5'
    item_1 = first_child(root)
    assert title(item_1) == b'(1)'
    item_2 = first_child(item_1)
    assert title(item_2) == b'(2)'
    item_3 = next_sibling(item_1)
    assert title(item_3) == b'(3)'
    item_4 = first_child(item_3)
    assert title(item_4) == b'(4)'
    item_5 = first_child(item_4)
    assert title(item_5) == b'(5)'
@assert_no_logs
@requires('cairo', (1, 15, 4))
def test_bookmarks_6():
    """Check the outline tree for headings going up and down in level."""
    markup = (
        '\n'
        '<h2>1</h2> h2 level 1\n'
        '<h4>2</h4> h4 level 2\n'
        '<h3>3</h3> h3 level 2\n'
        '<h5>4</h5> h5 level 3\n'
        '<h1>5</h1> h1 level 1\n'
        '<h2>6</h2> h2 level 2\n'
        '<h2>7</h2> h2 level 2\n'
        '<h4>8</h4> h4 level 3\n'
        '<h1>9</h1> h1 level 1\n'
    )
    buffer = io.BytesIO()
    FakeHTML(string=markup).write_pdf(target=buffer)
    # Expected outline tree:
    # 1
    # |_ 2
    # L_ 3
    #    L_ 4
    # 5
    # |_ 6
    # L_ 7
    #    L_ 8
    # 9
    parsed = pdf.PDFFile(buffer)

    def title(item):
        return item.get_value('Title', '(.*)')

    def first_child(item):
        return item.get_indirect_dict('First', parsed)

    def next_sibling(item):
        return item.get_indirect_dict('Next', parsed)

    root = parsed.catalog.get_indirect_dict('Outlines', parsed)
    assert root.get_type() == 'Outlines'
    assert root.get_value('Count', '(.*)') == b'-9'
    item_1 = first_child(root)
    assert title(item_1) == b'(1)'
    item_2 = first_child(item_1)
    assert title(item_2) == b'(2)'
    item_3 = next_sibling(item_2)
    assert title(item_3) == b'(3)'
    item_4 = first_child(item_3)
    assert title(item_4) == b'(4)'
    item_5 = next_sibling(item_1)
    assert title(item_5) == b'(5)'
    item_6 = first_child(item_5)
    assert title(item_6) == b'(6)'
    item_7 = next_sibling(item_6)
    assert title(item_7) == b'(7)'
    item_8 = first_child(item_7)
    assert title(item_8) == b'(8)'
    item_9 = next_sibling(item_5)
    assert title(item_9) == b'(9)'
@assert_no_logs
@requires('cairo', (1, 15, 4))
def test_bookmarks_7():
    """Check that the bookmark destination follows the ``zoom`` factor.

    The same document is rendered without and with ``zoom=1.5``; the
    next-to-last value of the bookmark ``Dest`` array in the zoomed output
    must equal the reference value multiplied by 1.5 and rounded.

    """
    def first_bookmark_y(**write_options):
        # Render the document, check its only bookmark and return the
        # next-to-last number of the bookmark destination array.
        fileobj = io.BytesIO()
        FakeHTML(string='<h2>a</h2>').write_pdf(
            target=fileobj, **write_options)
        pdf_file = pdf.PDFFile(fileobj)
        outlines = pdf_file.catalog.get_indirect_dict('Outlines', pdf_file)
        assert outlines.get_type() == 'Outlines'
        o1 = outlines.get_indirect_dict('First', pdf_file)
        assert o1.get_value('Title', '(.*)') == b'(a)'
        dest = o1.get_value('Dest', r'\[(.+?)\]').strip().split()
        return float(dest[-2])

    # Reference rendering, without any zoom argument.
    y = first_bookmark_y()
    assert first_bookmark_y(zoom=1.5) == round(y * 1.5)
@assert_no_logs
@requires('cairo', (1, 15, 4))
def test_bookmarks_8():
    """Check the outline tree and count with a ``bookmark-state: closed``."""
    markup = (
        '\n'
        '<h1>a</h1>\n'
        '<h2>b</h2>\n'
        '<h3>c</h3>\n'
        '<h2 style="bookmark-state: closed">d</h2>\n'
        '<h3>e</h3>\n'
        '<h4>f</h4>\n'
        '<h1>g</h1>\n'
    )
    buffer = io.BytesIO()
    FakeHTML(string=markup).write_pdf(target=buffer)
    # Expected outline tree:
    # a
    # |_ b
    # |  L_ c
    # L_ d (closed)
    #    L_ e
    #       L_ f
    # g
    parsed = pdf.PDFFile(buffer)

    def title(item):
        return item.get_value('Title', '(.*)')

    def first_child(item):
        return item.get_indirect_dict('First', parsed)

    def next_sibling(item):
        return item.get_indirect_dict('Next', parsed)

    root = parsed.catalog.get_indirect_dict('Outlines', parsed)
    assert root.get_type() == 'Outlines'
    # d is closed, the number of displayed outlines is len(a, b, c, d, g) == 5
    assert root.get_value('Count', '(.*)') == b'-5'
    item_a = first_child(root)
    assert title(item_a) == b'(a)'
    item_b = first_child(item_a)
    assert title(item_b) == b'(b)'
    item_c = first_child(item_b)
    assert title(item_c) == b'(c)'
    item_d = next_sibling(item_b)
    assert title(item_d) == b'(d)'
    item_e = first_child(item_d)
    assert title(item_e) == b'(e)'
    item_f = first_child(item_e)
    assert title(item_f) == b'(f)'
    item_g = next_sibling(item_a)
    assert title(item_g) == b'(g)'
@assert_no_logs
def test_links_none():
    """Check that looking up annotations fails for a link-less page."""
    buffer = io.BytesIO()
    document = FakeHTML(string='<body>')
    document.write_pdf(target=buffer)
    parsed = pdf.PDFFile(buffer)
    first_page = parsed.pages[0]
    # The lookup is expected to raise AttributeError for this page.
    with pytest.raises(AttributeError):
        first_page.get_indirect_dict_array('Annots', parsed)
@assert_no_logs
@requires('cairo', (1, 15, 4))
def test_links():
    """Check annotations and named destinations generated for links."""
    markup = (
        '\n'
        '<style>\n'
        'body { margin: 0; font-size: 10pt; line-height: 2 }\n'
        'p { display: block; height: 90pt; margin: 0 0 10pt 0 }\n'
        'img { width: 30pt; vertical-align: top }\n'
        '</style>\n'
        '<p><a href="http://weasyprint.org"><img src=pattern.png></a></p>\n'
        '<p style="padding: 0 10pt"><a\n'
        'href="#lipsum"><img style="border: solid 1pt"\n'
        'src=pattern.png></a></p>\n'
        '<p id=hello>Hello, World</p>\n'
        '<p id=lipsum>\n'
        '<a style="display: block; page-break-before: always; height: 30pt"\n'
        'href="#hel%6Co"></a>a\n'
        '</p>\n'
    )

    def value(annotation, key):
        return annotation.get_value(key, '(.*)')

    buffer = io.BytesIO()
    FakeHTML(
        string=markup, base_url=resource_filename('<inline HTML>'),
    ).write_pdf(target=buffer)
    parsed = pdf.PDFFile(buffer)
    links = []
    for page in parsed.pages:
        for annotation in page.get_indirect_dict_array('Annots', parsed):
            links.append(annotation)

    # 30pt wide (like the image), 20pt high (like line-height)
    assert value(links[0], 'URI') == b'(http://weasyprint.org)'
    assert value(links[0], 'S') == b'/URI'
    assert_rect_almost_equal(
        value(links[0], 'Rect'), (0, TOP - 20, 30, TOP))

    # The image itself: 30*30pt
    assert value(links[1], 'URI') == b'(http://weasyprint.org)'
    assert value(links[1], 'S') == b'/URI'
    assert_rect_almost_equal(
        value(links[1], 'Rect'), (0, TOP - 30, 30, TOP))

    # 32pt wide (image + 2 * 1pt of border), 20pt high
    # TODO: replace these commented tests now that we use named destinations
    # assert links[2].get_value('Subtype', '(.*)') == b'/Link'
    # dest = links[2].get_value('Dest', '(.*)').strip(b'[]').split()
    # assert dest[-4] == b'/XYZ'
    # assert [round(float(value)) for value in dest[-3:]] == […]
    assert_rect_almost_equal(
        value(links[2], 'Rect'),
        (10, TOP - 100 - 20, 10 + 32, TOP - 100))

    # The image itself: 32*32pt
    # TODO: same as above
    # assert links[3].get_value('Subtype', '(.*)') == b'/Link'
    # dest = links[3].get_value('Dest', '(.*)').strip(b'[]').split()
    # assert dest[-4] == b'/XYZ'
    # assert [round(float(value)) for value in dest[-3:]] == […]
    assert_rect_almost_equal(
        value(links[3], 'Rect'),
        (10, TOP - 100 - 32, 10 + 32, TOP - 100))

    # 100% wide (block), 30pt high
    assert value(links[4], 'Subtype') == b'/Link'
    destination = value(links[4], 'Dest').strip(b'[]').split()
    assert destination == [b'(hello)']
    names_dict = parsed.catalog.get_indirect_dict('Names', parsed)
    dests_dict = names_dict.get_indirect_dict('Dests', parsed)
    dests_text = dests_dict.byte_string.decode('ascii')
    hello_match = re.search(
        r'\(hello\) \[\d+ \d+ R /XYZ (\d+ \d+ \d+)]', dests_text)
    assert_rect_almost_equal(hello_match.group(1), (0, TOP - 200, 0))
    assert_rect_almost_equal(
        value(links[4], 'Rect'), (0, TOP - 30, RIGHT, TOP))

    # 100% wide (block), 0pt high
    buffer = io.BytesIO()
    FakeHTML(
        string='<a href="../lipsum" style="display: block"></a>a',
        base_url='http://weasyprint.org/foo/bar/',
    ).write_pdf(target=buffer)
    parsed = pdf.PDFFile(buffer)
    # Exactly one annotation is expected in the whole document.
    (link,) = [
        annotation for page in parsed.pages
        for annotation in page.get_indirect_dict_array('Annots', parsed)]
    assert value(link, 'URI') == b'(http://weasyprint.org/foo/lipsum)'
    assert value(link, 'S') == b'/URI'
    assert_rect_almost_equal(value(link, 'Rect'), (0, TOP, RIGHT, TOP))
@assert_no_logs
@requires('cairo', (1, 15, 4))
def test_relative_links():
    """Check that a relative ``href`` is kept as is without a base URL."""
    # Relative URI reference without a base URI: allowed for anchors
    buffer = io.BytesIO()
    document = FakeHTML(
        string='<a href="../lipsum" style="display: block"></a>a',
        base_url=None)
    document.write_pdf(target=buffer)
    parsed = pdf.PDFFile(buffer)
    page_annotations = parsed.pages[0].get_indirect_dict_array(
        'Annots', parsed)
    link = page_annotations[0]
    assert link.get_value('URI', '(.*)') == b'(../lipsum)'
    assert link.get_value('S', '(.*)') == b'/URI'
    link_rect = link.get_value('Rect', '(.*)')
    assert_rect_almost_equal(link_rect, (0, TOP, RIGHT, TOP))
@assert_no_logs
def test_relative_links_missing_base():
    """Check that a relative ``-weasy-link`` without base URL is ignored."""
    # Relative URI reference without a base URI: not supported for -weasy-link
    buffer = io.BytesIO()
    document = FakeHTML(
        string='<div style="-weasy-link: url(../lipsum)">',
        base_url=None)
    with capture_logs() as logs:
        document.write_pdf(target=buffer)
    parsed = pdf.PDFFile(buffer)
    first_page = parsed.pages[0]
    # The annotation lookup is expected to raise AttributeError here.
    with pytest.raises(AttributeError):
        first_page.get_indirect_dict_array('Annots', parsed)
    assert len(logs) == 1
    message = logs[0]
    assert 'WARNING: Ignored `-weasy-link: url("../lipsum")`' in message
    assert 'Relative URI reference without a base URI' in message
@assert_no_logs
@requires('cairo', (1, 15, 4))
def test_relative_links_internal():
    """Check an ``href`` pointing to its own element without a base URL."""
    # Internal URI reference without a base URI: OK
    buffer = io.BytesIO()
    document = FakeHTML(
        string='<a href="#lipsum" id="lipsum" style="display: block"></a>a',
        base_url=None)
    document.write_pdf(target=buffer)
    parsed = pdf.PDFFile(buffer)
    link = parsed.pages[0].get_indirect_dict_array('Annots', parsed)[0]
    assert link.get_value('Dest', '(.*)') == b'(lipsum)'

    # The coordinates of the named destination are read from the raw bytes
    # of the Dests dictionary.
    names_dict = parsed.catalog.get_indirect_dict('Names', parsed)
    dests_dict = names_dict.get_indirect_dict('Dests', parsed)
    dests_text = dests_dict.byte_string.decode('ascii')
    lipsum_match = re.search(
        r'\(lipsum\) \[\d+ \d+ R /XYZ (\d+ \d+ \d+)]', dests_text)
    assert_rect_almost_equal(lipsum_match.group(1), (0, TOP, 0))
    link_rect = link.get_value('Rect', '(.*)')
    assert_rect_almost_equal(link_rect, (0, TOP, RIGHT, TOP))
@assert_no_logs
@requires('cairo', (1, 15, 4))
def test_relative_links_anchors():
    """Check an internal ``-weasy-link`` target without a base URL."""
    buffer = io.BytesIO()
    document = FakeHTML(
        string='<div style="-weasy-link: url(#lipsum)" id="lipsum"></div>a',
        base_url=None)
    document.write_pdf(target=buffer)
    parsed = pdf.PDFFile(buffer)
    link = parsed.pages[0].get_indirect_dict_array('Annots', parsed)[0]
    assert link.get_value('Dest', '(.*)') == b'(lipsum)'

    # The coordinates of the named destination are read from the raw bytes
    # of the Dests dictionary.
    names_dict = parsed.catalog.get_indirect_dict('Names', parsed)
    dests_dict = names_dict.get_indirect_dict('Dests', parsed)
    dests_text = dests_dict.byte_string.decode('ascii')
    lipsum_match = re.search(
        r'\(lipsum\) \[\d+ \d+ R /XYZ (\d+ \d+ \d+)]', dests_text)
    assert_rect_almost_equal(lipsum_match.group(1), (0, TOP, 0))
    link_rect = link.get_value('Rect', '(.*)')
    assert_rect_almost_equal(link_rect, (0, TOP, RIGHT, TOP))
@assert_no_logs
@requires('cairo', (1, 15, 4))
def test_missing_links():
    """Check that a link to a missing anchor is logged as an error."""
    markup = (
        '\n'
        '<style> a { display: block; height: 15pt } </style>\n'
        '<a href="#lipsum"></a>\n'
        '<a href="#missing" id="lipsum"></a>a\n'
    )
    buffer = io.BytesIO()
    document = FakeHTML(string=markup, base_url=None)
    with capture_logs() as logs:
        document.write_pdf(target=buffer)
    parsed = pdf.PDFFile(buffer)
    link = parsed.pages[0].get_indirect_dict_array('Annots', parsed)[0]
    assert link.get_value('Dest', '(.*)') == b'(lipsum)'

    # The coordinates of the named destination are read from the raw bytes
    # of the Dests dictionary.
    names_dict = parsed.catalog.get_indirect_dict('Names', parsed)
    dests_dict = names_dict.get_indirect_dict('Dests', parsed)
    dests_text = dests_dict.byte_string.decode('ascii')
    lipsum_match = re.search(
        r'\(lipsum\) \[\d+ \d+ R /XYZ (\d+ \d+ \d+)]', dests_text)
    assert_rect_almost_equal(lipsum_match.group(1), (0, TOP - 15, 0))
    link_rect = link.get_value('Rect', '(.*)')
    assert_rect_almost_equal(link_rect, (0, TOP - 15, RIGHT, TOP))
    assert len(logs) == 1
    message = logs[0]
    assert 'ERROR: No anchor #missing for internal URI reference' in message
@assert_no_logs
def test_embed_gif():
    """Check that a GIF image is not written with the DCTDecode filter."""
    document = FakeHTML(
        base_url=resource_filename('dummy.html'),
        string='<img src="pattern.gif">')
    rendered = document.write_pdf()
    assert b'/Filter /DCTDecode' not in rendered
@assert_no_logs
def test_embed_jpeg():
    """Check that a JPEG image is written with the DCTDecode filter."""
    document = FakeHTML(
        base_url=resource_filename('dummy.html'),
        string='<img src="blue.jpg">')
    rendered = document.write_pdf()
    # JPEG-encoded image, embedded in PDF:
    assert b'/Filter /DCTDecode' in rendered
@assert_no_logs
@requires('cairo', (1, 15, 4))
def test_document_info():
    """Check the PDF info dictionary generated from HTML metadata.

    The ``<title>`` and the author, generator, keywords, description and
    dcterms dates ``<meta>`` values are compared with the Title, Author,
    Creator, Keywords, Subject, CreationDate and ModDate entries.

    """
    fileobj = io.BytesIO()
    # The generator value contains two no-break spaces. They are written as
    # \xa0 escapes: literal U+00A0 characters look like plain spaces and are
    # easily lost, while the expected Creator below relies on them (00A0).
    FakeHTML(string=(
        '\n'
        '<meta name=author content="I Me &amp; Myself">\n'
        '<title>Test document</title>\n'
        '<h1>Another title</h1>\n'
        '<meta name=generator content="Human\xa0after\xa0all">\n'
        '<meta name=keywords content="html ,\tcss,\n'
        'pdf,css">\n'
        '<meta name=description content="Blah… ">\n'
        '<meta name=dcterms.created content=2011-04-21T23:00:00Z>\n'
        '<meta name=dcterms.modified content=2013-07-21T23:46+01:00>\n'
    )).write_pdf(target=fileobj)
    info = pdf.PDFFile(fileobj).info
    assert info.get_value('Author', '(.*)') == b'(I Me & Myself)'
    assert info.get_value('Title', '(.*)') == b'(Test document)'
    # Hex string of "Human\xa0after\xa0all" preceded by FEFF.
    assert info.get_value('Creator', '(.*)') == (
        b'<FEFF00480075006D0061006E00A00061006600740065007200A00061006C006C>')
    # The duplicated "css" keyword and the extra whitespace are dropped.
    assert info.get_value('Keywords', '(.*)') == b'(html, css, pdf)'
    # Hex string of "Blah… " preceded by FEFF.
    assert info.get_value('Subject', '(.*)') == (
        b'<FEFF0042006C0061006820260020>')
    assert info.get_value('CreationDate', '(.*)') == b"(20110421230000+00'00)"
    assert info.get_value('ModDate', '(.*)') == b"(20130721234600+01'00)"
@assert_no_logs
@requires('cairo', (1, 15, 4))
def test_embedded_files_attachments(tmpdir):
    """Check that attachments of every supported origin are embedded.

    Attachments come from ``<link rel="attachment">`` elements (data URL,
    absolute file URL, file name relative to the base URL) and from the
    ``attachments`` argument of ``write_pdf`` (``Attachment`` object, raw
    URL, file-like object).

    """
    def md5_hex(data):
        return hashlib.md5(data).hexdigest().encode('ascii')

    absolute_tmp_file = tmpdir.join('some_file.txt').strpath
    adata = b'12345678'
    with open(absolute_tmp_file, 'wb') as absolute_fd:
        absolute_fd.write(adata)
    absolute_url = path2url(absolute_tmp_file)
    assert absolute_url.startswith('file://')

    relative_tmp_file = tmpdir.join('äöü.txt').strpath
    rdata = b'abcdefgh'
    with open(relative_tmp_file, 'wb') as relative_fd:
        relative_fd.write(rdata)

    template = (
        '\n'
        '<title>Test document</title>\n'
        '<meta charset="utf-8">\n'
        '<link\n'
        'rel="attachment"\n'
        'title="some file attachment äöü"\n'
        'href="data:,hi%20there">\n'
        '<link rel="attachment" href="{0}">\n'
        '<link rel="attachment" href="{1}">\n'
        '<h1>Heading 1</h1>\n'
        '<h2>Heading 2</h2>\n'
    )
    buffer = io.BytesIO()
    FakeHTML(
        string=template.format(
            absolute_url, os.path.basename(relative_tmp_file)),
        base_url=tmpdir.strpath,
    ).write_pdf(
        target=buffer,
        attachments=[
            Attachment('data:,oob attachment', description='Hello'),
            'data:,raw URL',
            io.BytesIO(b'file like obj')
        ]
    )
    pdf_bytes = buffer.getvalue()

    # Attachment given as a data URL in a <link> element.
    assert b'<' + md5_hex(b'hi there') + b'>' in pdf_bytes
    assert b'/F ()' in pdf_bytes
    assert (
        b'/UF (\xfe\xff\x00a\x00t\x00t\x00a\x00c\x00h\x00m\x00e\x00n'
        b'\x00t\x00.\x00b\x00i\x00n)' in pdf_bytes)
    assert (
        b'/Desc (\xfe\xff\x00s\x00o\x00m\x00e\x00 \x00f\x00i\x00l\x00e'
        b'\x00 \x00a\x00t\x00t\x00a\x00c\x00h\x00m\x00e\x00n\x00t\x00 '
        b'\x00\xe4\x00\xf6\x00\xfc)' in pdf_bytes)

    # Attachment given as an absolute file URL.
    assert md5_hex(adata) in pdf_bytes
    absolute_name = os.path.basename(absolute_tmp_file)
    assert absolute_name.encode('utf-16-be') in pdf_bytes

    # Attachment given as a file name relative to the base URL.
    assert md5_hex(rdata) in pdf_bytes
    relative_name = os.path.basename(relative_tmp_file)
    assert relative_name.encode('utf-16-be') in pdf_bytes

    # Attachments given through the write_pdf() argument.
    assert md5_hex(b'oob attachment') in pdf_bytes
    assert b'/Desc (\xfe\xff\x00H\x00e\x00l\x00l\x00o)' in pdf_bytes
    assert md5_hex(b'raw URL') in pdf_bytes
    assert md5_hex(b'file like obj') in pdf_bytes

    assert b'/EmbeddedFiles' in pdf_bytes
    assert b'/Outlines' in pdf_bytes
@assert_no_logs
def test_attachments_data():
    """Check that the MD5 of a data URL attachment is written in the PDF."""
    markup = (
        '\n'
        '<title>Test document 2</title>\n'
        '<meta charset="utf-8">\n'
        '<link rel="attachment" href="data:,some data">\n'
    )
    buffer = io.BytesIO()
    FakeHTML(string=markup).write_pdf(target=buffer)
    digest = hashlib.md5(b'some data').hexdigest()
    expected = '<{}>'.format(digest).encode('ascii')
    assert expected in buffer.getvalue()
@assert_no_logs
@requires('cairo', (1, 15, 4))
def test_attachments_none():
    """Check a document with a heading but no attachment.

    The output must contain ``Outlines`` but not ``Names``.

    """
    markup = (
        '\n'
        '<title>Test document 3</title>\n'
        '<meta charset="utf-8">\n'
        '<h1>Heading</h1>\n'
    )
    buffer = io.BytesIO()
    FakeHTML(string=markup).write_pdf(target=buffer)
    rendered = buffer.getvalue()
    assert b'Names' not in rendered
    assert b'Outlines' in rendered
@assert_no_logs
def test_attachments_none_empty():
    """Check a document with neither heading nor attachment.

    The output must contain neither ``Names`` nor ``Outlines``.

    """
    markup = (
        '\n'
        '<title>Test document 3</title>\n'
        '<meta charset="utf-8">\n'
    )
    buffer = io.BytesIO()
    FakeHTML(string=markup).write_pdf(target=buffer)
    rendered = buffer.getvalue()
    assert b'Names' not in rendered
    assert b'Outlines' not in rendered
@assert_no_logs
def test_annotations():
    """Check that ``<a rel="attachment">`` gives a file attachment annotation.

    The attached data must be referenced by its MD5 and ``/FileAttachment``
    must be present, while ``/EmbeddedFiles`` must not.

    """
    markup = (
        '\n'
        '<title>Test document</title>\n'
        '<meta charset="utf-8">\n'
        '<a\n'
        'rel="attachment"\n'
        'href="data:,some data"\n'
        'download>A link that lets you download an attachment</a>\n'
    )
    rendered = FakeHTML(string=markup).write_pdf()
    digest = hashlib.md5(b'some data').hexdigest().encode('ascii')
    assert digest in rendered
    assert b'/FileAttachment' in rendered
    assert b'/EmbeddedFiles' not in rendered
@pytest.mark.parametrize('style, media, bleed, trim', (
    ('bleed: 30pt; size: 10pt',
     [0, 0, 70, 70],
     [20.0, 20.0, 50.0, 50.0],
     [30.0, 30.0, 40.0, 40.0]),
    ('bleed: 15pt 3pt 6pt 18pt; size: 12pt 15pt',
     [0, 0, 33, 36],
     [8.0, 5.0, 33.0, 36.0],
     [18.0, 15.0, 30.0, 30.0]),
))
@assert_no_logs
def test_bleed(style, media, bleed, trim):
    """Check the media, bleed and trim boxes written for ``@page`` bleed."""
    template = (
        '\n'
        '<title>Test document</title>\n'
        '<style>@page { %s }</style>\n'
        '<body>test\n'
    )
    buffer = io.BytesIO()
    FakeHTML(string=template % style).write_pdf(target=buffer)
    rendered = buffer.getvalue()

    media_box = '/MediaBox [ {} {} {} {} ]'.format(*media)
    assert media_box.encode('ascii') in rendered
    bleed_box = '/BleedBox [ {} {} {} {} ]'.format(*bleed)
    assert bleed_box.encode('ascii') in rendered
    trim_box = '/TrimBox [ {} {} {} {} ]'.format(*trim)
    assert trim_box.encode('ascii') in rendered