Clean PDF tests

2024-10-05 08:27:22 +03:00 · 2018-03-15 00:26:58 +01:00 · 2018-03-15 00:26:58 +01:00 · 03b47f6a75
commit 03b47f6a75
parent 8eb5183bea
1 changed file with 170 additions and 144 deletions
--- a/weasyprint/tests/test_pdf.py
+++ b/weasyprint/tests/test_pdf.py
@@ -15,6 +15,7 @@ import os
 import zlib

 import cairocffi
+import pytest
 from pdfrw import PdfReader

 from .. import Attachment
@@ -29,48 +30,41 @@ RIGHT = 595


@assert_no_logs
-def test_pdf_parser():
+@pytest.mark.parametrize('width, height', (
+    (100, 100),
+    (200, 10),
+    (3.14, 987654321),
+))
+def test_pdf_parser(width, height):
    fileobj = io.BytesIO()
    surface = cairocffi.PDFSurface(fileobj, 1, 1)
-    for width, height in [
-        (100, 100),
-        (200, 10),
-        (3.14, 987654321)
-    ]:
-        surface.set_size(width, height)
-        surface.show_page()
+    surface.set_size(width, height)
+    surface.show_page()
    surface.finish()

    fileobj.seek(0)
-    sizes = [page.MediaBox for page in PdfReader(fileobj).Root.Pages.Kids]
-    assert sizes == [
-        ['0', '0', '100', '100'],
-        ['0', '0', '200', '10'],
-        ['0', '0', '3.14', '987654321']
-    ]
+    surface, = [page.MediaBox for page in PdfReader(fileobj).Root.Pages.Kids]
+    assert surface == ['0', '0', str(width), str(height)]


@assert_no_logs
-def test_page_size():
-    pdf_bytes = FakeHTML(string='<style>@page{size:3in 4in').write_pdf()
+@pytest.mark.parametrize('zoom', (1, 1.5, 0.5))
+def test_page_size_zoom(zoom):
+    pdf_bytes = FakeHTML(
+        string='<style>@page{size:3in 4in').write_pdf(zoom=zoom)
    pdf = PdfReader(fdata=pdf_bytes)
-    assert pdf.Root.Pages.Kids[0].MediaBox == ['0', '0', '216', '288']
-
-    pdf_bytes = FakeHTML(string='<style>@page{size:3in 4in').write_pdf(
-        zoom=1.5)
-    pdf = PdfReader(fdata=pdf_bytes)
-    assert pdf.Root.Pages.Kids[0].MediaBox == ['0', '0', '324', '432']
+    assert pdf.Root.Pages.Kids[0].MediaBox == [
+        '0', '0', str(int(216 * zoom)), str(int(288 * zoom))]


@assert_no_logs
-def test_bookmarks():
-    """Test the structure of the document bookmarks."""
+def test_bookmarks_1():
    pdf_bytes = FakeHTML(string='''
-        <h1>a</h1>  #
-        <h4>b</h4>  ####
-        <h3>c</h3>  ###
-        <h2>d</h2>  ##
-        <h1>e</h1>  #
+      <h1>a</h1>  #
+      <h4>b</h4>  ####
+      <h3>c</h3>  ###
+      <h2>d</h2>  ##
+      <h1>e</h1>  #
    ''').write_pdf()
    outlines = PdfReader(fdata=pdf_bytes).Root.Outlines
    # a
@@ -87,29 +81,38 @@ def test_bookmarks():
    assert outlines.First.Next.Title == '(e)'
    assert outlines.Last.Title == '(e)'

+
+@assert_no_logs
+def test_bookmarks_2():
    pdf_bytes = FakeHTML(string='<body>').write_pdf()
    assert PdfReader(fdata=pdf_bytes).Root.Outlines is None

+
+@assert_no_logs
+def test_bookmarks_3():
    pdf_bytes = FakeHTML(string='<h1>a nbsp…</h1>').write_pdf()
    outlines = PdfReader(fdata=pdf_bytes).Root.Outlines
    assert outlines.First.Title.decode() == 'a nbsp…'

+
+@assert_no_logs
+def test_bookmarks_4():
    pdf_bytes = FakeHTML(string='''
-        <style>
-            * { height: 90pt; margin: 0 0 10pt 0 }
-        </style>
-        <h1>Title 1</h1>
-        <h1>Title 2</h1>
-        <h2 style="position: relative; left: 20pt">Title 3</h2>
-        <h2>Title 4</h2>
-        <h3>Title 5</h3>
-        <span style="display: block; page-break-before: always"></span>
-        <h2>Title 6</h2>
-        <h1>Title 7</h1>
-        <h2>Title 8</h2>
-        <h3>Title 9</h3>
-        <h1>Title 10</h1>
-        <h2>Title 11</h2>
+      <style>
+        * { height: 90pt; margin: 0 0 10pt 0 }
+      </style>
+      <h1>Title 1</h1>
+      <h1>Title 2</h1>
+      <h2 style="position: relative; left: 20pt">Title 3</h2>
+      <h2>Title 4</h2>
+      <h3>Title 5</h3>
+      <span style="display: block; page-break-before: always"></span>
+      <h2>Title 6</h2>
+      <h1>Title 7</h1>
+      <h2>Title 8</h2>
+      <h3>Title 9</h3>
+      <h1>Title 10</h1>
+      <h2>Title 11</h2>
    ''').write_pdf()
    outlines = PdfReader(fdata=pdf_bytes).Root.Outlines
    # 1
@@ -147,12 +150,15 @@ def test_bookmarks():
    assert outlines.Last.First.Title == '(Title 11)'
    assert outlines.Last.Last.Title == '(Title 11)'

+
+@assert_no_logs
+def test_bookmarks_5():
    pdf_bytes = FakeHTML(string='''
-        <h2>1</h2> level 1
-        <h4>2</h4> level 2
-        <h2>3</h2> level 1
-        <h3>4</h3> level 2
-        <h4>5</h4> level 3
+      <h2>1</h2> level 1
+      <h4>2</h4> level 2
+      <h2>3</h2> level 1
+      <h3>4</h3> level 2
+      <h4>5</h4> level 3
    ''').write_pdf()
    outlines = PdfReader(fdata=pdf_bytes).Root.Outlines
    # 1
@@ -167,16 +173,19 @@ def test_bookmarks():
    assert outlines.Last.First.Title == '(4)'
    assert outlines.Last.First.First.Title == '(5)'

+
+@assert_no_logs
+def test_bookmarks_6():
    pdf_bytes = FakeHTML(string='''
-        <h2>1</h2> h2 level 1
-        <h4>2</h4> h4 level 2
-        <h3>3</h3> h3 level 2
-        <h5>4</h5> h5 level 3
-        <h1>5</h1> h1 level 1
-        <h2>6</h2> h2 level 2
-        <h2>7</h2> h2 level 2
-        <h4>8</h4> h4 level 3
-        <h1>9</h1> h1 level 1
+      <h2>1</h2> h2 level 1
+      <h4>2</h4> h4 level 2
+      <h3>3</h3> h3 level 2
+      <h5>4</h5> h5 level 3
+      <h1>5</h1> h1 level 1
+      <h2>6</h2> h2 level 2
+      <h2>7</h2> h2 level 2
+      <h4>8</h4> h4 level 3
+      <h1>9</h1> h1 level 1
    ''').write_pdf()
    # 1
    # |_ 2
@@ -199,6 +208,9 @@ def test_bookmarks():
    assert outlines.First.Next.First.Next.First.Title == '(8)'
    assert outlines.Last.Title == '(9)'

+
+@assert_no_logs
+def test_bookmarks_7():
    # Reference for the next test. zoom=1
    pdf_bytes = FakeHTML(string='<h2>a</h2>').write_pdf()
    outlines = PdfReader(fdata=pdf_bytes).Root.Outlines
@@ -217,20 +229,20 @@ def test_links():
    assert PdfReader(fdata=pdf_bytes).Root.Pages.Kids[0].Annots is None

    pdf_bytes = FakeHTML(string='''
-        <style>
-            body { margin: 0; font-size: 10pt; line-height: 2 }
-            p { display: block; height: 90pt; margin: 0 0 10pt 0 }
-            img { width: 30pt; vertical-align: top }
-        </style>
-        <p><a href="http://weasyprint.org"><img src=pattern.png></a></p>
-        <p style="padding: 0 10pt"><a
-            href="#lipsum"><img style="border: solid 1pt"
-                                src=pattern.png></a></p>
-        <p id=hello>Hello, World</p>
-        <p id=lipsum>
-            <a style="display: block; page-break-before: always; height: 30pt"
-               href="#hel%6Co"></a>
-        </p>
+      <style>
+        body { margin: 0; font-size: 10pt; line-height: 2 }
+        p { display: block; height: 90pt; margin: 0 0 10pt 0 }
+        img { width: 30pt; vertical-align: top }
+      </style>
+      <p><a href="http://weasyprint.org"><img src=pattern.png></a></p>
+      <p style="padding: 0 10pt"><a
+         href="#lipsum"><img style="border: solid 1pt"
+                             src=pattern.png></a></p>
+      <p id=hello>Hello, World</p>
+      <p id=lipsum>
+        <a style="display: block; page-break-before: always; height: 30pt"
+           href="#hel%6Co"></a>
+      </p>
    ''', base_url=resource_filename('<inline HTML>')).write_pdf()
    links = [
        annot for page in PdfReader(fdata=pdf_bytes).Root.Pages.Kids
@@ -286,7 +298,7 @@ def test_links():


@assert_no_logs
-def test_relative_links():
+def test_relative_links_relative():
    # Relative URI reference without a base URI: allowed for anchors
    pdf_bytes = FakeHTML(
        string='<a href="../lipsum" style="display: block">',
@@ -295,6 +307,9 @@ def test_relative_links():
    assert link.A == {'/URI': '(../lipsum)', '/S': '/URI', '/Type': '/Action'}
    assert [round(float(value)) for value in link.Rect] == [0, TOP, RIGHT, TOP]

+
+@assert_no_logs
+def test_relative_links_links():
    # Relative URI reference without a base URI: not supported for -weasy-link
    with capture_logs() as logs:
        pdf_bytes = FakeHTML(
@@ -305,6 +320,9 @@ def test_relative_links():
    assert 'WARNING: Ignored `-weasy-link: url("../lipsum")`' in logs[0]
    assert 'Relative URI reference without a base URI' in logs[0]

+
+@assert_no_logs
+def test_relative_links_internal():
    # Internal URI reference without a base URI: OK
    pdf_bytes = FakeHTML(
        string='<a href="#lipsum" id="lipsum" style="display: block">',
@@ -316,6 +334,9 @@ def test_relative_links():
    assert round(float(link.A.D[3])) == TOP
    assert [round(float(value)) for value in link.Rect] == [0, TOP, RIGHT, TOP]

+
+@assert_no_logs
+def test_relative_links_anchors():
    pdf_bytes = FakeHTML(
        string='<div style="-weasy-link: url(#lipsum)" id="lipsum">',
        base_url=None).write_pdf()
@@ -331,10 +352,9 @@ def test_relative_links():
 def test_missing_links():
    with capture_logs() as logs:
        pdf_bytes = FakeHTML(string='''
-            <style> a { display: block; height: 15pt; } </style>
-            <body>
-                <a href="#lipsum"></a>
-                <a href="#missing" id="lipsum"></a>
+          <style> a { display: block; height: 15pt } </style>
+          <a href="#lipsum"></a>
+          <a href="#missing" id="lipsum"></a>
        ''', base_url=None).write_pdf()
    link, = PdfReader(fdata=pdf_bytes).Root.Pages.Kids[0].Annots
    assert link.A.S == '/GoTo'
@@ -348,27 +368,32 @@ def test_missing_links():


@assert_no_logs
-def test_jpeg():
-    def render(html):
-        return FakeHTML(base_url=resource_filename('dummy.html'),
-                        string=html).write_pdf()
-    assert b'/Filter /DCTDecode' not in render('<img src="pattern.gif">')
+def test_embed_gif():
+    assert b'/Filter /DCTDecode' not in FakeHTML(
+        base_url=resource_filename('dummy.html'),
+        string='<img src="pattern.gif">').write_pdf()
+
+
+@assert_no_logs
+def test_embed_jpeg():
    # JPEG-encoded image, embedded in PDF:
-    assert b'/Filter /DCTDecode' in render('<img src="blue.jpg">')
+    assert b'/Filter /DCTDecode' in FakeHTML(
+        base_url=resource_filename('dummy.html'),
+        string='<img src="blue.jpg">').write_pdf()


@assert_no_logs
 def test_document_info():
    pdf_bytes = FakeHTML(string='''
-        <meta name=author content="I Me &amp; Myself">
-        <title>Test document</title>
-        <h1>Another title</h1>
-        <meta name=generator content="Human after all">
-        <meta name=keywords content="html ,\tcss,
-                                     pdf,css">
-        <meta name=description content="Blah… ">
-        <meta name=dcterms.created content=2011-04>
-        <meta name=dcterms.modified content=2013-07-21T23:46+01:00>
+      <meta name=author content="I Me &amp; Myself">
+      <title>Test document</title>
+      <h1>Another title</h1>
+      <meta name=generator content="Human after all">
+      <meta name=keywords content="html ,\tcss,
+                                   pdf,css">
+      <meta name=description content="Blah… ">
+      <meta name=dcterms.created content=2011-04>
+      <meta name=dcterms.modified content=2013-07-21T23:46+01:00>
    ''').write_pdf()
    info = PdfReader(fdata=pdf_bytes).Info
    assert info.Author.decode() == 'I Me & Myself'
@@ -381,7 +406,7 @@ def test_document_info():


@assert_no_logs
-def test_embedded_files(tmpdir):
+def test_embedded_files_attachments(tmpdir):
    absolute_tmp_file = tmpdir.join('some_file.txt')
    adata = b'12345678'
    with open(absolute_tmp_file, 'wb') as afile:
@@ -396,16 +421,16 @@ def test_embedded_files(tmpdir):

    pdf_bytes = FakeHTML(
        string='''
-            <title>Test document</title>
-            <meta charset="utf-8">
-            <link
-                rel="attachment"
-                title="some file attachment äöü"
-                href="data:,hi%20there">
-            <link rel="attachment" href="{0}">
-            <link rel="attachment" href="{1}">
-            <h1>Heading 1</h1>
-            <h2>Heading 2</h2>
+          <title>Test document</title>
+          <meta charset="utf-8">
+          <link
+            rel="attachment"
+            title="some file attachment äöü"
+            href="data:,hi%20there">
+          <link rel="attachment" href="{0}">
+          <link rel="attachment" href="{1}">
+          <h1>Heading 1</h1>
+          <h2>Heading 2</h2>
        '''.format(absolute_url, os.path.basename(relative_tmp_file)),
        base_url=tmpdir.strpath,
    ).write_pdf(
@@ -454,12 +479,13 @@ def test_embedded_files(tmpdir):
    assert embedded[11].EF.F.Params.CheckSum == (
        '<{}>'.format(hashlib.md5(b'file like obj').hexdigest()))

+
+@assert_no_logs
+def test_attachments_data():
    pdf_bytes = FakeHTML(string='''
-        <title>Test document 2</title>
-        <meta charset="utf-8">
-        <link
-            rel="attachment"
-            href="data:,some data">
+      <title>Test document 2</title>
+      <meta charset="utf-8">
+      <link rel="attachment" href="data:,some data">
    ''').write_pdf()
    pdf = PdfReader(fdata=pdf_bytes)
    embedded = pdf.Root.Names.EmbeddedFiles.Names
@@ -467,18 +493,24 @@ def test_embedded_files(tmpdir):
    assert embedded[1].EF.F.Params.CheckSum == (
        '<{}>'.format(hashlib.md5(b'some data').hexdigest()))

+
+@assert_no_logs
+def test_attachments_none():
    pdf_bytes = FakeHTML(string='''
-        <title>Test document 3</title>
-        <meta charset="utf-8">
-        <h1>Heading</h1>
+      <title>Test document 3</title>
+      <meta charset="utf-8">
+      <h1>Heading</h1>
    ''').write_pdf()
    pdf = PdfReader(fdata=pdf_bytes)
    assert pdf.Root.Names is None
    assert pdf.Root.Outlines is not None

+
+@assert_no_logs
+def test_attachments_none_empty():
    pdf_bytes = FakeHTML(string='''
-        <title>Test document 4</title>
-        <meta charset="utf-8">
+      <title>Test document 4</title>
+      <meta charset="utf-8">
    ''').write_pdf()
    pdf = PdfReader(fdata=pdf_bytes)
    assert pdf.Root.Names is None
@@ -486,14 +518,14 @@ def test_embedded_files(tmpdir):


@assert_no_logs
-def test_annotation_files():
+def test_annotations():
    pdf_bytes = FakeHTML(string='''
-        <title>Test document</title>
-        <meta charset="utf-8">
-        <a
-            rel="attachment"
-            href="data:,some data"
-            download>A link that lets you download an attachment</a>
+      <title>Test document</title>
+      <meta charset="utf-8">
+      <a
+        rel="attachment"
+        href="data:,some data"
+        download>A link that lets you download an attachment</a>
    ''').write_pdf()

    assert hashlib.md5(b'some data').hexdigest().encode('ascii') in pdf_bytes
@@ -501,30 +533,24 @@ def test_annotation_files():
    assert b'/EmbeddedFiles' not in pdf_bytes


+@pytest.mark.parametrize('style, media, bleed, trim', (
+    ('bleed: 30pt; size: 10pt',
+     ['0', '0', '70', '70'],
+     ['20', '20', '50', '50'],
+     ['30', '30', '40', '40']),
+    ('bleed: 15pt 3pt 6pt 18pt; size: 12pt 15pt',
+     ['0', '0', '33', '36'],
+     ['8', '5', '33', '36'],
+     ['18', '15', '30', '30']),
+))
@assert_no_logs
-def test_bleed():
+def test_bleed(style, media, bleed, trim):
    pdf_bytes = FakeHTML(string='''
-        <title>Test document</title>
-        <style>
-            @page { bleed: 30pt; size: 10pt }
-        </style>
-        <body>test
-    ''').write_pdf()
-
+      <title>Test document</title>
+      <style>@page { %s }</style>
+      <body>test
+    ''' % style).write_pdf()
    pdf = PdfReader(fdata=pdf_bytes)
-    assert pdf.Root.Pages.Kids[0].MediaBox == ['0', '0', '70', '70']
-    assert pdf.Root.Pages.Kids[0].BleedBox == ['20', '20', '50', '50']
-    assert pdf.Root.Pages.Kids[0].TrimBox == ['30', '30', '40', '40']
-
-    pdf_bytes = FakeHTML(string='''
-        <title>Test document</title>
-        <style>
-            @page { bleed: 15pt 3pt 6pt 18pt; size: 12pt 15pt }
-        </style>
-        <body>test
-    ''').write_pdf()
-
-    pdf = PdfReader(fdata=pdf_bytes)
-    assert pdf.Root.Pages.Kids[0].MediaBox == ['0', '0', '33', '36']
-    assert pdf.Root.Pages.Kids[0].BleedBox == ['8', '5', '33', '36']
-    assert pdf.Root.Pages.Kids[0].TrimBox == ['18', '15', '30', '30']
+    assert pdf.Root.Pages.Kids[0].MediaBox == media
+    assert pdf.Root.Pages.Kids[0].BleedBox == bleed
+    assert pdf.Root.Pages.Kids[0].TrimBox == trim