Merge branch 'master' into table-breaks

Conflicts: weasyprint/css/validation.py
2024-10-05 00:21:15 +03:00 · 2012-03-21 17:06:47 +01:00 · 2012-03-21 17:06:47 +01:00 · e23c38bfd2
commit e23c38bfd2
parent f4a143a642 c521322ee7
9 changed files with 97 additions and 70 deletions
--- a/1
+++ b/1
@ -2,6 +2,7 @@ Version 0.7, released on 2012-XX-XX
 ===================================

 * Support for the ``orphans`` and ``widows`` properties.
+* Support for ``page-break-inside: avoid``.


 Version 0.6.1, released on 2012-03-01
--- a/weasyprint/css/validation.py
+++ b/weasyprint/css/validation.py
@ -629,9 +629,7 @@ def page_break(keyword):
@single_keyword
 def page_break_inside(keyword):
    """Validation for the ``page-break-inside`` property."""
-    if keyword == 'avoid':
-        raise InvalidValues('value not supported yet')
-    return keyword in ('auto',)
+    return keyword in ('auto', 'avoid')


@validator()
@ -872,7 +870,10 @@ def generic_expander(*expanded_names):
                results = {}
                for new_name, new_values in wrapped(name, values):
                    assert new_name in expanded_names, new_name
-                    assert new_name not in results, new_name
+                    if new_name in results:
+                        raise InvalidValues(
+                            'got multiple %s values in a %s shorthand'
+                            % (new_name.strip('_'), name))
                    results[new_name] = new_values

            for new_name in expanded_names:
--- a/weasyprint/images.py
+++ b/weasyprint/images.py
@ -63,10 +63,6 @@ def cairosvg_handler(file_like, uri):

    This handler uses CairoSVG: http://cairosvg.org/
    """
-    try:
-        import cairosvg as _
-    except ImportError as exception:
-        return exception
    from cairosvg.surface import SVGSurface
    from cairosvg.parser import Tree, ParseError

@ -84,12 +80,9 @@ def cairosvg_handler(file_like, uri):
        # Don’t pass data URIs to CairoSVG.
        # They are useless for relative URIs anyway.
        uri = None
-    try:
-        # Draw to a cairo surface but do not write to a file
-        tree = Tree(file_obj=file_like, url=uri)
-        surface = ScaledSVGSurface(tree, output=None, dpi=96)
-    except (ParseError, NotImplementedError) as exception:
-        return exception
+    # Draw to a cairo surface but do not write to a file
+    tree = Tree(file_obj=file_like, url=uri)
+    surface = ScaledSVGSurface(tree, output=None, dpi=96)
    pattern = cairo.SurfacePattern(surface.cairo)
    return pattern, surface.width, surface.height

@ -101,50 +94,27 @@ def fallback_handler(file_like, uri):
    PIL supports many raster image formats and does not take a `format`
    parameter, it guesses the format from the content.
    """
-    try:
-        import pystacia as _
-    except ImportError as exception:
-        return exception
    from pystacia import read_blob
    from pystacia.util import TinyException
-    try:
-        with contextlib.closing(read_blob(file_like.read())) as image:
-            png_bytes = image.get_blob('png')
-    except TinyException as exception:
-        return exception
-    else:
-        return png_handler(BytesIO(png_bytes), uri)
+    with contextlib.closing(read_blob(file_like.read())) as image:
+        png_bytes = image.get_blob('png')
+    return png_handler(BytesIO(png_bytes), uri)


 def get_image_from_uri(uri, type_=None):
    """Get a :class:`cairo.Surface`` from an image URI."""
    try:
        file_like, mime_type, _charset = urlopen(uri)
-    except (IOError, ValueError) as exc:
-        LOGGER.warn('Error while fetching an image from %s : %r', uri, exc)
-        return None
-
-    if not type_:
-        type_ = mime_type  # Use eg. the HTTP header
-    #else: the type was forced by eg. a 'type' attribute on <embed>
-    handler = FORMAT_HANDLERS.get(type_, fallback_handler)
-    exception = None
-    try:
-        image = handler(file_like, uri)
-    except (IOError, MemoryError) as e:
-        exception = e # Network or parsing error
-    else:
-        if isinstance(image, Exception):
-            exception = image
+        if not type_:
+            type_ = mime_type  # Use eg. the HTTP header
+        #else: the type was forced by eg. a 'type' attribute on <embed>
+        handler = FORMAT_HANDLERS.get(type_, fallback_handler)
+        return handler(file_like, uri)
+    except Exception as exc:
+        LOGGER.warn('Error for image at %s : %r', uri, exc)
    finally:
        try:
            file_like.close()
        except Exception:
-            # Do not hide a more relevant exception.
+            # May already be closed or something. This is just cleanup anyway.
            pass
-
-    if exception is None:
-        return image
-    else:
-        LOGGER.warn('Error while parsing an image at %s : %r', uri, exception)
-        return None
--- a/weasyprint/layout/blocks.py
+++ b/weasyprint/layout/blocks.py
@ -258,7 +258,7 @@ def block_level_height(document, box, max_position_y, skip_stack,
            if is_page_break:
                break
        else:
-            if new_children and not page_is_empty:
+            if new_children:
                # between siblings, but not before the first child
                # or after the last child.
                break_here, next_page = forced_page_break(
@ -271,7 +271,8 @@ def block_level_height(document, box, max_position_y, skip_stack,
            (new_child, resume_at, next_page, next_adjoining_margins,
                collapsing_through) = block_level_layout(
                    document, child, max_position_y, skip_stack,
-                    new_containing_block, device_size, page_is_empty,
+                    new_containing_block, device_size,
+                    page_is_empty and not new_children,
                    adjoining_margins)
            skip_stack = None

@ -293,7 +294,8 @@ def block_level_height(document, box, max_position_y, skip_stack,
                    new_position_y = (
                        new_child.border_box_y() + new_child.border_height())

-                    if (new_position_y > max_position_y and not page_is_empty
+                    if (new_position_y > max_position_y and (
+                                new_children or not page_is_empty)
                            and not isinstance(child, boxes.BlockBox)):
                        # The child overflows the page area, put it on the
                        # next page. (But don’t delay whole blocks if eg.
@ -314,7 +316,6 @@ def block_level_height(document, box, max_position_y, skip_stack,
            # Bottom borders may overflow here
            # TODO: back-track somehow when all lines fit but not borders
            new_children.append(new_child)
-            page_is_empty = False
            if resume_at is not None:
                resume_at = (index, resume_at)
                break
@ -322,6 +323,10 @@ def block_level_height(document, box, max_position_y, skip_stack,
    else:
        resume_at = None

+    if resume_at is not None and box.style.page_break_inside == 'avoid' \
+            and not page_is_empty:
+        return None, None, 'any', [], False
+

    if collapsing_with_children:
        if new_children and not isinstance(
--- a/weasyprint/tests/test_css.py
+++ b/weasyprint/tests/test_css.py
@ -34,6 +34,7 @@ from .testing_utils import (
 from .. import css
 from ..css.computed_values import used_line_height
 from ..document import PNGDocument
+from ..utils import parse_data_url
 from .. import HTML


@ -45,6 +46,36 @@ def parse_html(filename, **kwargs):
    return PNGDocument(html.root_element, **kwargs)


+@assert_no_logs
+def test_data_url():
+    """Check that ``parse_data_url`` decodes URLs using the "data:" scheme."""
+    def check(url, content, mime_type, charset):
+        # Compare the payload, media type and charset in a single assertion.
+        stream, found_mime_type, found_charset = parse_data_url(url)
+        decoded = (stream.read(), found_mime_type, found_charset)
+        assert decoded == (content, mime_type, charset)
+    check('data:,foo', b'foo', 'text/plain', 'US-ASCII')
+    check('data:,foo%22bar', b'foo"bar', 'text/plain', 'US-ASCII')
+    check('data:text/plain,foo', b'foo', 'text/plain', None)
+    check('data:text/html;charset=utf8,<body>', b'<body>', 'text/html', 'utf8')
+    check('data:text/plain;base64,Zm9v', b'foo', 'text/plain', None)
+    check('data:text/plain;base64,Zm9vbw==', b'fooo', 'text/plain', None)
+    check('data:text/plain;base64,Zm9vb28=', b'foooo', 'text/plain', None)
+    check('data:text/plain;base64,Zm9vb29v', b'fooooo', 'text/plain', None)
+    check('data:text/plain;base64,Zm9vbw%3D%3D', b'fooo', 'text/plain', None)
+    check('data:text/plain;base64,Zm9vb28%3D', b'foooo', 'text/plain', None)
+
+    # Base64 padding is optional here: "From a theoretical point of view,
+    #  the padding character is not needed, since the number of missing
+    #  bytes can be calculated from the number of Base64 digits."
+    # https://en.wikipedia.org/wiki/Base64#Padding
+
+    # The Acid 2 test relies on base64 data URLs that have no padding.
+    # http://acid2.acidtests.org/
+    check('data:text/plain;base64,Zm9vbw', b'fooo', 'text/plain', None)
+    check('data:text/plain;base64,Zm9vb28', b'foooo', 'text/plain', None)
+
+
@assert_no_logs
 def test_style_dict():
    """Test a style in a ``dict``."""
--- a/weasyprint/tests/test_css_properties.py
+++ b/weasyprint/tests/test_css_properties.py
@ -145,6 +145,8 @@ def test_expand_list_style():
    }
    with raises(ValueError):
        expand_to_dict('list_style', 'red')
+    with raises(ValueError):
+        expand_to_dict('list_style', 'circle disc')


 def assert_background(css, **kwargs):
--- a/weasyprint/tests/test_draw.py
+++ b/weasyprint/tests/test_draw.py
@ -909,7 +909,7 @@ def test_images():
            <div><img src="inexistent1.png" alt=""></div>
        ''')
    assert len(logs) == 1
-    assert 'WARNING: Error while fetching an image' in logs[0]
+    assert 'WARNING: Error for image' in logs[0]
    assert 'inexistent1.png' in logs[0]
    assert_pixels('image_no_src', 8, 8, no_image, '''
        <style>
@ -944,7 +944,7 @@ def test_images():
            '''),
        ])
    assert len(logs) == 1
-    assert 'WARNING: Error while fetching an image' in logs[0]
+    assert 'WARNING: Error for image' in logs[0]
    assert 'inexistent2.png' in logs[0]


--- a/weasyprint/tests/test_layout.py
+++ b/weasyprint/tests/test_layout.py
@ -641,16 +641,16 @@ def test_page_breaks():


@assert_no_logs
-def test_orphans_widows():
+def test_orphans_widows_avoid():
    """Test orphans and widows control."""
-    def line_distribution(orphans, widows):
+    def line_distribution(css):
        pages = parse('''
            <style>
                @page { -weasy-size: 200px }
                h1 { height: 120px }
                p { line-height: 20px;
                    width: 1px; /* line break at each word */
-                    orphans: %s; widows: %s }
+                    %s }
            </style>
            <h1>Tasty test</h1>
            <!-- There is room for 4 lines after h1 on the fist page -->
@ -663,7 +663,7 @@ def test_orphans_widows():
                six
                seven
            </p>
-        ''' % (orphans, widows))
+        ''' % css)
        line_counts = []
        for i, page in enumerate(pages):
            html, = page.children
@ -679,10 +679,13 @@ def test_orphans_widows():
                line_counts.append(0)
        return line_counts

-    assert line_distribution(orphans=2, widows=2) == [4, 3]
-    assert line_distribution(orphans=5, widows=2) == [0, 7]
-    assert line_distribution(orphans=2, widows=4) == [3, 4]
-    assert line_distribution(orphans=4, widows=4) == [0, 7]
+    assert line_distribution('orphans: 2; widows: 2') == [4, 3]
+    assert line_distribution('orphans: 5; widows: 2') == [0, 7]
+    assert line_distribution('orphans: 2; widows: 4') == [3, 4]
+    assert line_distribution('orphans: 4; widows: 4') == [0, 7]
+
+    assert line_distribution(
+        'orphans: 2; widows: 2; page-break-inside: avoid') == [0, 7]


@assert_no_logs
@ -946,10 +949,7 @@ def test_images():
        with capture_logs() as logs:
            page, = parse("<p><img src='%s' alt='invalid image'>" % url)
        assert len(logs) == 1
-        if url.startswith('data:') or 'really' in url:
-            assert 'WARNING: Error while parsing an image' in logs[0]
-        else:
-            assert 'WARNING: Error while fetching an image' in logs[0]
+        assert 'WARNING: Error for image' in logs[0]
        html, = page.children
        body, = html.children
        paragraph, = body.children
--- a/weasyprint/utils.py
+++ b/weasyprint/utils.py
@ -65,6 +65,25 @@ def ensure_url(string):
        return path2url(string.encode('utf8'))


+def decode_base64(data):
+    """Decode base64 data whose trailing ``=`` padding may be missing.
+
+    "From a theoretical point of view, the padding character is not needed,
+     since the number of missing bytes can be calculated from the number
+     of Base64 digits."
+
+    https://en.wikipedia.org/wiki/Base64#Padding
+
+    :param data: Base64 data as an ASCII byte string
+    :returns: The decoded byte string.
+
+    """
+    # ``-len(data) % 4`` is 0 when the input is already aligned, whereas
+    # ``4 - len(data) % 4`` gave 4 and appended a useless ``====`` group.
+    data += b'=' * (-len(data) % 4)
+    return base64.b64decode(data)
+
+
 def parse_data_url(url):
    """Decode URLs with the 'data' stream. urllib can handle them
    in Python 2, but that is broken in Python 3.
@ -98,11 +117,9 @@ def parse_data_url(url):
        charset = 'US-ASCII'
        encoding = ''

+    data = unquote_to_bytes(data)
    if encoding == 'base64':
-        data = data.encode('ascii')
-        data = base64.decodestring(data)
-    else:
-        data = unquote_to_bytes(data)
+        data = decode_base64(data)

    return io.BytesIO(data), mime_type, charset