WeasyPrint/weasyprint/formatting_structure/build.py

# coding: utf8

#  WeasyPrint converts web documents (HTML, CSS, ...) to PDF.
#  Copyright (C) 2011  Simon Sapin
#
#  This program is free software: you can redistribute it and/or modify
#  it under the terms of the GNU Affero General Public License as
#  published by the Free Software Foundation, either version 3 of the
#  License, or (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU Affero General Public License for more details.
#
#  You should have received a copy of the GNU Affero General Public License
#  along with this program.  If not, see <http://www.gnu.org/licenses/>.


"""
Building helpers.

Functions building a correct formatting structure from a DOM document,
including handling of anonymous boxes and whitespace processing.

"""

from __future__ import division, unicode_literals

import re
from . import boxes, counters
from .. import html
from ..css import properties
from ..compat import basestring, xrange


# Maps values of the ``display`` CSS property to box types.
BOX_TYPE_FROM_DISPLAY = {
    'block': boxes.BlockBox,
    'list-item': boxes.BlockBox,
    'inline': boxes.InlineBox,
    'inline-block': boxes.InlineBlockBox,
    'table': boxes.TableBox,
    'inline-table': boxes.InlineTableBox,
    'table-row': boxes.TableRowBox,
    'table-row-group': boxes.TableRowGroupBox,
    'table-header-group': boxes.TableRowGroupBox,
    'table-footer-group': boxes.TableRowGroupBox,
    'table-column': boxes.TableColumnBox,
    'table-column-group': boxes.TableColumnGroupBox,
    'table-cell': boxes.TableCellBox,
    'table-caption': boxes.TableCaptionBox,
}


def build_formatting_structure(document, computed_styles):
    """Build a formatting structure (box tree) from a ``document``."""
    # TODO: use computed_styles intsead of document.style_for()
    box, = dom_to_box(document, document.dom)
    box.is_for_root_element = True
    # If this is changed, maybe update weasy.layout.pages.make_margin_boxes()
    box = process_whitespace(box)
    box = anonymous_table_boxes(box)
    box = inline_in_block(box)
    box = block_in_inline(box)
    box = set_canvas_background(box)
    box = set_viewport_overflow(box)
    return box


def dom_to_box(document, element, state=None):
    """Convert a DOM element and its children into a box with children.

    Return a list of boxes. Most of the time the list will have one item but
    may have zero or more than one.

    Eg.::

        <p>Some <em>emphasised</em> text.<p>

    gives (not actual syntax)::

        BlockBox[
            TextBox['Some '],
            InlineBox[
                TextBox['emphasised'],
            ],
            TextBox[' text.'],
        ]

    ``TextBox``es are anonymous inline boxes:
    See http://www.w3.org/TR/CSS21/visuren.html#anonymous

    """
    if not isinstance(element.tag, basestring):
        # lxml.html already converts HTML entities to text.
        # Here we ignore comments and XML processing instructions.
        return []

    style = document.style_for(element)

    # TODO: should be the used value. When does the used value for `display`
    # differ from the computer value?
    display = style.display
    if display == 'none':
        return []

    box = BOX_TYPE_FROM_DISPLAY[display](
        element.tag, element.sourceline, style, [])

    if state is None:
        # use a list to have a shared mutable object
        state = (
            # Shared mutable objects:
            [0],  # quote_depth: single integer
            {},  # counter_values: name -> stacked/scoped values
            [set()]  # counter_scopes: DOM tree depths -> counter names
        )
    _quote_depth, counter_values, counter_scopes = state

    update_counters(state, style)

    children = []
    if display == 'list-item':
        children.extend(add_box_marker(document, counter_values, box))

    # If this element’s direct children create new scopes, the counter
    # names will be in this new list
    counter_scopes.append(set())

    children.extend(pseudo_to_box(document, state, element, 'before'))
    text = element.text
    if text:
        children.append(boxes.TextBox.anonymous_from(box, text))
    for child_element in element:
        children.extend(dom_to_box(document, child_element, state))
        text = child_element.tail
        if text:
            children.append(boxes.TextBox.anonymous_from(box, text))
    children.extend(pseudo_to_box(document, state, element, 'after'))

    # Scopes created by this element’s children stop here.
    for name in counter_scopes.pop():
        counter_values[name].pop()

    box = box.copy_with_children(children)

    # Specific handling for the element. (eg. replaced element)
    return html.handle_element(document, element, box)


def pseudo_to_box(document, state, element, pseudo_type):
    """Yield the box for a :before or :after pseudo-element if there is one."""
    style = document.style_for(element, pseudo_type)
    if pseudo_type and style is None:
        # Pseudo-elements with no style at all do not get a StyleDict
        # Their initial content property computes to 'none'.
        return

    # TODO: should be the used value. When does the used value for `display`
    # differ from the computer value?
    display = style.display
    content = style.content
    if 'none' in (display, content) or content == 'normal':
        return

    box = BOX_TYPE_FROM_DISPLAY[display](
        '%s:%s' % (element.tag, pseudo_type), element.sourceline, style, [])

    quote_depth, counter_values, _counter_scopes = state
    update_counters(state, style)
    children = []
    if display == 'list-item':
        children.extend(add_box_marker(document, counter_values, box))
    children.extend(content_to_boxes(
        document, style, box, quote_depth, counter_values))

    yield box.copy_with_children(children)


def content_to_boxes(document, style, parent_box, quote_depth, counter_values):
    """Takes the value of a ``content`` property and yield boxes."""
    texts = []
    for type_, value in style.content:
        if type_ == 'STRING':
            texts.append(value)
        elif type_ == 'URI':
            image = document.get_image_from_uri(value)
            if image is not None:
                text = ''.join(texts)
                if text:
                    yield boxes.TextBox.anonymous_from(parent_box, text)
                texts = []
                yield boxes.InlineReplacedBox.anonymous_from(parent_box, image)
        elif type_ == 'counter':
            counter_name, counter_style = value
            counter_value = counter_values.get(counter_name, [0])[-1]
            texts.append(counters.format(counter_value, counter_style))
        elif type_ == 'counters':
            counter_name, separator, counter_style = value
            texts.append(separator.join(
                counters.format(counter_value, counter_style)
                for counter_value in counter_values.get(counter_name, [0])
            ))
        else:
            assert type_ == 'QUOTE'
            is_open, insert = value
            if not is_open:
                quote_depth[0] = max(0, quote_depth[0] - 1)
            if insert:
                open_quotes, close_quotes = style.quotes
                quotes = open_quotes if is_open else close_quotes
                texts.append(quotes[min(quote_depth[0], len(quotes) - 1)])
            if is_open:
                quote_depth[0] += 1
    text = ''.join(texts)
    if text:
        yield boxes.TextBox.anonymous_from(parent_box, text)


def update_counters(state, style):
    """Handle the ``counter-*`` properties."""
    _quote_depth, counter_values, counter_scopes = state
    sibling_scopes = counter_scopes[-1]

    for name, value in style.counter_reset:
        if name in sibling_scopes:
            counter_values[name].pop()
        else:
            sibling_scopes.add(name)
        counter_values.setdefault(name, []).append(value)

    # Disabled for now, only exists in Lists3’s editor’s draft.
    for name, value in []: # XXX style.counter_set:
        values = counter_values.setdefault(name, [])
        if not values:
            if name in sibling_scopes:
                counter_values[name].pop()
            else:
                sibling_scopes.add(name)
            values.append(0)
        values[-1] = value

    counter_increment = style.counter_increment
    if counter_increment == 'auto':
        # 'auto' is the initial value but is not valid in stylesheet:
        # there was no counter-increment declaration for this element.
        # (Or the winning value was 'initial'.)
        # http://dev.w3.org/csswg/css3-lists/#declaring-a-list-item
        if style.display == 'list-item':
            counter_increment = [('list-item', 1)]
        else:
            counter_increment = []
    for name, value in counter_increment:
        values = counter_values.setdefault(name, [])
        if not values:
            if name in sibling_scopes:
                counter_values[name].pop()
            else:
                sibling_scopes.add(name)
            values.append(0)
        values[-1] += value


def add_box_marker(document, counter_values, box):
    """Add a list marker to boxes for elements with ``display: list-item``,
    and yield children to add a the start of the box.

    See http://www.w3.org/TR/CSS21/generate.html#lists

    """
    style = box.style
    image = style.list_style_image
    if image != 'none':
        # surface may be None here too, in case the image is not available.
        image = document.get_image_from_uri(image)
    else:
        image = None

    if image is None:
        type_ = style.list_style_type
        if type_ == 'none':
            return
        counter_value = counter_values['list-item'][-1]
        marker_text = counters.format_list_marker(counter_value, type_)
        marker_box = boxes.TextBox.anonymous_from(box, marker_text)
    else:
        marker_box = boxes.InlineReplacedBox.anonymous_from(box, image)
        marker_box.is_list_marker = True
    marker_box.element_tag += '::marker'

    position = style.list_style_position
    if position == 'inside':
        side = 'right' if style.direction == 'ltr' else 'left'
        marker_box.style['margin_' + side] = style.font_size * 0.5
        yield marker_box
    elif position == 'outside':
        box.outside_list_marker = marker_box


def is_whitespace(box, _has_non_whitespace=re.compile('\S').search):
    """Return True if ``box`` is a TextBox with only whitespace."""
    return (
        isinstance(box, boxes.TextBox)
        and not _has_non_whitespace(box.text)
    )


def wrap_improper(box, children, wrapper_type, test=None):
    """
    Wrap consecutive children that do not pass ``test`` in a box of type
    ``wrapper_type``.

    ``test`` defaults to children being of the same type as ``wrapper_type``.

    """
    if test is None:
        test = lambda child: isinstance(child, wrapper_type)
    improper = []
    for child in children:
        if test(child):
            if improper:
                wrapper = wrapper_type.anonymous_from(box, children=[])
                # Apply the rules again on the new wrapper
                yield table_boxes_children(wrapper, improper)
                improper = []
            yield child
        else:
            # Whitespace either fail the test or were removed earlier,
            # so there is no need to take special care with the definition
            # of "consecutive".
            improper.append(child)
    if improper:
        wrapper = wrapper_type.anonymous_from(box, children=[])
        # Apply the rules again on the new wrapper
        yield table_boxes_children(wrapper, improper)


def anonymous_table_boxes(box):
    """Remove and add boxes according to the table model.

    Take and return a ``Box`` object.

    See http://www.w3.org/TR/CSS21/tables.html#anonymous-boxes

    """
    if not isinstance(box, boxes.ParentBox):
        return box

    # Do recursion.
    children = [anonymous_table_boxes(child) for child in box.children]
    return table_boxes_children(box, children)


def table_boxes_children(box, children):
    """Internal implementation of anonymous_table_boxes()."""
    if isinstance(box, boxes.TableColumnBox):  # rule 1.1
        # Remove all children.
        children = []
    elif isinstance(box, boxes.TableColumnGroupBox):  # rule 1.2
        # Remove children other than table-column.
        children = [
            child for child in children
            if isinstance(child, boxes.TableColumnBox)
        ]
        # Rule XXX (not in the spec): column groups have at least
        # one column child.
        if not children:
            children = [boxes.TableColumnBox.anonymous_from(box, [])
                        for _i in xrange(box.span)]

    # rule 1.3
    if box.tabular_container and len(children) >= 2:
        # TODO: Maybe only remove text if internal is also
        #       a proper table descendant of box.
        # This is what the spec says, but maybe not what browsers do:
        # http://lists.w3.org/Archives/Public/www-style/2011Oct/0567

        # Last child
        internal, text = children[-2:]
        if (internal.internal_table_or_caption and is_whitespace(text)):
            children.pop()

        # First child
        if len(children) >= 2:
            text, internal = children[:2]
            if (internal.internal_table_or_caption and is_whitespace(text)):
                children.pop(0)

        # Children other than first and last that would be removed by
        # rule 1.3 are also removed by rule 1.4 below.

    children = [
        child
        for prev_child, child, next_child in zip(
            [None] + children[:-1],
            children,
            children[1:] + [None]
        )
        if not (
            # Ignore some whitespace: rule 1.4
            prev_child and prev_child.internal_table_or_caption and
            next_child and next_child.internal_table_or_caption and
            is_whitespace(child)
        )
    ]

    if isinstance(box, boxes.TableBox) or \
            isinstance(box, boxes.InlineTableBox):
        # Rule 2.1
        children = wrap_improper(box, children, boxes.TableRowBox,
            lambda child: child.proper_table_child)
    elif isinstance(box, boxes.TableRowGroupBox):
        # Rule 2.2
        children = wrap_improper(box, children, boxes.TableRowBox)

    if isinstance(box, boxes.TableRowBox):
        # Rule 2.3
        children = wrap_improper(box, children, boxes.TableCellBox)
    else:
        # Rule 3.1
        children = wrap_improper(box, children, boxes.TableRowBox,
            lambda child: not isinstance(child, boxes.TableCellBox))

    # Rule 3.2
    if isinstance(box, boxes.InlineBox):
        children = wrap_improper(box, children, boxes.InlineTableBox,
            lambda child: not child.proper_table_child)
    else:
        parent_type = type(box)
        children = wrap_improper(box, children, boxes.TableBox,
            lambda child:
                not child.proper_table_child or
                parent_type in child.proper_parents)


    if isinstance(box, boxes.TableBox):
        return wrap_table(box, children)
    else:
        return box.copy_with_children(children)


def wrap_table(box, children):
    """Take a table box and return it in its table wrapper box.

    Also re-order children and assign grid positions to each column an cell.

    http://www.w3.org/TR/CSS21/tables.html#model
    http://www.w3.org/TR/CSS21/tables.html#table-layout

    """
    # Group table children by type
    columns = []
    rows = []
    all_captions = []
    by_type = {
        boxes.TableColumnBox: columns,
        boxes.TableColumnGroupBox: columns,
        boxes.TableRowBox: rows,
        boxes.TableRowGroupBox: rows,
        boxes.TableCaptionBox: all_captions,
    }
    for child in children:
        by_type[type(child)].append(child)

    # Split top and bottom captions
    captions = {'top': [], 'bottom': []}
    for caption in all_captions:
        captions[caption.style.caption_side].append(caption)

    # Assign X positions on the grid to column boxes
    column_groups = list(wrap_improper(
        box, columns, boxes.TableColumnGroupBox))
    grid_x = 0
    for group in column_groups:
        group.grid_x = grid_x
        if group.children:
            for column in group.children:
                column.grid_x = grid_x
                grid_x += 1
            group.span = len(group.children)
        else:
            grid_x += group.span

    # Extract the optional header and footer groups.
    body_row_groups = []
    header = None
    footer = None
    for group in wrap_improper(box, rows, boxes.TableRowGroupBox):
        display = group.style.display
        if display == 'table-header-group' and header is None:
            group.header_group = True
            header = group
        elif display == 'table-footer-group' and footer is None:
            group.footer_group = True
            footer = group
        else:
            body_row_groups.append(group)

    row_groups = (
        ([header] if header is not None else []) +
        body_row_groups +
        ([footer] if footer is not None else []))

    # Assign a (x,y) position in the grid to each cell.
    # rowspan can not extend beyond a row group, so each row group
    # is independent.
    # http://www.w3.org/TR/CSS21/tables.html#table-layout
    # Column 0 is on the left if direction is ltr, right if rtl.
    # This algorithm does not change.
    for group in row_groups:
        # Indexes: row number in the group.
        # Values: set of cells already occupied by row-spanning cells.
        occupied_cells_by_row = [set() for row in group.children]
        for row in group.children:
            occupied_cells_in_this_row = occupied_cells_by_row.pop(0)
            # The list is now about rows after this one.
            grid_x = 0
            for cell in row.children:
                # Make sure that the first grid cell is free.
                while grid_x in occupied_cells_in_this_row:
                    grid_x += 1
                cell.grid_x = grid_x
                new_grid_x = grid_x + cell.colspan
                # http://www.w3.org/TR/html401/struct/tables.html#adef-rowspan
                if cell.rowspan != 1:
                    if cell.rowspan == 0:
                        # All rows until the end of the group
                        spanned_rows = occupied_cells_by_row
                        cell.rowspan = len(occupied_cells_by_row) + 1
                    else:
                        spanned_rows = occupied_cells_by_row[:cell.rowspan - 1]
                    spanned_columns = range(grid_x, new_grid_x)
                    for occupied_cells in spanned_rows:
                        occupied_cells.update(spanned_columns)
                grid_x = new_grid_x

    table = box.copy_with_children(row_groups)
    table.column_groups = tuple(column_groups)

#    table.body_row_groups = tuple(body_row_groups)
#    table.row_groups = tuple(row_groups)
#    table.header = header
#    table.footer = footer

    # TODO: re-enable this once we support inline-block layout.
    if False: # isinstance(box, boxes.InlineTableBox):
        # XXX disabled
        wrapper_type = boxes.InlineBlockBox
    else:
        wrapper_type = boxes.BlockBox

    wrapper = wrapper_type.anonymous_from(
        box, captions['top'] + [table] + captions['bottom'])
    wrapper.is_table_wrapper = True
    if not table.style.anonymous:
        # Non-inherited properties of the table element apply to one
        # of the wrapper and the table. The other get the initial value.
        for name in properties.TABLE_WRAPPER_BOX_PROPERTIES:
            wrapper.style[name] = table.style[name]
            table.style[name] = properties.INITIAL_VALUES[name]
    # else: non-inherited properties already have their initial values

    return wrapper


def process_whitespace(box):
    """First part of "The 'white-space' processing model".

    See http://www.w3.org/TR/CSS21/text.html#white-space-model

    """
    following_collapsible_space = False
    for child in box.descendants():
        if not isinstance(child, boxes.TextBox):
            if isinstance(child, boxes.AtomicInlineLevelBox):
                following_collapsible_space = False
            continue

        text = child.text
        if not text:
            continue

        # Normalize line feeds
        text = re.sub('\r\n?', '\n', text)

        handling = child.style.white_space

        if handling in ('normal', 'nowrap', 'pre-line'):
            # \r characters were removed/converted earlier
            text = re.sub('[\t ]*\n[\t ]*', '\n', text)
        if handling in ('pre', 'pre-wrap'):
            # \xA0 is the non-breaking space
            text = text.replace(' ', '\xA0')
            if handling == 'pre-wrap':
                # "a line break opportunity at the end of the sequence"
                # \u200B is the zero-width space, marks a line break
                # opportunity.
                text = re.sub('\xA0([^\xA0]|$)', '\xA0\u200B\\1', text)
        elif handling in ('normal', 'nowrap'):
            # TODO: this should be language-specific
            # Could also replace with a zero width space character (U+200B),
            # or no character
            # CSS3: http://www.w3.org/TR/css3-text/#line-break-transform
            text = text.replace('\n', ' ')

        if handling in ('normal', 'nowrap', 'pre-line'):
            text = text.replace('\t', ' ')
            text = re.sub(' +', ' ', text)
            if following_collapsible_space and text.startswith(' '):
                text = text[1:]
            following_collapsible_space = text.endswith(' ')
        else:
            following_collapsible_space = False

        child.text = text
    return box


def inline_in_block(box):
    """Build the structure of lines inside blocks and return a new box tree.

    Consecutive inline-level boxes in a block container box are wrapped into a
    line box, itself wrapped into an anonymous block box.

    This line box will be broken into multiple lines later.

    This is the first case in
    http://www.w3.org/TR/CSS21/visuren.html#anonymous-block-level

    Eg.::

        BlockBox[
            TextBox['Some '],
            InlineBox[TextBox['text']],
            BlockBox[
                TextBox['More text'],
            ]
        ]

    is turned into::

        BlockBox[
            AnonymousBlockBox[
                LineBox[
                    TextBox['Some '],
                    InlineBox[TextBox['text']],
                ]
            ]
            BlockBox[
                LineBox[
                    TextBox['More text'],
                ]
            ]
        ]

    """
    if not isinstance(box, boxes.ParentBox):
        return box

    children = [inline_in_block(child) for child in box.children
                # Remove empty text boxes.
                # (They may have been emptied by process_whitespace().)
                if not (isinstance(child, boxes.TextBox) and not child.text)]

    if not isinstance(box, boxes.BlockContainerBox):
        return box.copy_with_children(children)

    new_line_children = []
    new_children = []
    for child_box in children:
        assert not isinstance(child_box, boxes.LineBox)
        if isinstance(child_box, boxes.InlineLevelBox):
            # Do not append white space at the start of a line:
            # It would be removed during layout.
            if new_line_children or not (
                    isinstance(child_box, boxes.TextBox) and
                    # Sequence of white-space was collapsed to a single
                    # space by process_whitespace().
                    child_box.text == ' ' and
                    child_box.style.white_space in (
                        'normal', 'nowrap', 'pre-line')):
                new_line_children.append(child_box)
        else:
            if new_line_children:
                # Inlines are consecutive no more: add this line box
                # and create a new one.
                line_box = boxes.LineBox.anonymous_from(box, new_line_children)
                anonymous = boxes.BlockBox.anonymous_from(box, [line_box])
                new_children.append(anonymous)
                new_line_children = []
            new_children.append(child_box)
    if new_line_children:
        # There were inlines at the end
        line_box = boxes.LineBox.anonymous_from(box, new_line_children)
        if new_children:
            anonymous = boxes.BlockBox.anonymous_from(box, [line_box])
            new_children.append(anonymous)
        else:
            # Only inline-level children: one line box
            new_children.append(line_box)

    return box.copy_with_children(new_children)


def block_in_inline(box):
    """Build the structure of blocks inside lines.

    Inline boxes containing block-level boxes will be broken in two
    boxes on each side on consecutive block-level boxes, each side wrapped
    in an anonymous block-level box.

    This is the second case in
    http://www.w3.org/TR/CSS21/visuren.html#anonymous-block-level

    Eg. if this is given::

        BlockBox[
            LineBox[
                InlineBox[
                    TextBox['Hello.'],
                ],
                InlineBox[
                    TextBox['Some '],
                    InlineBox[
                        TextBox['text']
                        BlockBox[LineBox[TextBox['More text']]],
                        BlockBox[LineBox[TextBox['More text again']]],
                    ],
                    BlockBox[LineBox[TextBox['And again.']]],
                ]
            ]
        ]

    this is returned::

        BlockBox[
            AnonymousBlockBox[
                LineBox[
                    InlineBox[
                        TextBox['Hello.'],
                    ],
                    InlineBox[
                        TextBox['Some '],
                        InlineBox[TextBox['text']],
                    ]
                ]
            ],
            BlockBox[LineBox[TextBox['More text']]],
            BlockBox[LineBox[TextBox['More text again']]],
            AnonymousBlockBox[
                LineBox[
                    InlineBox[
                    ]
                ]
            ],
            BlockBox[LineBox[TextBox['And again.']]],
            AnonymousBlockBox[
                LineBox[
                    InlineBox[
                    ]
                ]
            ],
        ]

    """
    if not isinstance(box, boxes.ParentBox):
        return box

    new_children = []
    changed = False

    for child in box.children:
        if isinstance(child, boxes.LineBox):
            assert len(box.children) == 1, ('Line boxes should have no '
                'siblings at this stage, got %r.' % box.children)
            stack = None
            while 1:
                new_line, block, stack = _inner_block_in_inline(child, stack)
                if block is None:
                    break
                anon = boxes.BlockBox.anonymous_from(box, [new_line])
                new_children.append(anon)
                new_children.append(block_in_inline(block))
                # Loop with the same child and the new stack.
            if new_children:
                # Some children were already added, this became a block
                # context.
                new_child = boxes.BlockBox.anonymous_from(box, [new_line])
            else:
                # Keep the single line box as-is, without anonymous blocks.
                new_child = new_line
        else:
            # Not in an inline formatting context.
            new_child = block_in_inline(child)

        if new_child is not child:
            changed = True
        new_children.append(new_child)

    if changed:
        return box.copy_with_children(new_children)
    else:
        return box


def _inner_block_in_inline(box, skip_stack=None):
    """Find a block-level box in an inline formatting context.

    If one is found, return ``(new_box, block_level_box, resume_at)``.
    ``new_box`` contains all of ``box`` content before the block-level box.
    ``resume_at`` can be passed as ``skip_stack`` in a new call to
    this function to resume the search just after the block-level box.

    If no block-level box is found after the position marked by
    ``skip_stack``, return ``(new_box, None, None)``

    """
    new_children = []
    block_level_box = None
    resume_at = None
    changed = False

    if skip_stack is None:
        skip = 0
    else:
        skip, skip_stack = skip_stack

    for index, child in box.enumerate_skip(skip):
        if isinstance(child, boxes.BlockLevelBox):
            assert skip_stack is None  # Should not skip here
            block_level_box = child
            index += 1  # Resume *after* the block
        else:
            if isinstance(child, boxes.InlineBox):
                recursion = _inner_block_in_inline(child, skip_stack)
                skip_stack = None
                new_child, block_level_box, resume_at = recursion
            else:
                assert skip_stack is None  # Should not skip here
                if isinstance(child, boxes.ParentBox):
                    # inline-block or inline-table.
                    new_child = block_in_inline(child)
                else:
                    # text or replaced box
                    new_child = child
                # block_level_box is still None.
            if new_child is not child:
                changed = True
            new_children.append(new_child)
        if block_level_box is not None:
            resume_at = (index, resume_at)
            box = box.copy_with_children(new_children)
            break
    else:
        if changed or skip:
            box = box.copy_with_children(new_children)

    return box, block_level_box, resume_at


def set_canvas_background(root_box):
    """
    Set a ``canvas_background`` attribute on the box for the root element,
    with style for the canvas background, taken from the root elememt
    or a <body> child of the root element.

    See http://www.w3.org/TR/CSS21/colors.html#background
    """
    chosen_box = root_box
    if (root_box.element_tag.lower() == 'html' and
            root_box.style.background_color.alpha == 0 and
            root_box.style.background_image == 'none'):
        for child in root_box.children:
            if child.element_tag.lower() == 'body':
                chosen_box = child
                break

    style = chosen_box.style
    root_box.canvas_background = style
    chosen_box.style = style.updated_copy(properties.BACKGROUND_INITIAL)
    return root_box


def set_viewport_overflow(root_box):
    """
    Set a ``viewport_overflow`` attribute on the box for the root element.

    Like backgrounds, ``overflow`` on the root element must be propagated
    to the viewport.

    See http://www.w3.org/TR/CSS21/visufx.html#overflow
    """
    chosen_box = root_box
    if (root_box.element_tag.lower() == 'html' and
            root_box.style.overflow == 'visible'):
        for child in root_box.children:
            if child.element_tag.lower() == 'body':
                chosen_box = child
                break

    root_box.viewport_overflow = chosen_box.style.overflow
    chosen_box.style = chosen_box.style.updated_copy({'overflow': 'visible'})
    return root_box