Merge branch 'target-counter' of https://github.com/Tontyna/WeasyPrint into target-collector

2024-10-05 08:27:22 +03:00 · 2018-03-25 22:03:19 +02:00 · 2018-03-25 22:03:19 +02:00 · 1ebd36e83a
commit 1ebd36e83a
parent 10fc8919af 307143266c
6 changed files with 685 additions and 111 deletions
--- a/weasyprint/css/computed_values.py
+++ b/weasyprint/css/computed_values.py
@ -9,10 +9,13 @@
    :license: BSD, see LICENSE for details.

 """
+from urllib.parse import unquote

 from .. import text
+from ..logger import LOGGER
 from ..urls import get_link_attribute
 from .properties import INITIAL_VALUES, Dimension
+from .targets import TARGET_COLLECTOR

 ZERO_PIXELS = Dimension(0, 'px')

@ -399,16 +402,131 @@ def column_gap(computer, name, value):
    return length(computer, name, value, pixels_only=True)


-@register_computer('content')
-def content(computer, name, values):
-    """Compute the ``content`` property."""
+def _toSelector(el, pseudo_type):
+    """Return a selector-like string for ``el`` to use in log messages."""
+    elname = type(el).__name__
+    if elname == 'PageType':
+        return ('@page%s %s%s%s %s    ' % (
+            ' ' + el.name if el.name else '',
+            ':' + el.side if el.side else '',
+            ':blank' if el.blank else '',
+            ':first' if el.first else '',
+            pseudo_type if pseudo_type else ''
+            )).rstrip()
+    elif elname == 'Element':
+        return '%s%s' % (
+            el.tag,
+            '::' + pseudo_type if pseudo_type else ''
+            )
+    else:
+        return '<%s>' % (
+            ('%s %s' % (elname, pseudo_type)).rstrip())
+
+
+@register_computer('string-set')
+def string_set(computer, name, values):
+    """Compute the <content-list>s of the ``string-set`` property."""
+    # never expected to happen, but kept as a safeguard for keyword values
    if values in ('normal', 'none'):
        return values
+    if type(computer.element).__name__ != 'Element' or computer.pseudo_type:
+        LOGGER.debug(
+            'property `%s` discarded: %s in selector `%s`.',
+            name,
+            'Not a real element',
+            _toSelector(computer.element, computer.pseudo_type))
+        return 'none'
+    return tuple(
+        (string_name, content(computer, name, string_values))
+        for i, (string_name, string_values) in enumerate(values))
+
+
+@register_computer('bookmark-label')
+@register_computer('content')
+def content(computer, name, values):
+    """Compute the <content-list>s of ``content``,
+    ``bookmark-label`` and ``string-set`` property."""
+
+    class ComputedContentError(ValueError):
+        """Invalid or unsupported values for a known CSS property."""
+
+    def computed_content_error(level, reason):
+        getattr(LOGGER, level)(
+            'property `%s` discarded: %s in selector `%s`.',
+            name,
+            reason,
+            _toSelector(computer.element, computer.pseudo_type)
+            )
+
+    def parse_target_type(type_, values):
+        if type(computer.element).__name__ != 'Element':
+            raise ComputedContentError('\'%s\' not (yet) supported' % (type_,))
+        # values = ['STRING', <anchorname>, ...]
+        #     or   ['attr', <attrname>, ...  ]
+        if values[0] == 'attr':
+            attrname = values[1]
+            href = computer.element.get(attrname, '')
+        else:
+            href = values[1]
+        # [spec](https://www.w3.org/TR/css-content-3/#target-counter)
+        # says:
+        # > If there’s no fragment, if the ID referenced isn’t there,
+        # > or if the URL points to an outside document,
+        # > the user agent must treat that as an error.
+        if href == '' or href == '#':
+            raise ComputedContentError('Empty anchor name in %s' % (type_,))
+        if not href.startswith('#'):
+            raise ComputedContentError(
+                'No %s for external URI reference "%s"' % (type_, href))
+        href = unquote(href[1:])
+        TARGET_COLLECTOR.collect_computed_target(href)
+        return [href] + values[2:]
+
+    if values in ('normal', 'none'):
+        return values
+
+    if name == 'content':
+        # [CSS3 spec](https://www.w3.org/TR/css-content-3/#content-property)
+        # says:
+        # > 'content' applies to:
+        # > ::before, ::after, ::marker, and page margin boxes.
+        # > Image and url values can apply to all elements.
+        if not computer.pseudo_type:
+            computed_content_error(
+                'debug',
+                'Not a pseudo-element')
+            return 'none'
    else:
+        # ignore string-set, bookmark-label unless in a *real* element
+        if type(computer.element).__name__ != 'Element' \
+           or computer.pseudo_type:
+            computed_content_error(
+                'debug',
+                'Not a real element')
+            return 'none'
+
+    target_checks = ['target-counter', 'target-counters', 'target-text']
+    try:
+        # TODO: catch `string()` when not in @page-margin
        return tuple(
            ('STRING', computer.element.get(value, ''))
-            if type_ == 'attr' else (type_, value)
+            if type_ == 'attr' else (
+                (type_, parse_target_type(type_, value))
+                if type_ in target_checks else (type_, value)
+            )
            for type_, value in values)
+    except ComputedContentError as exc:
+        computed_content_error(
+            'warning',
+            exc.args[0] if exc.args and exc.args[0] else 'invalid content')
+        return 'none'
+    except AttributeError as exc:
+        # attr() in @page-'element'
+        # e.g.: 'PageType' object has no attribute 'get'
+        computed_content_error(
+            'warning',
+            exc.args[0] if exc.args and exc.args[0] else 'invalid content')
+        return 'none'


@register_computer('display')
@ -495,7 +613,9 @@ def anchor(computer, name, values):
    """Compute the ``anchor`` property."""
    if values != 'none':
        _, key = values
-        return computer.element.get(key) or None
+        anchor_name = computer.element.get(key) or None
+        TARGET_COLLECTOR.collect_anchor(anchor_name)
+        return anchor_name


@register_computer('link')
--- a/weasyprint/css/targets.py
+++ b/weasyprint/css/targets.py
@ -0,0 +1,170 @@
+"""
+    weasyprint.css.targets
+    ----------------------
+
+    An attempt to implement target-counter, target-counters and target-text.
+
+    The TARGET_COLLECTOR is a structure providing the counter values of the
+    required targets and the data needed to build PENDING targets later,
+    once the boxes of all targeted anchors have been built.
+
+    :copyright: Copyright 2018 Simon Sapin and contributors, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+
+"""
+
+import copy  # deepcopy needed!
+
+from ..logger import LOGGER
+
+
+# not sure what's the Python way to create consts, maybe a namedtuple?
+# thx [Jon Betts](https://stackoverflow.com/a/23274028)
+class _STATE(object):
+    """Constants for target states."""
+    PENDING = 0
+    UPTODATE = 1
+    UNDEFINED = 2
+    __stateToName = {
+        PENDING: 'PENDING',
+        UPTODATE: 'UPTODATE',
+        UNDEFINED: 'UNDEFINED',
+    }
+
+    def __setattr__(self, *_):
+        """Ignore attribute assignment on instances, keeping states fixed."""
+        pass
+
+    def name(self, state):
+        """Return the human-readable name of ``state``."""
+        return self.__stateToName.get(state, 'Invalid state')
+
+
+TARGET_STATE = _STATE()
+
+
+class TargetLookupItem(object):
+    """Item collected by the TargetCollector."""
+
+    def __init__(self, state=TARGET_STATE.PENDING):
+        self.state = state
+        # required by target-counter and target-counters
+        self.target_counter_values = {}
+        # needed for target-text via TEXT_CONTENT_EXTRACTORS
+        self.target_box = None
+        # PENDING targets: source box -> function parsing its content again
+        self.pending_boxes = {}
+
+
+class _TargetCollector(object):
+    """Collect and provide the data for CSS content using `target-*`."""
+
+    def __init__(self):
+        self.reset()
+
+    def reset(self):
+        self.had_peding_targets = False
+        self.existing_anchors = []
+        self.items = {}
+
+    def _addtarget(self, anchor_name):
+        return self.items.setdefault(anchor_name, TargetLookupItem())
+
+    def collect_anchor(self, anchor_name):
+        """
+        Store `anchor_name` in `existing_anchors` (warns on redefinition).
+        Should be called by computed_values.anchor().
+        """
+        if anchor_name and isinstance(anchor_name, str):
+            if anchor_name in self.existing_anchors:
+                LOGGER.warning('  ! anchor redefined: %s', anchor_name)
+            else:
+                self.existing_anchors.append(anchor_name)
+                LOGGER.debug('  + anchor added: "%s" ', anchor_name)
+
+    def collect_computed_target(self, anchor_name):
+        """
+        Store the (internal!) anchor name of a `computed` target,
+        as verified by computed_values.content().
+
+        `anchor_name` comes without '#' and is already unquoted.
+        """
+        if anchor_name and isinstance(anchor_name, str):
+            self._addtarget(anchor_name)
+
+    def verify_collection(self):
+        """Flag targets without an existing anchor as UNDEFINED (optional)."""
+        LOGGER.debug('------- collected targets -------------')
+        for key, item in self.items.items():
+            # mark target names not in existing_anchors as UNDEFINED
+            if key not in self.existing_anchors:
+                item.state = TARGET_STATE.UNDEFINED
+            LOGGER.debug('%s %s', key, TARGET_STATE.name(item.state))
+        LOGGER.debug('------- existing anchors -------------')
+        LOGGER.debug(self.existing_anchors)
+
+    def lookup_target(self, anchor_name, source_box, parse_again_function):
+        """Called while computing content when source_box needs a target-*.
+        Returns a TargetLookupItem, UPTODATE if an anchor element already
+        filled it, else PENDING (the content must be parsed again later)
+        or UNDEFINED (unknown anchor: the box's content is set to 'none').
+        """
+        item = self.items.get(
+            anchor_name,
+            TargetLookupItem(TARGET_STATE.UNDEFINED))
+        LOGGER.debug(
+            'lookup_target %s %s', anchor_name, TARGET_STATE.name(item.state))
+        if item.state == TARGET_STATE.PENDING:
+            if anchor_name not in self.existing_anchors:
+                item.state = TARGET_STATE.UNDEFINED
+            else:
+                self.had_peding_targets = True
+                LOGGER.debug('   -> still pending. Keep infos.')
+                item.pending_boxes.setdefault(source_box, parse_again_function)
+
+        if item.state == TARGET_STATE.UNDEFINED:
+            LOGGER.error(
+                'content discarded: target points to undefined anchor "%s"',
+                anchor_name)
+            # feedback to the caller: discard the parent box
+            # (currently handled by `build.before_after_to_box()`)
+            source_box.style['content'] = 'none'
+        return item
+
+    def store_target(self, anchor_name, target_counter_values, target_box):
+        """
+        Called for every anchor element in build.element_to_box.
+        A PENDING TargetLookupItem is updated and becomes UPTODATE;
+        only previously collected anchor names are stored.
+        """
+        item = self.items.get(anchor_name, None)
+        if item:
+            LOGGER.debug(
+                'store_target? %s %s', anchor_name,
+                TARGET_STATE.name(item.state))
+            if item.state == TARGET_STATE.PENDING:
+                LOGGER.debug('   -> update: %s', target_counter_values)
+                # a real, independent deep copy of the values is needed
+                item.state = TARGET_STATE.UPTODATE
+                item.target_counter_values = copy.deepcopy(
+                    target_counter_values)
+                item.target_box = target_box
+            else:
+                LOGGER.debug(
+                    '   -> duplicate anchor definition: %s' % anchor_name)
+        else:
+            LOGGER.debug('    -> achor %s not targetted' % anchor_name)
+
+    def check_peding_targets(self):
+        if not self.had_peding_targets:
+            return
+        LOGGER.info('Step 4.3 Reparsing pending targets')
+        self.had_peding_targets = False
+        for key, item in self.items.items():
+            # create the pending content boxes NOW
+            # UNDEFINED items never have a `parse_again` function
+            for abox, func in item.pending_boxes.items():
+                func()
+
+
+TARGET_COLLECTOR = _TargetCollector()
--- a/weasyprint/css/validation.py
+++ b/weasyprint/css/validation.py
@ -733,7 +733,10 @@ def clip(token):

@validator(wants_base_url=True)
 def content(tokens, base_url):
-    """``content`` property validation."""
+    """``content`` property validation.
+    TODO: should become a @comma_separated_list to validate
+          CSS3 <content-replacement>
+    """
    keyword = get_single_keyword(tokens)
    if keyword in ('normal', 'none'):
        return keyword
@ -748,38 +751,8 @@ def validate_content_token(base_url, token):
    Return (type, content) or False for invalid tokens.

    """
-    quote_type = CONTENT_QUOTE_KEYWORDS.get(get_keyword(token))
-    if quote_type is not None:
-        return ('QUOTE', quote_type)
-
-    type_ = token.type
-    if type_ == 'string':
-        return ('STRING', token.value)
-    if type_ == 'url':
-        return ('URI', safe_urljoin(base_url, token.value))
-    function = parse_function(token)
-    if function:
-        name, args = function
-        prototype = (name, [a.type for a in args])
-        args = [getattr(a, 'value', a) for a in args]
-        if prototype == ('attr', ['ident']):
-            return (name, args[0])
-        elif prototype in (('counter', ['ident']),
-                           ('counters', ['ident', 'string'])):
-            args.append('decimal')
-            return (name, args)
-        elif prototype in (('counter', ['ident', 'ident']),
-                           ('counters', ['ident', 'string', 'ident'])):
-            style = args[-1]
-            if style in ('none', 'decimal') or style in counters.STYLES:
-                return (name, args)
-        elif prototype in (('string', ['ident']),
-                           ('string', ['ident', 'ident'])):
-            if len(args) > 1:
-                args[1] = args[1].lower()
-                if args[1] not in ('first', 'start', 'last', 'first-except'):
-                    raise InvalidValues()
-            return (name, args)
+    return validate_content_list_token(
+        base_url, token, for_content_box=True)


 def parse_function(function_token):
@ -1614,10 +1587,11 @@ def lang(token):
        return ('string', token.value)


-@validator(unstable=True)
-def bookmark_label(tokens):
+@validator(unstable=True, wants_base_url=True)
+def bookmark_label(tokens, base_url):
    """Validation for ``bookmark-label``."""
-    parsed_tokens = tuple(validate_content_list_token(v) for v in tokens)
+    parsed_tokens = tuple(validate_content_list_token(
+        base_url, v, for_content_box=False) for v in tokens)
    if None not in parsed_tokens:
        return parsed_tokens

@ -1634,50 +1608,212 @@ def bookmark_level(token):
        return 'none'


-@validator(unstable=True)
+@validator(unstable=True, wants_base_url=True)
@comma_separated_list
-def string_set(tokens):
+def string_set(tokens, base_url):
    """Validation for ``string-set``."""
    if len(tokens) >= 2:
        var_name = get_keyword(tokens[0])
        parsed_tokens = tuple(
-            validate_content_list_token(v) for v in tokens[1:])
+            validate_content_list_token(
+                base_url, v, for_content_box=False) for v in tokens[1:])
        if None not in parsed_tokens:
            return (var_name, parsed_tokens)
    elif tokens and tokens[0].value == 'none':
        return 'none'


-def validate_content_list_token(token):
+def validate_content_list_token(base_url, token, for_content_box):
    """Validation for a single token of <content-list> used in GCPM.
+    Not really.
+    GCPM <content-list> =
+        [ <string> | contents | <image> | <quote> | <target> | <leader()> ]+
+    (Draft, 24 January 2018. Really a DRAFT. Not an RFC. Not a SPEC.
+    BTW: The current Draft GCPM ``string-set`` value =
+        none | [ <custom-ident> <string>+ ]#
+
+    So. This is the validation for tokens that make sense in
+    css properties ``string-set``, ``bookmark-label`` and  ``content``:
+
+    <modified-content-list> = [
+      <string> | attr() | <counter> | <target> |
+      <content> |
+      url() | <quote> | string() | leader()
+    ]+
+
+    :param for_content_box: controls which tokens are valid
+
+    Valid tokens when ``for_content_box`` ==
+
+    - True (called from/for css property 'content':
+
+      <string> | attr() | <counter> | <target> |
+      url() | <quote> | string() | leader()
+
+      The final decision whether a token is valid is the job of
+      computed_values.content()
+
+    - False (called from/for css properties 'string-set', 'bookmark-label':
+      <string> | attr() | <counter> | <target> |
+      <content>

    Return (type, content) or False for invalid tokens.
-
    """
+
+    def validate_target_token(token):
+        """ validate first parameter of ``target-*()``-token
+            returns ['attr', '<attrname>' ]
+                 or ['STRING', '<anchorname>'] when valid
+            evaluation of the anchorname is job of compute()
+        """
+        # TODO: what about ``attr(href url)`` ?
+        if isinstance(token, str):
+            # url() or "string" given
+            # verify #anchor is done in compute()
+            # if token.value.startswith('#'):
+            return ['STRING', token]
+        # parse_function takes token.type for granted!
+        if not hasattr(token, 'type'):
+            return
+        function = parse_function(token)
+        if function:
+            name, args = function
+            params = [a.type for a in args]
+            values = [getattr(a, 'value', a) for a in args]
+            if name == 'attr' and params == ['ident']:
+                return [name, values[0]]
+
+    if for_content_box:
+        quote_type = CONTENT_QUOTE_KEYWORDS.get(get_keyword(token))
+        if quote_type is not None:
+            return ('QUOTE', quote_type)
+    else:
+        if get_keyword(token) == 'contents':
+            return ('content', 'text')
    type_ = token.type
    if type_ == 'string':
        return ('STRING', token.value)
+    if for_content_box:
+        if type_ == 'url':
+            return ('URI', safe_urljoin(base_url, token.value))
    function = parse_function(token)
-    if function:
-        name, args = function
-        prototype = (name, tuple(a.type for a in args))
-        args = tuple(getattr(a, 'value', a) for a in args)
-        if prototype == ('attr', ('ident',)):
+    if not function:
+        # to pass unit test `test_boxes.test_before_after`
+        # the log string must contain "invalid value"
+        raise InvalidValues('invalid value/unsupported token ´%s\´' % (token,))
+
+    name, args = function
+    # known functions in 'content', 'string-set' and 'bookmark-label':
+    valid_functions = ['attr',
+                       'counter', 'counters',
+                       'target-counter', 'target-counters', 'target-text']
+    # 'content'
+    if for_content_box:
+        valid_functions += ['string',
+                            'leader']
+    else:
+        valid_functions += ['content']
+    unsupported_functions = ['leader']
+    if name not in valid_functions:
+        # to pass unit test `test_boxes.test_before_after`
+        # the log string must contain "invalid value"
+        raise InvalidValues('invalid value: function `%s()`' % (name))
+    if name in unsupported_functions:
+        # suppress -- not (yet) implemented, no error
+        LOGGER.warn('\'%s()\' not (yet) supported', name)
+        return ('STRING', '')
+
+    prototype = (name, [a.type for a in args])
+    args = [getattr(a, 'value', a) for a in args]
+    if prototype == ('attr', ['ident']):
+        # TODO: what about ``attr(href url)`` ?
+        return (name, args[0])
+    elif prototype in (('content', []), ('content', ['ident', ])):
+        if not args:
+            return (name, 'text')
+        elif args[0] in ('text', 'after', 'before', 'first-letter'):
            return (name, args[0])
-        elif prototype in (('content', ()), ('content', ('ident',))):
-            if not args:
-                return (name, 'text')
-            elif args[0] in ('text', 'after', 'before', 'first-letter'):
-                return (name, args[0])
-        elif prototype in (('counter', ('ident',)),
-                           ('counters', ('ident', 'string'))):
-            args += ('decimal',)
+    elif prototype in (('counter', ['ident']),
+                       ('counters', ['ident', 'string'])):
+        args.append('decimal')
+        return (name, args)
+    elif prototype in (('counter', ['ident', 'ident']),
+                       ('counters', ['ident', 'string', 'ident'])):
+        style = args[-1]
+        if style in ('none', 'decimal') or style in counters.STYLES:
            return (name, args)
-        elif prototype in (('counter', ('ident', 'ident')),
-                           ('counters', ('ident', 'string', 'ident'))):
+    elif prototype in (('string', ['ident']),
+                       ('string', ['ident', 'ident'])):
+        if len(args) > 1:
+            args[1] = args[1].lower()
+            if args[1] not in ('first', 'start', 'last', 'first-except'):
+                raise InvalidValues()
+        return (name, args)
+    # target-counter() = target-counter(
+    #    [ <string> | <url> ] , <custom-ident> ,
+    #    <counter-style>? )
+    elif name == 'target-counter':
+        if prototype in ((name, ['url', 'ident']),
+                         (name, ['url', 'ident', 'ident']),
+                         (name, ['string', 'ident']),
+                         (name, ['string', 'ident', 'ident']),
+                         (name, ['function', 'ident']),
+                         (name, ['function', 'ident', 'ident'])):
+            # default style
+            if len(args) == 2:
+                args.append('decimal')
+            # accept "#anchorname" and attr(x)
+            retval = validate_target_token(args.pop(0))
+            if retval is None:
+                raise InvalidValues()
            style = args[-1]
            if style in ('none', 'decimal') or style in counters.STYLES:
-                return (name, args)
+                return (name, retval + args)
+    # target-counters() = target-counters(
+    #    [ <string> | <url> ] , <custom-ident> , <string> ,
+    #    <counter-style>? )
+    elif name == 'target-counters':
+        if prototype in ((name, ['url', 'ident', 'string']),
+                         (name, ['url', 'ident', 'string', 'ident']),
+                         (name, ['string', 'ident', 'string']),
+                         (name, ['string', 'ident', 'string', 'ident']),
+                         (name, ['function', 'ident', 'string']),
+                         (name, ['function', 'ident', 'string', 'ident'])):
+            # default style
+            if len(args) == 3:
+                args.append('decimal')
+            # accept "#anchorname" and attr(x)
+            retval = validate_target_token(args.pop(0))
+            if retval is None:
+                raise InvalidValues()
+            style = args[-1]
+            if style in ('none', 'decimal') or style in counters.STYLES:
+                return (name, retval + args)
+    # target-text() = target-text(
+    #    [ <string> | <url> ] ,
+    #    [ content | before | after | first-letter ]? )
+    elif name == 'target-text':
+        if prototype in ((name, ['url']),
+                         (name, ['url', 'ident']),
+                         (name, ['string']),
+                         (name, ['string', 'ident']),
+                         (name, ['function']),
+                         (name, ['function', 'ident'])):
+            if len(args) == 1:
+                args.append('content')
+            # accept "#anchorname" and attr(x)
+            retval = validate_target_token(args.pop(0))
+            if retval is None:
+                raise InvalidValues()
+            style = args[-1]
+            # hint: the syntax isn't stable yet!
+            if style in ('content', 'after', 'before', 'first-letter'):
+                # build.TEXT_CONTENT_EXTRACTORS needs 'text'
+                # TODO: should we define
+                # TEXT_CONTENT_EXTRACTORS['content'] == box_text ?
+                if style == 'content':
+                    args[-1] = 'text'
+                return (name, retval + args)


@validator(unstable=True)
--- a/weasyprint/document.py
+++ b/weasyprint/document.py
@ -16,6 +16,7 @@ import cairocffi as cairo

 from . import CSS
 from .css import get_all_computed_styles
+from .css.targets import TARGET_COLLECTOR
 from .draw import draw_page, stacked
 from .fonts import FontConfiguration
 from .formatting_structure import boxes
@ -295,6 +296,15 @@ class Document(object):
    @classmethod
    def _render(cls, html, stylesheets, enable_hinting,
                presentational_hints=False, font_config=None):
+        # new Document needs fresh Target-Collection
+        # reset the TARGET_COLLECTOR before the Document's styles are parsed
+        # TODO: call reset at the end of this function to cleanup?
+        # - reset_target_collector Yes/No could be a useful option for users
+        #   who want to combine several documents...
+        # - in the future each Document should create its own TargetCollector
+        #   and hand it down to formatting_structure / pages / maybe css
+        TARGET_COLLECTOR.reset()
+
        if font_config is None:
            font_config = FontConfiguration()
        page_rules = []
--- a/weasyprint/formatting_structure/build.py
+++ b/weasyprint/formatting_structure/build.py
@ -21,6 +21,8 @@ import tinycss2.color3
 from . import boxes, counters
 from .. import html
 from ..css import properties
+from ..css.targets import TARGET_COLLECTOR, TARGET_STATE
+from ..logger import LOGGER

 # Maps values of the ``display`` CSS property to box types.
 BOX_TYPE_FROM_DISPLAY = {
@ -46,6 +48,13 @@ BOX_TYPE_FROM_DISPLAY = {
 def build_formatting_structure(element_tree, style_for, get_image_from_uri,
                               base_url):
    """Build a formatting structure (box tree) from an element tree."""
+
+    LOGGER.info('Step 4.1 - Verifying collected targets')
+    # BTW: this step is *not* required. Don't think it speeds things up a lot
+    # by tagging UNDEFINED targets in advance
+    TARGET_COLLECTOR.verify_collection()
+    LOGGER.info('Step 4.2 - Building basic boxes')
+
    box_list = element_to_box(
        element_tree, style_for, get_image_from_uri, base_url)
    if box_list:
@ -63,6 +72,10 @@ def build_formatting_structure(element_tree, style_for, get_image_from_uri,
            return style
        box, = element_to_box(
            element_tree, root_style_for, get_image_from_uri, base_url)
+
+    TARGET_COLLECTOR.check_peding_targets()
+    # state now: no more pending targets in pseudo-elements' content boxes
+
    box.is_for_root_element = True
    # If this is changed, maybe update weasy.layout.pages.make_margin_boxes()
    process_whitespace(box)
@ -144,6 +157,13 @@ def element_to_box(element, style_for, get_image_from_uri, base_url,

    children.extend(before_after_to_box(
        element, 'before', state, style_for, get_image_from_uri))
+
+    # collect the anchor's counter_values, maybe it's a target.
+    # to get the spec-conforming counter_values we must do it here,
+    # after the ::before is parsed and before the ::after is
+    if style['anchor']:
+        TARGET_COLLECTOR.store_target(style['anchor'], counter_values, box)
+
    text = element.text
    if text:
        children.append(boxes.TextBox.anonymous_from(box, text))
@ -168,6 +188,7 @@ def element_to_box(element, style_for, get_image_from_uri, base_url,
            counter_values.pop(name)

    box.children = children
+    # calculate string-set and bookmark-label
    set_content_lists(element, box, style, counter_values)

    # Specific handling for the element. (eg. replaced element)
@ -195,6 +216,10 @@ def before_after_to_box(element, pseudo_type, state, style_for,

    quote_depth, counter_values, _counter_scopes = state
    update_counters(state, style)
+
+    # pseudo-elements can't be anchors, no need to call
+    # TARGET_COLLECTOR.store_target(...)
+
    children = []
    if display == 'list-item':
        children.extend(add_box_marker(
@ -202,25 +227,57 @@ def before_after_to_box(element, pseudo_type, state, style_for,
    children.extend(content_to_boxes(
        style, box, quote_depth, counter_values, get_image_from_uri))

+    # content_to_boxes detected an UNDEFINED target, discard the box
+    if style['content'] == 'none':
+        return
+
    box.children = children
    yield box


-def content_to_boxes(style, parent_box, quote_depth, counter_values,
-                     get_image_from_uri, context=None, page=None):
-    """Takes the value of a ``content`` property and yield boxes."""
+def compute_content_list(return_a_string,
+                         content_list, parent_box, counter_values,
+                         parse_again_func,
+                         get_image_from_uri=None,
+                         quote_depth=None, quote_style=None,
+                         context=None, page=None):
+    """
+    Compute and return the string or the boxes corresponding
+    to the content_list.
+
+    :param return_a_string:
+        True for string-set-string and bookmark-label,
+        otherwise (content) a list of anonymous InlineBox(es) is returned
+    :param parse_again_func:
+        function to compute the content_list again
+        when TARGET_COLLECTOR.lookup_target() detected a TARGET_STATE.PENDING
+
+        build_formatting_structure calls
+        TARGET_COLLECTOR.check_peding_targets (sic)
+        after the first pass to do the required reparsing
+    """
+    boxlist = []
    texts = []
-    for type_, value in style['content']:
+    for type_, value in content_list:
        if type_ == 'STRING':
            texts.append(value)
-        elif type_ == 'URI':
+        elif type_ == 'URI' and not return_a_string and \
+                get_image_from_uri is not None:
            image = get_image_from_uri(value)
            if image is not None:
                text = ''.join(texts)
                if text:
-                    yield boxes.TextBox.anonymous_from(parent_box, text)
+                    boxlist.append(
+                        boxes.TextBox.anonymous_from(parent_box, text))
                texts = []
-                yield boxes.InlineReplacedBox.anonymous_from(parent_box, image)
+                boxlist.append(
+                    boxes.InlineReplacedBox.anonymous_from(parent_box, image))
+        elif type_ == 'content' and return_a_string:
+            added_text = TEXT_CONTENT_EXTRACTORS[value](parent_box)
+            # Simulate the step of white space processing
+            # (normally done during the layout)
+            added_text = added_text.strip()
+            texts.append(added_text)
        elif type_ == 'counter':
            counter_name, counter_style = value
            counter_value = counter_values.get(counter_name, [0])[-1]
@ -232,49 +289,133 @@ def content_to_boxes(style, parent_box, quote_depth, counter_values,
                for counter_value in counter_values.get(counter_name, [0])
            ))
        elif type_ == 'string' and context is not None and page is not None:
+            # string() is only valid in @page context
            text = context.get_string_set_for(page, *value)
            texts.append(text)
-        else:
-            assert type_ == 'QUOTE'
+        elif type_ == 'target-counter':
+            target_name, counter_name, counter_style = value
+            lookup_target = TARGET_COLLECTOR.lookup_target(
+                target_name, parent_box, parse_again_func)
+            if lookup_target.state == TARGET_STATE.UPTODATE:
+                counter_value = lookup_target.target_counter_values.get(
+                    counter_name, [0])[-1]
+                texts.append(counters.format(counter_value, counter_style))
+            else:
+                texts = []
+                break
+        elif type_ == 'target-counters':
+            target_name, counter_name, separator, counter_style = value
+            lookup_target = TARGET_COLLECTOR.lookup_target(
+                target_name, parent_box, parse_again_func)
+            if lookup_target.state == TARGET_STATE.UPTODATE:
+                target_counter_values = lookup_target.target_counter_values
+                texts.append(separator.join(
+                    counters.format(counter_value, counter_style)
+                    for counter_value in target_counter_values.get(
+                        counter_name, [0])
+                ))
+            else:
+                texts = []
+                break
+        elif type_ == 'target-text':
+            target_name, text_style = value
+            lookup_target = TARGET_COLLECTOR.lookup_target(
+                target_name, parent_box, parse_again_func)
+            if lookup_target.state == TARGET_STATE.UPTODATE:
+                target_box = lookup_target.target_box
+                text = TEXT_CONTENT_EXTRACTORS[text_style](target_box)
+                # Simulate the step of white space processing
+                # (normally done during the layout)
+                texts.append(text.strip())
+            else:
+                texts = []
+                break
+        elif type_ == 'QUOTE' and not return_a_string and \
+                quote_depth is not None and quote_style is not None:
            is_open, insert = value
            if not is_open:
                quote_depth[0] = max(0, quote_depth[0] - 1)
            if insert:
-                open_quotes, close_quotes = style['quotes']
+                open_quotes, close_quotes = quote_style
                quotes = open_quotes if is_open else close_quotes
                texts.append(quotes[min(quote_depth[0], len(quotes) - 1)])
            if is_open:
                quote_depth[0] += 1
+        else:
+            # TODO: in previous versions an AssertionError was raised!
+            pass
    text = ''.join(texts)
+    if return_a_string:
+        return text
    if text:
-        yield boxes.TextBox.anonymous_from(parent_box, text)
+        boxlist.append(boxes.TextBox.anonymous_from(parent_box, text))
+    return boxlist


-def compute_content_list_string(element, box, counter_values, content_list):
-    """Compute the string corresponding to the content-list."""
-    string = ''
-    for type_, value in content_list:
-        if type_ == 'STRING':
-            string += value
-        elif type_ == 'content':
-            added_text = TEXT_CONTENT_EXTRACTORS[value](box)
-            # Simulate the step of white space processing
-            # (normally done during the layout)
-            added_text = added_text.strip()
-            string += added_text
-        elif type_ == 'counter':
-            counter_name, counter_style = value
-            counter_value = counter_values.get(counter_name, [0])[-1]
-            string += counters.format(counter_value, counter_style)
-        elif type_ == 'counters':
-            counter_name, separator, counter_style = value
-            string += separator.join(
-                counters.format(counter_value, counter_style)
-                for counter_value
-                in counter_values.get(counter_name, [0]))
-        elif type_ == 'attr':
-            string += element.get(value, '')
-    return string
+def content_to_boxes(style, parent_box, quote_depth, counter_values,
+                     get_image_from_uri, context=None, page=None):
+    """Take the value of a ``content`` property and return boxes.
+
+    :param style:
+        computed style; its ``content``, ``quotes`` and ``display`` values
+        are read here
+    :param parent_box:
+        box the generated boxes are anonymous children of
+    :param quote_depth:
+        mutable sequence whose first item is the current quote nesting
+        depth (updated in place by compute_content_list), or None
+    :param counter_values:
+        mapping of counter names to their stacks of values
+    :param get_image_from_uri:
+        callable returning the image for a ``url()`` item
+    :param context:
+        passed through to compute_content_list, needed with ``page``
+        to resolve ``string()``
+    :param page:
+        passed through to compute_content_list, see ``context``
+    :returns:
+        a list of boxes (not a generator)
+    """
+    # Remember the nesting depth this content starts at: compute_content_list
+    # mutates ``quote_depth`` in place, so by the time a reparse is requested
+    # the shared list no longer holds the depth that applies here.
+    if quote_depth is None:
+        initial_quote_depth = None
+    else:
+        initial_quote_depth = list(quote_depth)
+
+    def parse_again():
+        """Rebuild all children of ``parent_box``.
+
+        Handed to compute_content_list as ``parse_again_func``; meant to be
+        called when TARGET_COLLECTOR.lookup_target() detected a
+        TARGET_STATE.PENDING. Being a closure, it keeps the arguments alive
+        without an explicit copy.deepcopy.
+        """
+        # Give each reparse its own copy, so neither the snapshot nor the
+        # shared depth of the first pass is modified a second time.
+        if initial_quote_depth is None:
+            reparse_quote_depth = None
+        else:
+            reparse_quote_depth = list(initial_quote_depth)
+        local_children = []
+        if style['display'] == 'list-item':
+            local_children.extend(add_box_marker(
+                parent_box, counter_values, get_image_from_uri))
+        local_children.extend(content_to_boxes(
+            style, parent_box,
+            reparse_quote_depth, counter_values,
+            get_image_from_uri))
+        parent_box.children = local_children
+
+    # Can't use `yield`! Must `return` the boxes otherwise set_content_lists,
+    # calling compute_content_list for `contents()`, will fail
+    return compute_content_list(
+        False,
+        style['content'],
+        parent_box, counter_values,
+        parse_again,
+        get_image_from_uri, quote_depth, style['quotes'],
+        context, page)
+
+
+def compute_string_set_string(box, string_name, content_list, counter_values):
+    """Compute one named string of the ``string-set`` property.
+
+    The content-list value of the string named `string_name` is turned
+    into text; a non-empty result is appended to ``box.string_set`` as a
+    ``(string_name, text)`` tuple.
+    """
+    def recompute():
+        """Compute the string-set string all over again.
+
+        Handed to compute_content_list as ``parse_again_func``; meant to be
+        called when TARGET_COLLECTOR.lookup_target() detected a
+        TARGET_STATE.PENDING.
+        """
+        compute_string_set_string(
+            box, string_name, content_list, counter_values)
+
+    text = compute_content_list(
+        True, content_list, box, counter_values, recompute)
+    if not text:
+        # Nothing to record
+        return
+    box.string_set.append((string_name, text))
+
+
+def compute_bookmark_label(box, content_list, counter_values):
+    """Compute the text of the ``bookmark-label`` property.
+
+    The content-list value is turned into a string which is stored in
+    ``box.bookmark_label``.
+    """
+    def recompute():
+        """Compute the label again (passed as ``parse_again_func``)."""
+        compute_bookmark_label(box, content_list, counter_values)
+
+    label = compute_content_list(
+        True, content_list, box, counter_values, recompute)
+    box.bookmark_label = label


 def set_content_lists(element, box, style, counter_values):
@ -282,20 +423,17 @@ def set_content_lists(element, box, style, counter_values):

    These content-lists are used in GCPM properties like ``string-set`` and
    ``bookmark-label``.
-
    """
-    string_set = []
+    box.string_set = []
    if style['string_set'] != 'none':
        for i, (string_name, string_values) in enumerate(style['string_set']):
-            string_set.append((string_name, compute_content_list_string(
-                element, box, counter_values, string_values)))
-    box.string_set = string_set
-
+            compute_string_set_string(
+                box, string_name, string_values, counter_values)
    if style['bookmark_label'] == 'none':
        box.bookmark_label = ''
    else:
-        box.bookmark_label = compute_content_list_string(
-            element, box, counter_values, style['bookmark_label'])
+        compute_bookmark_label(
+            box, style['bookmark_label'], counter_values)


 def update_counters(state, style):
--- a/weasyprint/tests/test_css_validation.py
+++ b/weasyprint/tests/test_css_validation.py
@ -638,24 +638,24 @@ def test_string_set():
    assert expand_to_dict('string-set: test attr(class)') == {
        'string_set': (('test', (('attr', 'class'),)),)}
    assert expand_to_dict('string-set: test counter(count)') == {
-        'string_set': (('test', (('counter', ('count', 'decimal')),)),)}
+        'string_set': (('test', (('counter', ['count', 'decimal']),)),)}
    assert expand_to_dict(
        'string-set: test counter(count, upper-roman)') == {
            'string_set': (
-                ('test', (('counter', ('count', 'upper-roman')),)),)}
+                ('test', (('counter', ['count', 'upper-roman']),)),)}
    assert expand_to_dict('string-set: test counters(count, ".")') == {
-        'string_set': (('test', (('counters', ('count', '.', 'decimal')),)),)}
+        'string_set': (('test', (('counters', ['count', '.', 'decimal']),)),)}
    assert expand_to_dict(
        'string-set: test counters(count, ".", upper-roman)') == {
            'string_set': (
-                ('test', (('counters', ('count', '.', 'upper-roman')),)),)}
+                ('test', (('counters', ['count', '.', 'upper-roman']),)),)}
    assert expand_to_dict(
        'string-set: test content(text) "string" '
        'attr(title) attr(title) counter(count)') == {
            'string_set': (('test', (
                ('content', 'text'), ('STRING', 'string'),
                ('attr', 'title'), ('attr', 'title'),
-                ('counter', ('count', 'decimal')),)),)}
+                ('counter', ['count', 'decimal']))),)}

    assert_invalid('string-set: test')
    assert_invalid('string-set: test test1')