mirror of
https://github.com/Kozea/WeasyPrint.git
synced 2024-10-05 08:27:22 +03:00
Merge branch 'target-counter' of https://github.com/Tontyna/WeasyPrint into target-collector
This commit is contained in:
commit
1ebd36e83a
@ -9,10 +9,13 @@
|
||||
:license: BSD, see LICENSE for details.
|
||||
|
||||
"""
|
||||
from urllib.parse import unquote
|
||||
|
||||
from .. import text
|
||||
from ..logger import LOGGER
|
||||
from ..urls import get_link_attribute
|
||||
from .properties import INITIAL_VALUES, Dimension
|
||||
from .targets import TARGET_COLLECTOR
|
||||
|
||||
ZERO_PIXELS = Dimension(0, 'px')
|
||||
|
||||
@ -399,16 +402,131 @@ def column_gap(computer, name, value):
|
||||
return length(computer, name, value, pixels_only=True)
|
||||
|
||||
|
||||
@register_computer('content')
|
||||
def content(computer, name, values):
|
||||
"""Compute the ``content`` property."""
|
||||
def _toSelector(el, pseudo_type):
|
||||
"""convenience function"""
|
||||
elname = type(el).__name__
|
||||
if elname == 'PageType':
|
||||
return ('@page%s %s%s%s %s ' % (
|
||||
' ' + el.name if el.name else '',
|
||||
':' + el.side if el.side else '',
|
||||
':blank' if el.blank else '',
|
||||
':first' if el.first else '',
|
||||
pseudo_type if pseudo_type else ''
|
||||
)).rstrip()
|
||||
elif elname == 'Element':
|
||||
return '%s%s' % (
|
||||
el.tag,
|
||||
'::' + pseudo_type if pseudo_type else ''
|
||||
)
|
||||
else:
|
||||
return '<%s>' % (
|
||||
('%s %s' % (elname, pseudo_type)).rstrip())
|
||||
|
||||
|
||||
@register_computer('string-set')
def string_set(computer, name, values):
    """Compute the <content-list> of every entry of ``string-set``.

    :param computer: the style computer; its ``element`` and ``pseudo_type``
        attributes are inspected.
    :param name: the property name, passed on to ``content()`` and used in
        the log message.
    :param values: either a keyword (``'normal'`` / ``'none'``) or an
        iterable of ``(string_name, content_list)`` pairs.
    :returns: the keyword unchanged, ``'none'`` when the property is set on
        something that is not a plain element, or a tuple of
        ``(string_name, computed_content_list)`` pairs.

    """
    # Keyword values are not expected here (per the original author), but
    # they are passed through unchanged rather than iterated.
    if values in ('normal', 'none'):
        return values

    is_real_element = (
        type(computer.element).__name__ == 'Element'
        and not computer.pseudo_type)
    if not is_real_element:
        LOGGER.debug(
            'property `%s` discarded: %s in selector `%s`.',
            name,
            'Not a real element',
            _toSelector(computer.element, computer.pseudo_type))
        return 'none'

    # Each named string gets its content list computed by ``content()``.
    computed = []
    for string_name, string_values in values:
        computed.append(
            (string_name, content(computer, name, string_values)))
    return tuple(computed)
|
||||
|
||||
|
||||
@register_computer('bookmark-label')
@register_computer('content')
def content(computer, name, values):
    """Compute the <content-list>s of the ``content``, ``bookmark-label``
    and ``string-set`` properties.

    :param computer: the style computer; its ``element`` and ``pseudo_type``
        attributes are inspected.
    :param name: name of the property being computed.
    :param values: a keyword (``'normal'`` / ``'none'``) or an iterable of
        ``(type, value)`` pairs.
    :returns: the keyword unchanged, a tuple of computed ``(type, value)``
        pairs, or ``'none'`` when the property is discarded (the reason is
        logged).

    """

    class ComputedContentError(ValueError):
        """Invalid or unsupported values for a known CSS property."""

    def computed_content_error(level, reason):
        """Log with ``LOGGER.<level>`` why the property gets discarded."""
        log = getattr(LOGGER, level)
        selector = _toSelector(computer.element, computer.pseudo_type)
        log(
            'property `%s` discarded: %s in selector `%s`.',
            name,
            reason,
            selector
        )

    def parse_target_type(type_, values):
        """Resolve the anchor of a ``target-*()`` item.

        ``values`` is ``['STRING', <anchorname>, ...]`` or
        ``['attr', <attrname>, ...]``. Returns the unquoted anchor name
        (without ``#``) followed by the remaining values, and registers the
        anchor name with ``TARGET_COLLECTOR``.
        Raises ``ComputedContentError`` for unusable references.
        """
        if type(computer.element).__name__ != 'Element':
            raise ComputedContentError('\'%s\' not (yet) supported' % (type_,))
        kind = values[0]
        reference = values[1]
        if kind == 'attr':
            # the reference is an attribute name, read it from the element
            href = computer.element.get(reference, '')
        else:
            href = reference
        # [spec](https://www.w3.org/TR/css-content-3/#target-counter)
        # says:
        # > If there's no fragment, if the ID referenced isn't there,
        # > or if the URL points to an outside document,
        # > the user agent must treat that as an error.
        if href in ('', '#'):
            raise ComputedContentError('Empty anchor name in %s' % (type_,))
        if not href.startswith('#'):
            raise ComputedContentError(
                'No %s for external URI reference "%s"' % (type_, href))
        href = unquote(href[1:])
        TARGET_COLLECTOR.collect_computed_target(href)
        return [href] + values[2:]

    if values in ('normal', 'none'):
        return values

    if name == 'content':
        # [CSS3 spec](https://www.w3.org/TR/css-content-3/#content-property)
        # says:
        # > 'content' applies to:
        # > ::before, ::after, ::marker, and page margin boxes.
        # > Image and url values can apply to all elements.
        if not computer.pseudo_type:
            computed_content_error('debug', 'Not a pseudo-element')
            return 'none'
    elif type(computer.element).__name__ != 'Element' \
            or computer.pseudo_type:
        # ignore string-set, bookmark-label unless in a *real* element
        computed_content_error('debug', 'Not a real element')
        return 'none'

    target_checks = ('target-counter', 'target-counters', 'target-text')

    def compute_item(type_, value):
        """Compute one ``(type, value)`` pair of the content list."""
        if type_ == 'attr':
            return ('STRING', computer.element.get(value, ''))
        if type_ in target_checks:
            return (type_, parse_target_type(type_, value))
        return (type_, value)

    try:
        # TODO: catch `string()` when not in @page-margin
        return tuple(compute_item(type_, value) for type_, value in values)
    except (ComputedContentError, AttributeError) as exc:
        # AttributeError: attr() in @page-'element',
        # e.g.: 'PageType' object has no attribute 'get'
        reason = (
            exc.args[0] if exc.args and exc.args[0] else 'invalid content')
        computed_content_error('warning', reason)
        return 'none'
|
||||
|
||||
|
||||
@register_computer('display')
|
||||
@ -495,7 +613,9 @@ def anchor(computer, name, values):
|
||||
"""Compute the ``anchor`` property."""
|
||||
if values != 'none':
|
||||
_, key = values
|
||||
return computer.element.get(key) or None
|
||||
anchor_name = computer.element.get(key) or None
|
||||
TARGET_COLLECTOR.collect_anchor(anchor_name)
|
||||
return anchor_name
|
||||
|
||||
|
||||
@register_computer('link')
|
||||
|
170
weasyprint/css/targets.py
Normal file
170
weasyprint/css/targets.py
Normal file
@ -0,0 +1,170 @@
|
||||
"""
|
||||
weasyprint.css.targets
|
||||
-------------------------------------
|
||||
|
||||
An attempt to implement target-counter, target-counters and target-text
|
||||
|
||||
The TARGET_COLLECTOR is a structure providing required targets'
|
||||
counter_values and stuff needed to build PENDING targets later,
|
||||
when all targeted anchors have been laid out
|
||||
|
||||
:copyright: Copyright 2018 Simon Sapin and contributors, see AUTHORS.
|
||||
:license: BSD, see LICENSE for details.
|
||||
|
||||
"""
|
||||
|
||||
import copy # deepcopy needed!
|
||||
|
||||
from ..logger import LOGGER
|
||||
|
||||
|
||||
# not sure what's the Python way to create consts, maybe a namedtuple?
|
||||
# thx [Jon Betts](https://stackoverflow.com/a/23274028)
|
||||
class _STATE(object):
|
||||
"""constants for target states"""
|
||||
PENDING = 0
|
||||
UPTODATE = 1
|
||||
UNDEFINED = 2
|
||||
__stateToName = {
|
||||
PENDING: 'PENDING',
|
||||
UPTODATE: 'UPTODATE',
|
||||
UNDEFINED: 'UNDEFINED',
|
||||
}
|
||||
|
||||
def __setattr__(self, *_):
|
||||
"""prohibit changes"""
|
||||
pass
|
||||
|
||||
def name(self, state):
|
||||
""" return human readable state-name"""
|
||||
return self.__stateToName.get(state, 'Invalid state')
|
||||
|
||||
|
||||
TARGET_STATE = _STATE()
|
||||
|
||||
|
||||
class TargetLookupItem(object):
    """Record kept by the target collector for one anchor name.

    :param state: one of the ``TARGET_STATE`` constants; defaults to
        ``TARGET_STATE.PENDING``.
    """

    def __init__(self, state=TARGET_STATE.PENDING):
        # bookkeeping for PENDING targets; the collector's lookup_target()
        # fills it with ``source_box -> parse_again_function`` entries
        self.pending_boxes = {}
        # needed for target-text via TEXT_CONTENT_EXTRACTORS
        self.target_box = None
        # required by target-counter and target-counters
        self.target_counter_values = {}
        self.state = state
|
||||
|
||||
|
||||
class _TargetCollector(object):
|
||||
"""collect and provide stuff for css content with `target-*`"""
|
||||
|
||||
def __init__(self):
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
self.had_peding_targets = False
|
||||
self.existing_anchors = []
|
||||
self.items = {}
|
||||
|
||||
def _addtarget(self, anchor_name):
|
||||
return self.items.setdefault(anchor_name, TargetLookupItem())
|
||||
|
||||
def collect_anchor(self, anchor_name):
|
||||
"""
|
||||
stores `anchor_name` in `existing_anchors`
|
||||
should be called by computed_values.anchor()
|
||||
"""
|
||||
if anchor_name and isinstance(anchor_name, str):
|
||||
if anchor_name in self.existing_anchors:
|
||||
LOGGER.warning(' ! anchor redefined: %s', anchor_name)
|
||||
else:
|
||||
self.existing_anchors.append(anchor_name)
|
||||
LOGGER.debug(' + anchor added: "%s" ', anchor_name)
|
||||
|
||||
def collect_computed_target(self, anchor_name):
|
||||
"""
|
||||
stores a `computed` target's (internal!) anchor name,
|
||||
verified by computed_values.content()
|
||||
|
||||
anchor_name without '#' and already unquoted
|
||||
"""
|
||||
if anchor_name and isinstance(anchor_name, str):
|
||||
self._addtarget(anchor_name)
|
||||
|
||||
def verify_collection(self):
|
||||
"""obsolete function, only needed for testing"""
|
||||
LOGGER.debug('------- collected targets -------------')
|
||||
for key, item in self.items.items():
|
||||
# mark target names not in existing_anchors as UNDEFINED
|
||||
if key not in self.existing_anchors:
|
||||
item.state = TARGET_STATE.UNDEFINED
|
||||
LOGGER.debug('%s %s', key, TARGET_STATE.name(item.state))
|
||||
LOGGER.debug('------- existing anchors -------------')
|
||||
LOGGER.debug(self.existing_anchors)
|
||||
|
||||
def lookup_target(self, anchor_name, source_box, parse_again_function):
|
||||
""" called in content_to_boxes() when the source_box needs a target-*
|
||||
returns a TargetLookupItem
|
||||
if already filled by a previous anchor-element: UPDTODATE
|
||||
else: PENDING, we must parse the whole thing again
|
||||
"""
|
||||
item = self.items.get(
|
||||
anchor_name,
|
||||
TargetLookupItem(TARGET_STATE.UNDEFINED))
|
||||
LOGGER.debug(
|
||||
'lookup_target %s %s', anchor_name, TARGET_STATE.name(item.state))
|
||||
if item.state == TARGET_STATE.PENDING:
|
||||
if anchor_name not in self.existing_anchors:
|
||||
item.state = TARGET_STATE.UNDEFINED
|
||||
else:
|
||||
self.had_peding_targets = True
|
||||
LOGGER.debug(' -> still pending. Keep infos.')
|
||||
item.pending_boxes.setdefault(source_box, parse_again_function)
|
||||
|
||||
if item.state == TARGET_STATE.UNDEFINED:
|
||||
LOGGER.error(
|
||||
'content discarded: target points to undefined anchor "%s"',
|
||||
anchor_name)
|
||||
# feedback to invoker: discard the parent_box
|
||||
# at the moment it's `build.before_after_to_box()` which cares
|
||||
source_box.style['content'] = 'none'
|
||||
return item
|
||||
|
||||
def store_target(self, anchor_name, target_counter_values, target_box):
|
||||
"""
|
||||
called by every anchor-element in build.element_to_box
|
||||
if there is a PENDING TargetLookupItem, it is updated
|
||||
only previously collected anchor_names are stored
|
||||
"""
|
||||
item = self.items.get(anchor_name, None)
|
||||
if item:
|
||||
LOGGER.debug(
|
||||
'store_target? %s %s', anchor_name,
|
||||
TARGET_STATE.name(item.state))
|
||||
if item.state == TARGET_STATE.PENDING:
|
||||
LOGGER.debug(' -> update: %s', target_counter_values)
|
||||
# need A REAL DUPLICATE UNCONNECTED SEPARATE COPY!!
|
||||
item.state = TARGET_STATE.UPTODATE
|
||||
item.target_counter_values = copy.deepcopy(
|
||||
target_counter_values)
|
||||
item.target_box = target_box
|
||||
else:
|
||||
LOGGER.debug(
|
||||
' -> duplicate anchor definition: %s' % anchor_name)
|
||||
else:
|
||||
LOGGER.debug(' -> achor %s not targetted' % anchor_name)
|
||||
|
||||
def check_peding_targets(self):
|
||||
if not self.had_peding_targets:
|
||||
return
|
||||
LOGGER.info('Step 4.3 Reparsing pending targets')
|
||||
self.had_peding_targets = False
|
||||
for key, item in self.items.items():
|
||||
# create the pending content boxes NOW
|
||||
# UNDEFINED items never hava a `parse_again` function
|
||||
for abox, func in item.pending_boxes.items():
|
||||
func()
|
||||
|
||||
|
||||
TARGET_COLLECTOR = _TargetCollector()
|
@ -733,7 +733,10 @@ def clip(token):
|
||||
|
||||
@validator(wants_base_url=True)
|
||||
def content(tokens, base_url):
|
||||
"""``content`` property validation."""
|
||||
"""``content`` property validation.
|
||||
TODO: should become a @comma_separated_list to validate
|
||||
CSS3 <content-replacement>
|
||||
"""
|
||||
keyword = get_single_keyword(tokens)
|
||||
if keyword in ('normal', 'none'):
|
||||
return keyword
|
||||
@ -748,38 +751,8 @@ def validate_content_token(base_url, token):
|
||||
Return (type, content) or False for invalid tokens.
|
||||
|
||||
"""
|
||||
quote_type = CONTENT_QUOTE_KEYWORDS.get(get_keyword(token))
|
||||
if quote_type is not None:
|
||||
return ('QUOTE', quote_type)
|
||||
|
||||
type_ = token.type
|
||||
if type_ == 'string':
|
||||
return ('STRING', token.value)
|
||||
if type_ == 'url':
|
||||
return ('URI', safe_urljoin(base_url, token.value))
|
||||
function = parse_function(token)
|
||||
if function:
|
||||
name, args = function
|
||||
prototype = (name, [a.type for a in args])
|
||||
args = [getattr(a, 'value', a) for a in args]
|
||||
if prototype == ('attr', ['ident']):
|
||||
return (name, args[0])
|
||||
elif prototype in (('counter', ['ident']),
|
||||
('counters', ['ident', 'string'])):
|
||||
args.append('decimal')
|
||||
return (name, args)
|
||||
elif prototype in (('counter', ['ident', 'ident']),
|
||||
('counters', ['ident', 'string', 'ident'])):
|
||||
style = args[-1]
|
||||
if style in ('none', 'decimal') or style in counters.STYLES:
|
||||
return (name, args)
|
||||
elif prototype in (('string', ['ident']),
|
||||
('string', ['ident', 'ident'])):
|
||||
if len(args) > 1:
|
||||
args[1] = args[1].lower()
|
||||
if args[1] not in ('first', 'start', 'last', 'first-except'):
|
||||
raise InvalidValues()
|
||||
return (name, args)
|
||||
return validate_content_list_token(
|
||||
base_url, token, for_content_box=True)
|
||||
|
||||
|
||||
def parse_function(function_token):
|
||||
@ -1614,10 +1587,11 @@ def lang(token):
|
||||
return ('string', token.value)
|
||||
|
||||
|
||||
@validator(unstable=True)
|
||||
def bookmark_label(tokens):
|
||||
@validator(unstable=True, wants_base_url=True)
|
||||
def bookmark_label(tokens, base_url):
|
||||
"""Validation for ``bookmark-label``."""
|
||||
parsed_tokens = tuple(validate_content_list_token(v) for v in tokens)
|
||||
parsed_tokens = tuple(validate_content_list_token(
|
||||
base_url, v, for_content_box=False) for v in tokens)
|
||||
if None not in parsed_tokens:
|
||||
return parsed_tokens
|
||||
|
||||
@ -1634,50 +1608,212 @@ def bookmark_level(token):
|
||||
return 'none'
|
||||
|
||||
|
||||
@validator(unstable=True)
|
||||
@validator(unstable=True, wants_base_url=True)
|
||||
@comma_separated_list
|
||||
def string_set(tokens):
|
||||
def string_set(tokens, base_url):
|
||||
"""Validation for ``string-set``."""
|
||||
if len(tokens) >= 2:
|
||||
var_name = get_keyword(tokens[0])
|
||||
parsed_tokens = tuple(
|
||||
validate_content_list_token(v) for v in tokens[1:])
|
||||
validate_content_list_token(
|
||||
base_url, v, for_content_box=False) for v in tokens[1:])
|
||||
if None not in parsed_tokens:
|
||||
return (var_name, parsed_tokens)
|
||||
elif tokens and tokens[0].value == 'none':
|
||||
return 'none'
|
||||
|
||||
|
||||
def validate_content_list_token(token):
|
||||
def validate_content_list_token(base_url, token, for_content_box):
|
||||
"""Validation for a single token of <content-list> used in GCPM.
|
||||
Not really.
|
||||
GCPM <content-list> =
|
||||
[ <string> | contents | <image> | <quote> | <target> | <leader()> ]+
|
||||
(Draft, 24 January 2018. Really a DRAFT. Not an RFC. Not a SPEC.
|
||||
BTW: The current Draft GCPM ``string-set`` value =
|
||||
none | [ <custom-ident> <string>+ ]#
|
||||
|
||||
So. This is the validation for tokens that make sense in
|
||||
css properties ``string-set``, ``bookmark-label`` and ``content``:
|
||||
|
||||
<modified-content-list> = [
|
||||
<string> | attr() | <counter> | <target> |
|
||||
<content> |
|
||||
url() | <quote> | string() | leader()
|
||||
]+
|
||||
|
||||
:param for_content_box: controls which tokens are valid
|
||||
|
||||
Valid tokens when ``for_content_box`` ==
|
||||
|
||||
- True (called from/for css property 'content':
|
||||
|
||||
<string> | attr() | <counter> | <target> |
|
||||
url() | <quote> | string() | leader()
|
||||
|
||||
The final decision whether a token is valid is the job of
|
||||
computed_values.content()
|
||||
|
||||
- False (called from/for css properties 'string-set', 'bookmark-label':
|
||||
<string> | attr() | <counter> | <target> |
|
||||
<content>
|
||||
|
||||
Return (type, content) or False for invalid tokens.
|
||||
|
||||
"""
|
||||
|
||||
def validate_target_token(token):
|
||||
""" validate first parameter of ``target-*()``-token
|
||||
returns ['attr', '<attrname>' ]
|
||||
or ['STRING', '<anchorname>'] when valid
|
||||
evaluation of the anchorname is job of compute()
|
||||
"""
|
||||
# TODO: what about ``attr(href url)`` ?
|
||||
if isinstance(token, str):
|
||||
# url() or "string" given
|
||||
# verify #anchor is done in compute()
|
||||
# if token.value.startswith('#'):
|
||||
return ['STRING', token]
|
||||
# parse_function takes token.type for granted!
|
||||
if not hasattr(token, 'type'):
|
||||
return
|
||||
function = parse_function(token)
|
||||
if function:
|
||||
name, args = function
|
||||
params = [a.type for a in args]
|
||||
values = [getattr(a, 'value', a) for a in args]
|
||||
if name == 'attr' and params == ['ident']:
|
||||
return [name, values[0]]
|
||||
|
||||
if for_content_box:
|
||||
quote_type = CONTENT_QUOTE_KEYWORDS.get(get_keyword(token))
|
||||
if quote_type is not None:
|
||||
return ('QUOTE', quote_type)
|
||||
else:
|
||||
if get_keyword(token) == 'contents':
|
||||
return ('content', 'text')
|
||||
type_ = token.type
|
||||
if type_ == 'string':
|
||||
return ('STRING', token.value)
|
||||
if for_content_box:
|
||||
if type_ == 'url':
|
||||
return ('URI', safe_urljoin(base_url, token.value))
|
||||
function = parse_function(token)
|
||||
if function:
|
||||
name, args = function
|
||||
prototype = (name, tuple(a.type for a in args))
|
||||
args = tuple(getattr(a, 'value', a) for a in args)
|
||||
if prototype == ('attr', ('ident',)):
|
||||
if not function:
|
||||
# to pass unit test `test_boxes.test_before_after`
|
||||
# the log string must contain "invalid value"
|
||||
raise InvalidValues('invalid value/unsupported token ´%s\´' % (token,))
|
||||
|
||||
name, args = function
|
||||
# known functions in 'content', 'string-set' and 'bookmark-label':
|
||||
valid_functions = ['attr',
|
||||
'counter', 'counters',
|
||||
'target-counter', 'target-counters', 'target-text']
|
||||
# 'content'
|
||||
if for_content_box:
|
||||
valid_functions += ['string',
|
||||
'leader']
|
||||
else:
|
||||
valid_functions += ['content']
|
||||
unsupported_functions = ['leader']
|
||||
if name not in valid_functions:
|
||||
# to pass unit test `test_boxes.test_before_after`
|
||||
# the log string must contain "invalid value"
|
||||
raise InvalidValues('invalid value: function `%s()`' % (name))
|
||||
if name in unsupported_functions:
|
||||
# suppress -- not (yet) implemented, no error
|
||||
LOGGER.warn('\'%s()\' not (yet) supported', name)
|
||||
return ('STRING', '')
|
||||
|
||||
prototype = (name, [a.type for a in args])
|
||||
args = [getattr(a, 'value', a) for a in args]
|
||||
if prototype == ('attr', ['ident']):
|
||||
# TODO: what about ``attr(href url)`` ?
|
||||
return (name, args[0])
|
||||
elif prototype in (('content', []), ('content', ['ident', ])):
|
||||
if not args:
|
||||
return (name, 'text')
|
||||
elif args[0] in ('text', 'after', 'before', 'first-letter'):
|
||||
return (name, args[0])
|
||||
elif prototype in (('content', ()), ('content', ('ident',))):
|
||||
if not args:
|
||||
return (name, 'text')
|
||||
elif args[0] in ('text', 'after', 'before', 'first-letter'):
|
||||
return (name, args[0])
|
||||
elif prototype in (('counter', ('ident',)),
|
||||
('counters', ('ident', 'string'))):
|
||||
args += ('decimal',)
|
||||
elif prototype in (('counter', ['ident']),
|
||||
('counters', ['ident', 'string'])):
|
||||
args.append('decimal')
|
||||
return (name, args)
|
||||
elif prototype in (('counter', ['ident', 'ident']),
|
||||
('counters', ['ident', 'string', 'ident'])):
|
||||
style = args[-1]
|
||||
if style in ('none', 'decimal') or style in counters.STYLES:
|
||||
return (name, args)
|
||||
elif prototype in (('counter', ('ident', 'ident')),
|
||||
('counters', ('ident', 'string', 'ident'))):
|
||||
elif prototype in (('string', ['ident']),
|
||||
('string', ['ident', 'ident'])):
|
||||
if len(args) > 1:
|
||||
args[1] = args[1].lower()
|
||||
if args[1] not in ('first', 'start', 'last', 'first-except'):
|
||||
raise InvalidValues()
|
||||
return (name, args)
|
||||
# target-counter() = target-counter(
|
||||
# [ <string> | <url> ] , <custom-ident> ,
|
||||
# <counter-style>? )
|
||||
elif name == 'target-counter':
|
||||
if prototype in ((name, ['url', 'ident']),
|
||||
(name, ['url', 'ident', 'ident']),
|
||||
(name, ['string', 'ident']),
|
||||
(name, ['string', 'ident', 'ident']),
|
||||
(name, ['function', 'ident']),
|
||||
(name, ['function', 'ident', 'ident'])):
|
||||
# default style
|
||||
if len(args) == 2:
|
||||
args.append('decimal')
|
||||
# accept "#anchorname" and attr(x)
|
||||
retval = validate_target_token(args.pop(0))
|
||||
if retval is None:
|
||||
raise InvalidValues()
|
||||
style = args[-1]
|
||||
if style in ('none', 'decimal') or style in counters.STYLES:
|
||||
return (name, args)
|
||||
return (name, retval + args)
|
||||
# target-counters() = target-counters(
|
||||
# [ <string> | <url> ] , <custom-ident> , <string> ,
|
||||
# <counter-style>? )
|
||||
elif name == 'target-counters':
|
||||
if prototype in ((name, ['url', 'ident', 'string']),
|
||||
(name, ['url', 'ident', 'string', 'ident']),
|
||||
(name, ['string', 'ident', 'string']),
|
||||
(name, ['string', 'ident', 'string', 'ident']),
|
||||
(name, ['function', 'ident', 'string']),
|
||||
(name, ['function', 'ident', 'string', 'ident'])):
|
||||
# default style
|
||||
if len(args) == 3:
|
||||
args.append('decimal')
|
||||
# accept "#anchorname" and attr(x)
|
||||
retval = validate_target_token(args.pop(0))
|
||||
if retval is None:
|
||||
raise InvalidValues()
|
||||
style = args[-1]
|
||||
if style in ('none', 'decimal') or style in counters.STYLES:
|
||||
return (name, retval + args)
|
||||
# target-text() = target-text(
|
||||
# [ <string> | <url> ] ,
|
||||
# [ content | before | after | first-letter ]? )
|
||||
elif name == 'target-text':
|
||||
if prototype in ((name, ['url']),
|
||||
(name, ['url', 'ident']),
|
||||
(name, ['string']),
|
||||
(name, ['string', 'ident']),
|
||||
(name, ['function']),
|
||||
(name, ['function', 'ident'])):
|
||||
if len(args) == 1:
|
||||
args.append('content')
|
||||
# accept "#anchorname" and attr(x)
|
||||
retval = validate_target_token(args.pop(0))
|
||||
if retval is None:
|
||||
raise InvalidValues()
|
||||
style = args[-1]
|
||||
# hint: the syntax isn't stable yet!
|
||||
if style in ('content', 'after', 'before', 'first-letter'):
|
||||
# build.TEXT_CONTENT_EXTRACTORS needs 'text'
|
||||
# TODO: should we define
|
||||
# TEXT_CONTENT_EXTRACTORS['content'] == box_text ?
|
||||
if style == 'content':
|
||||
args[-1] = 'text'
|
||||
return (name, retval + args)
|
||||
|
||||
|
||||
@validator(unstable=True)
|
||||
|
@ -16,6 +16,7 @@ import cairocffi as cairo
|
||||
|
||||
from . import CSS
|
||||
from .css import get_all_computed_styles
|
||||
from .css.targets import TARGET_COLLECTOR
|
||||
from .draw import draw_page, stacked
|
||||
from .fonts import FontConfiguration
|
||||
from .formatting_structure import boxes
|
||||
@ -295,6 +296,15 @@ class Document(object):
|
||||
@classmethod
|
||||
def _render(cls, html, stylesheets, enable_hinting,
|
||||
presentational_hints=False, font_config=None):
|
||||
# new Document needs fresh Target-Collection
|
||||
# reset the TARGET_COLLECTOR before the Document's styles are parsed
|
||||
# TODO: call reset at the end of this function to cleanup?
|
||||
# - reset_target_collector Yes/No could be a useful option for users
|
||||
# who want to combine several documents...
|
||||
# - in the future each Document should create its own TargetCollector
|
||||
# and hand it down to formatting_structure / pages / maybe css
|
||||
TARGET_COLLECTOR.reset()
|
||||
|
||||
if font_config is None:
|
||||
font_config = FontConfiguration()
|
||||
page_rules = []
|
||||
|
@ -21,6 +21,8 @@ import tinycss2.color3
|
||||
from . import boxes, counters
|
||||
from .. import html
|
||||
from ..css import properties
|
||||
from ..css.targets import TARGET_COLLECTOR, TARGET_STATE
|
||||
from ..logger import LOGGER
|
||||
|
||||
# Maps values of the ``display`` CSS property to box types.
|
||||
BOX_TYPE_FROM_DISPLAY = {
|
||||
@ -46,6 +48,13 @@ BOX_TYPE_FROM_DISPLAY = {
|
||||
def build_formatting_structure(element_tree, style_for, get_image_from_uri,
|
||||
base_url):
|
||||
"""Build a formatting structure (box tree) from an element tree."""
|
||||
|
||||
LOGGER.info('Step 4.1 - Verifying collected targets')
|
||||
# BTW: this step is *not* required. Don't think it speeds things up a lot
|
||||
# by tagging UNDEFINED targets in advance
|
||||
TARGET_COLLECTOR.verify_collection()
|
||||
LOGGER.info('Step 4.2 - Building basic boxes')
|
||||
|
||||
box_list = element_to_box(
|
||||
element_tree, style_for, get_image_from_uri, base_url)
|
||||
if box_list:
|
||||
@ -63,6 +72,10 @@ def build_formatting_structure(element_tree, style_for, get_image_from_uri,
|
||||
return style
|
||||
box, = element_to_box(
|
||||
element_tree, root_style_for, get_image_from_uri, base_url)
|
||||
|
||||
TARGET_COLLECTOR.check_peding_targets()
|
||||
# state now: no more pending targets in pseudo-element's content boxes
|
||||
|
||||
box.is_for_root_element = True
|
||||
# If this is changed, maybe update weasy.layout.pages.make_margin_boxes()
|
||||
process_whitespace(box)
|
||||
@ -144,6 +157,13 @@ def element_to_box(element, style_for, get_image_from_uri, base_url,
|
||||
|
||||
children.extend(before_after_to_box(
|
||||
element, 'before', state, style_for, get_image_from_uri))
|
||||
|
||||
# collect anchor's counter_values, maybe it's a target.
|
||||
# to get the spec-conform counter_values we must do it here,
|
||||
# after the ::before is parsed and before the ::after is
|
||||
if style['anchor']:
|
||||
TARGET_COLLECTOR.store_target(style['anchor'], counter_values, box)
|
||||
|
||||
text = element.text
|
||||
if text:
|
||||
children.append(boxes.TextBox.anonymous_from(box, text))
|
||||
@ -168,6 +188,7 @@ def element_to_box(element, style_for, get_image_from_uri, base_url,
|
||||
counter_values.pop(name)
|
||||
|
||||
box.children = children
|
||||
# calculate string-set and bookmark-label
|
||||
set_content_lists(element, box, style, counter_values)
|
||||
|
||||
# Specific handling for the element. (eg. replaced element)
|
||||
@ -195,6 +216,10 @@ def before_after_to_box(element, pseudo_type, state, style_for,
|
||||
|
||||
quote_depth, counter_values, _counter_scopes = state
|
||||
update_counters(state, style)
|
||||
|
||||
# pseudo-elements can't be anchors, no need to call
|
||||
# TARGET_COLLECTOR.store_target(...)
|
||||
|
||||
children = []
|
||||
if display == 'list-item':
|
||||
children.extend(add_box_marker(
|
||||
@ -202,25 +227,57 @@ def before_after_to_box(element, pseudo_type, state, style_for,
|
||||
children.extend(content_to_boxes(
|
||||
style, box, quote_depth, counter_values, get_image_from_uri))
|
||||
|
||||
# content_to_boxes detected an UNDEFINED target, discard the box
|
||||
if style['content'] == 'none':
|
||||
return
|
||||
|
||||
box.children = children
|
||||
yield box
|
||||
|
||||
|
||||
def content_to_boxes(style, parent_box, quote_depth, counter_values,
|
||||
get_image_from_uri, context=None, page=None):
|
||||
"""Takes the value of a ``content`` property and yield boxes."""
|
||||
def compute_content_list(return_a_string,
|
||||
content_list, parent_box, counter_values,
|
||||
parse_again_func,
|
||||
get_image_from_uri=None,
|
||||
quote_depth=None, quote_style=None,
|
||||
context=None, page=None):
|
||||
"""
|
||||
Compute and return the string or the boxes corresponding
|
||||
to the content_list.
|
||||
|
||||
:param return_a_string:
|
||||
True for string-set-string and bookmark-label,
|
||||
otherwise (content) a list of anonymous InlineBox(es) is returned
|
||||
:param parse_again_func:
|
||||
function to compute the content_list again
|
||||
when TARGET_COLLECTOR.lookup_target() detected a TARGET_STATE.PENDING
|
||||
|
||||
build_formatting_structure calls
|
||||
TARGET_COLLECTOR.check_pending_targets
|
||||
after the first pass to do required reparsing
|
||||
"""
|
||||
boxlist = []
|
||||
texts = []
|
||||
for type_, value in style['content']:
|
||||
for type_, value in content_list:
|
||||
if type_ == 'STRING':
|
||||
texts.append(value)
|
||||
elif type_ == 'URI':
|
||||
elif type_ == 'URI' and not return_a_string and \
|
||||
get_image_from_uri is not None:
|
||||
image = get_image_from_uri(value)
|
||||
if image is not None:
|
||||
text = ''.join(texts)
|
||||
if text:
|
||||
yield boxes.TextBox.anonymous_from(parent_box, text)
|
||||
boxlist.append(
|
||||
boxes.TextBox.anonymous_from(parent_box, text))
|
||||
texts = []
|
||||
yield boxes.InlineReplacedBox.anonymous_from(parent_box, image)
|
||||
boxlist.append(
|
||||
boxes.InlineReplacedBox.anonymous_from(parent_box, image))
|
||||
elif type_ == 'content' and return_a_string:
|
||||
added_text = TEXT_CONTENT_EXTRACTORS[value](parent_box)
|
||||
# Simulate the step of white space processing
|
||||
# (normally done during the layout)
|
||||
added_text = added_text.strip()
|
||||
texts.append(added_text)
|
||||
elif type_ == 'counter':
|
||||
counter_name, counter_style = value
|
||||
counter_value = counter_values.get(counter_name, [0])[-1]
|
||||
@ -232,49 +289,133 @@ def content_to_boxes(style, parent_box, quote_depth, counter_values,
|
||||
for counter_value in counter_values.get(counter_name, [0])
|
||||
))
|
||||
elif type_ == 'string' and context is not None and page is not None:
|
||||
# string() is only valid in @page context
|
||||
text = context.get_string_set_for(page, *value)
|
||||
texts.append(text)
|
||||
else:
|
||||
assert type_ == 'QUOTE'
|
||||
elif type_ == 'target-counter':
|
||||
target_name, counter_name, counter_style = value
|
||||
lookup_target = TARGET_COLLECTOR.lookup_target(
|
||||
target_name, parent_box, parse_again_func)
|
||||
if lookup_target.state == TARGET_STATE.UPTODATE:
|
||||
counter_value = lookup_target.target_counter_values.get(
|
||||
counter_name, [0])[-1]
|
||||
texts.append(counters.format(counter_value, counter_style))
|
||||
else:
|
||||
texts = []
|
||||
break
|
||||
elif type_ == 'target-counters':
|
||||
target_name, counter_name, separator, counter_style = value
|
||||
lookup_target = TARGET_COLLECTOR.lookup_target(
|
||||
target_name, parent_box, parse_again_func)
|
||||
if lookup_target.state == TARGET_STATE.UPTODATE:
|
||||
target_counter_values = lookup_target.target_counter_values
|
||||
texts.append(separator.join(
|
||||
counters.format(counter_value, counter_style)
|
||||
for counter_value in target_counter_values.get(
|
||||
counter_name, [0])
|
||||
))
|
||||
else:
|
||||
texts = []
|
||||
break
|
||||
elif type_ == 'target-text':
|
||||
target_name, text_style = value
|
||||
lookup_target = TARGET_COLLECTOR.lookup_target(
|
||||
target_name, parent_box, parse_again_func)
|
||||
if lookup_target.state == TARGET_STATE.UPTODATE:
|
||||
target_box = lookup_target.target_box
|
||||
text = TEXT_CONTENT_EXTRACTORS[text_style](target_box)
|
||||
# Simulate the step of white space processing
|
||||
# (normally done during the layout)
|
||||
texts.append(text.strip())
|
||||
else:
|
||||
texts = []
|
||||
break
|
||||
elif type_ == 'QUOTE' and not return_a_string and \
|
||||
quote_depth is not None and quote_style is not None:
|
||||
is_open, insert = value
|
||||
if not is_open:
|
||||
quote_depth[0] = max(0, quote_depth[0] - 1)
|
||||
if insert:
|
||||
open_quotes, close_quotes = style['quotes']
|
||||
open_quotes, close_quotes = quote_style
|
||||
quotes = open_quotes if is_open else close_quotes
|
||||
texts.append(quotes[min(quote_depth[0], len(quotes) - 1)])
|
||||
if is_open:
|
||||
quote_depth[0] += 1
|
||||
else:
|
||||
# TODO: in previous versions an AssertionError was raised!
|
||||
pass
|
||||
text = ''.join(texts)
|
||||
if return_a_string:
|
||||
return text
|
||||
if text:
|
||||
yield boxes.TextBox.anonymous_from(parent_box, text)
|
||||
boxlist.append(boxes.TextBox.anonymous_from(parent_box, text))
|
||||
return boxlist
|
||||
|
||||
|
||||
def compute_content_list_string(element, box, counter_values, content_list):
    """Compute the string corresponding to the content-list.

    :param element: object queried with ``element.get(name, '')`` for
        ``attr`` items.
    :param box: box handed to the ``TEXT_CONTENT_EXTRACTORS`` callables for
        ``content`` items.
    :param counter_values: mapping of counter name to a sequence of values;
        a missing counter is treated as ``[0]``.
    :param content_list: iterable of ``(type_, value)`` pairs. Recognised
        types are ``STRING``, ``content``, ``counter``, ``counters`` and
        ``attr``; any other type is silently ignored.
    :returns: the concatenation of the text produced by every item.

    """
    # Collect the fragments and join them once at the end instead of growing
    # a string with ``+=`` on every iteration.
    parts = []
    for type_, value in content_list:
        if type_ == 'STRING':
            parts.append(value)
        elif type_ == 'content':
            # Simulate the step of white space processing
            # (normally done during the layout)
            parts.append(TEXT_CONTENT_EXTRACTORS[value](box).strip())
        elif type_ == 'counter':
            counter_name, counter_style = value
            # Only the last value of the counter is used here.
            counter_value = counter_values.get(counter_name, [0])[-1]
            parts.append(counters.format(counter_value, counter_style))
        elif type_ == 'counters':
            counter_name, separator, counter_style = value
            # Every value of the counter is formatted and joined.
            parts.append(separator.join(
                counters.format(counter_value, counter_style)
                for counter_value
                in counter_values.get(counter_name, [0])))
        elif type_ == 'attr':
            parts.append(element.get(value, ''))
    return ''.join(parts)
|
||||
def content_to_boxes(style, parent_box, quote_depth, counter_values,
                     get_image_from_uri, context=None, page=None):
    """Take the value of a ``content`` property and return boxes.

    The work is delegated to ``compute_content_list``, which receives
    ``style['content']``, ``style['quotes']`` and a re-parse callback along
    with the arguments given here.

    """
    def parse_again():
        """Rebuild all the children of ``parent_box``.

        Closure handed to ``compute_content_list``; meant to be called when
        TARGET_COLLECTOR.lookup_target() detected a TARGET_STATE.PENDING.
        Because it is a closure, the original arguments stay available and
        nothing has to be deep-copied explicitly.

        """
        rebuilt_children = []
        if style['display'] == 'list-item':
            rebuilt_children += add_box_marker(
                parent_box, counter_values, get_image_from_uri)
        # NOTE(review): ``context`` and ``page`` are not forwarded on this
        # re-parse -- confirm that this is intended.
        rebuilt_children += content_to_boxes(
            style, parent_box, quote_depth, counter_values,
            get_image_from_uri)
        parent_box.children = rebuilt_children

    # The boxes have to be returned, not yielded: ``yield`` would break
    # set_content_lists, which relies on compute_content_list for
    # ``contents()``.
    content_list = style['content']
    quote_style = style['quotes']
    return compute_content_list(
        False, content_list, parent_box, counter_values, parse_again,
        get_image_from_uri, quote_depth, quote_style, context, page)
|
||||
|
||||
|
||||
def compute_string_set_string(box, string_name, content_list, counter_values):
    """Resolve one named string of the ``string-set`` property.

    The content-list of the string named ``string_name`` is evaluated by
    ``compute_content_list``; a non-empty result is appended to
    ``box.string_set`` as a ``(string_name, text)`` pair.

    """
    def parse_again():
        """Evaluate the same string-set entry once more.

        Closure handed to ``compute_content_list``; meant to be called when
        TARGET_COLLECTOR.lookup_target() detected a TARGET_STATE.PENDING.

        """
        compute_string_set_string(
            box, string_name, content_list, counter_values)

    computed = compute_content_list(
        True, content_list, box, counter_values, parse_again)
    if not computed:
        # Nothing to record for an empty result.
        return
    box.string_set.append((string_name, computed))
|
||||
|
||||
|
||||
def compute_bookmark_label(box, content_list, counter_values):
    """Resolve the ``bookmark-label`` property.

    The content-list is evaluated by ``compute_content_list`` and the result
    is stored in ``box.bookmark_label``.

    """
    def parse_again():
        """Evaluate the bookmark label once more with the same arguments.

        Closure handed to ``compute_content_list`` as its re-parse callback.

        """
        compute_bookmark_label(box, content_list, counter_values)

    label = compute_content_list(
        True, content_list, box, counter_values, parse_again)
    box.bookmark_label = label
|
||||
|
||||
|
||||
def set_content_lists(element, box, style, counter_values):
|
||||
@ -282,20 +423,17 @@ def set_content_lists(element, box, style, counter_values):
|
||||
|
||||
These content-lists are used in GCPM properties like ``string-set`` and
|
||||
``bookmark-label``.
|
||||
|
||||
"""
|
||||
string_set = []
|
||||
box.string_set = []
|
||||
if style['string_set'] != 'none':
|
||||
for i, (string_name, string_values) in enumerate(style['string_set']):
|
||||
string_set.append((string_name, compute_content_list_string(
|
||||
element, box, counter_values, string_values)))
|
||||
box.string_set = string_set
|
||||
|
||||
compute_string_set_string(
|
||||
box, string_name, string_values, counter_values)
|
||||
if style['bookmark_label'] == 'none':
|
||||
box.bookmark_label = ''
|
||||
else:
|
||||
box.bookmark_label = compute_content_list_string(
|
||||
element, box, counter_values, style['bookmark_label'])
|
||||
compute_bookmark_label(
|
||||
box, style['bookmark_label'], counter_values)
|
||||
|
||||
|
||||
def update_counters(state, style):
|
||||
|
@ -638,24 +638,24 @@ def test_string_set():
|
||||
assert expand_to_dict('string-set: test attr(class)') == {
|
||||
'string_set': (('test', (('attr', 'class'),)),)}
|
||||
assert expand_to_dict('string-set: test counter(count)') == {
|
||||
'string_set': (('test', (('counter', ('count', 'decimal')),)),)}
|
||||
'string_set': (('test', (('counter', ['count', 'decimal']),)),)}
|
||||
assert expand_to_dict(
|
||||
'string-set: test counter(count, upper-roman)') == {
|
||||
'string_set': (
|
||||
('test', (('counter', ('count', 'upper-roman')),)),)}
|
||||
('test', (('counter', ['count', 'upper-roman']),)),)}
|
||||
assert expand_to_dict('string-set: test counters(count, ".")') == {
|
||||
'string_set': (('test', (('counters', ('count', '.', 'decimal')),)),)}
|
||||
'string_set': (('test', (('counters', ['count', '.', 'decimal']),)),)}
|
||||
assert expand_to_dict(
|
||||
'string-set: test counters(count, ".", upper-roman)') == {
|
||||
'string_set': (
|
||||
('test', (('counters', ('count', '.', 'upper-roman')),)),)}
|
||||
('test', (('counters', ['count', '.', 'upper-roman']),)),)}
|
||||
assert expand_to_dict(
|
||||
'string-set: test content(text) "string" '
|
||||
'attr(title) attr(title) counter(count)') == {
|
||||
'string_set': (('test', (
|
||||
('content', 'text'), ('STRING', 'string'),
|
||||
('attr', 'title'), ('attr', 'title'),
|
||||
('counter', ('count', 'decimal')),)),)}
|
||||
('counter', ['count', 'decimal']))),)}
|
||||
|
||||
assert_invalid('string-set: test')
|
||||
assert_invalid('string-set: test test1')
|
||||
|
Loading…
Reference in New Issue
Block a user