1
1
mirror of https://github.com/Kozea/WeasyPrint.git synced 2024-10-05 08:27:22 +03:00

Merge branch 'target-counter' of https://github.com/Tontyna/WeasyPrint into target-collector

This commit is contained in:
Guillaume Ayoub 2018-03-25 22:03:19 +02:00
commit 1ebd36e83a
6 changed files with 685 additions and 111 deletions

View File

@ -9,10 +9,13 @@
:license: BSD, see LICENSE for details.
"""
from urllib.parse import unquote
from .. import text
from ..logger import LOGGER
from ..urls import get_link_attribute
from .properties import INITIAL_VALUES, Dimension
from .targets import TARGET_COLLECTOR
ZERO_PIXELS = Dimension(0, 'px')
@ -399,16 +402,131 @@ def column_gap(computer, name, value):
return length(computer, name, value, pixels_only=True)
@register_computer('content')
def content(computer, name, values):
"""Compute the ``content`` property."""
def _toSelector(el, pseudo_type):
    """Return a selector-like label for ``el``, used in log messages.

    The object is classified by the *name* of its class:

    - ``Element``: ``tag`` followed by ``::pseudo_type`` when one is given;
    - ``PageType``: ``@page`` followed by the page name, side, ``:blank``,
      ``:first`` and the pseudo type, each only when set;
    - anything else: ``<ClassName pseudo_type>``.

    """
    kind = type(el).__name__

    if kind == 'Element':
        suffix = '::' + pseudo_type if pseudo_type else ''
        return '%s%s' % (el.tag, suffix)

    if kind != 'PageType':
        # Unknown object: fall back to its class name.
        described = '%s %s' % (kind, pseudo_type)
        return '<%s>' % (described.rstrip())

    page_name = ' ' + el.name if el.name else ''
    side = ':' + el.side if el.side else ''
    blank = ':blank' if el.blank else ''
    first = ':first' if el.first else ''
    pseudo = pseudo_type or ''
    label = '@page%s %s%s%s %s ' % (page_name, side, blank, first, pseudo)
    # The template leaves trailing blanks when parts are missing.
    return label.rstrip()
@register_computer('string-set')
def string_set(computer, name, values):
    """Compute the <content-list> of every string named in ``string-set``.

    Return ``values`` unchanged for the ``normal``/``none`` keywords,
    ``'none'`` when the property is not set on a real element, and otherwise
    a tuple of ``(string_name, computed_content_list)`` pairs.

    """
    # Keywords are not expected here, but pass them through untouched.
    if values in ('normal', 'none'):
        return values

    is_real_element = (
        type(computer.element).__name__ == 'Element' and
        not computer.pseudo_type)
    if not is_real_element:
        LOGGER.debug(
            'property `%s` discarded: %s in selector `%s`.',
            name,
            'Not a real element',
            _toSelector(computer.element, computer.pseudo_type))
        return 'none'

    computed = []
    for string_name, string_values in values:
        # Each named string carries its own content list.
        computed.append(
            (string_name, content(computer, name, string_values)))
    return tuple(computed)
@register_computer('bookmark-label')
@register_computer('content')
def content(computer, name, values):
    """Compute the <content-list> of ``content``, ``bookmark-label`` and
    ``string-set``.

    :param computer: object giving access to ``element`` and ``pseudo_type``.
    :param name: name of the property being computed.
    :param values: ``'normal'``, ``'none'`` or an iterable of
        ``(type, value)`` pairs.
    :returns: ``values`` unchanged for the keywords, ``'none'`` when the
        property is discarded (a message is logged), otherwise a tuple of
        computed ``(type, value)`` pairs: ``attr`` entries become ``STRING``
        entries and ``target-*`` entries get their anchor name resolved.

    """
    class ComputedContentError(ValueError):
        """Invalid or unsupported values for a known CSS property."""

    def computed_content_error(level, reason):
        """Log ``reason`` with the LOGGER method named ``level``."""
        getattr(LOGGER, level)(
            'property `%s` discarded: %s in selector `%s`.',
            name,
            reason,
            _toSelector(computer.element, computer.pseudo_type)
        )

    def parse_target_type(type_, target_values):
        """Resolve the anchor of a ``target-*()`` value.

        ``target_values`` is ``['STRING', <anchorname>, ...]`` or
        ``['attr', <attrname>, ...]``. Return the unquoted anchor name
        (without ``#``) followed by the remaining arguments, and register
        the name with the TARGET_COLLECTOR.

        """
        if type(computer.element).__name__ != 'Element':
            raise ComputedContentError('\'%s\' not (yet) supported' % (type_,))
        if target_values[0] == 'attr':
            href = computer.element.get(target_values[1], '')
        else:
            href = target_values[1]
        # [spec](https://www.w3.org/TR/css-content-3/#target-counter)
        # says:
        # > If there's no fragment, if the ID referenced isn't there,
        # > or if the URL points to an outside document,
        # > the user agent must treat that as an error.
        if href == '' or href == '#':
            raise ComputedContentError('Empty anchor name in %s' % (type_,))
        if not href.startswith('#'):
            raise ComputedContentError(
                'No %s for external URI reference "%s"' % (type_, href))
        href = unquote(href[1:])
        TARGET_COLLECTOR.collect_computed_target(href)
        # list() keeps the concatenation valid for tuple input as well.
        return [href] + list(target_values[2:])

    if values in ('normal', 'none'):
        return values

    if name == 'content':
        # [CSS3 spec](https://www.w3.org/TR/css-content-3/#content-property)
        # says:
        # > 'content' applies to:
        # > ::before, ::after, ::marker, and page margin boxes.
        # > Image and url values can apply to all elements.
        if not computer.pseudo_type:
            computed_content_error(
                'debug',
                'Not a pseudo-element')
            return 'none'
    else:
        # Ignore string-set and bookmark-label unless in a *real* element.
        if type(computer.element).__name__ != 'Element' \
                or computer.pseudo_type:
            computed_content_error(
                'debug',
                'Not a real element')
            return 'none'

    target_checks = ('target-counter', 'target-counters', 'target-text')

    def compute_item(type_, value):
        """Compute one ``(type, value)`` pair of the content list."""
        if type_ == 'attr':
            return ('STRING', computer.element.get(value, ''))
        if type_ in target_checks:
            return (type_, parse_target_type(type_, value))
        return (type_, value)

    try:
        # TODO: catch `string()` when not in @page-margin
        return tuple(compute_item(type_, value) for type_, value in values)
    except (ComputedContentError, AttributeError) as exc:
        # AttributeError: attr() in @page-'element',
        # e.g.: 'PageType' object has no attribute 'get'
        computed_content_error(
            'warning',
            exc.args[0] if exc.args and exc.args[0] else 'invalid content')
        return 'none'
@register_computer('display')
@ -495,7 +613,9 @@ def anchor(computer, name, values):
"""Compute the ``anchor`` property."""
if values != 'none':
_, key = values
return computer.element.get(key) or None
anchor_name = computer.element.get(key) or None
TARGET_COLLECTOR.collect_anchor(anchor_name)
return anchor_name
@register_computer('link')

170
weasyprint/css/targets.py Normal file
View File

@ -0,0 +1,170 @@
"""
weasyprint.css.targets
----------------------
An attempt to implement target-counter, target-counters and target-text.
The TARGET_COLLECTOR is a structure providing the required targets'
counter_values and the data needed to build PENDING targets later,
once all targeted anchors have been laid out.
:copyright: Copyright 2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import copy # deepcopy needed!
from ..logger import LOGGER
# Not sure what the Pythonic way to create constants is; maybe a namedtuple?
# Thanks to [Jon Betts](https://stackoverflow.com/a/23274028)
class _STATE(object):
    """Namespace of target-state constants.

    Attribute assignment on an instance is silently ignored, so the values
    cannot be changed through ``TARGET_STATE``.

    """

    PENDING = 0
    UPTODATE = 1
    UNDEFINED = 2

    # Reverse mapping from state value to its name, used for log output.
    __stateToName = dict((
        (PENDING, 'PENDING'),
        (UPTODATE, 'UPTODATE'),
        (UNDEFINED, 'UNDEFINED'),
    ))

    def __setattr__(self, *_):
        """Ignore every attribute assignment on the instance."""
        return None

    def name(self, state):
        """Return the human-readable name of ``state``."""
        names = self.__stateToName
        if state in names:
            return names[state]
        return 'Invalid state'


TARGET_STATE = _STATE()
class TargetLookupItem(object):
    """Record kept by the target collector for one anchor name."""

    def __init__(self, state=TARGET_STATE.PENDING):
        # Data for PENDING targets, filled while the target is unresolved.
        self.pending_boxes = {}
        # Box of the anchor, needed for target-text
        # (read through TEXT_CONTENT_EXTRACTORS).
        self.target_box = None
        # Counter values at the anchor, required by target-counter
        # and target-counters.
        self.target_counter_values = {}
        self.state = state
class _TargetCollector(object):
    """Collect and provide the data needed for CSS content with ``target-*``.

    ``existing_anchors`` lists the anchor names found in the document,
    ``items`` maps each targeted anchor name to its ``TargetLookupItem`` and
    ``had_peding_targets`` records whether a lookup hit a PENDING target.

    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Forget all collected anchors and targets."""
        self.had_peding_targets = False
        self.existing_anchors = []
        self.items = {}

    def _addtarget(self, anchor_name):
        """Return the item for ``anchor_name``, creating it when missing."""
        item = self.items.get(anchor_name)
        if item is None:
            item = self.items[anchor_name] = TargetLookupItem()
        return item

    def collect_anchor(self, anchor_name):
        """Store ``anchor_name`` in ``existing_anchors``.

        Should be called by computed_values.anchor(). Empty or non-string
        names are ignored; a name seen before only triggers a warning.

        """
        if not (anchor_name and isinstance(anchor_name, str)):
            return
        if anchor_name in self.existing_anchors:
            LOGGER.warning(' ! anchor redefined: %s', anchor_name)
        else:
            self.existing_anchors.append(anchor_name)
            LOGGER.debug(' + anchor added: "%s" ', anchor_name)

    def collect_computed_target(self, anchor_name):
        """Store a computed target's (internal!) anchor name.

        The name is verified by computed_values.content(): it comes without
        ``#`` and already unquoted.

        """
        if anchor_name and isinstance(anchor_name, str):
            self._addtarget(anchor_name)

    def verify_collection(self):
        """Mark targets without a matching anchor as UNDEFINED and log all."""
        LOGGER.debug('------- collected targets -------------')
        for key, item in self.items.items():
            # mark target names not in existing_anchors as UNDEFINED
            if key not in self.existing_anchors:
                item.state = TARGET_STATE.UNDEFINED
            LOGGER.debug('%s %s', key, TARGET_STATE.name(item.state))
        LOGGER.debug('------- existing anchors -------------')
        LOGGER.debug(self.existing_anchors)

    def lookup_target(self, anchor_name, source_box, parse_again_function):
        """Return the ``TargetLookupItem`` for ``anchor_name``.

        Called in content_to_boxes() when ``source_box`` needs a target-*.

        - UPTODATE: the item was already filled by a previous anchor element.
        - PENDING: the anchor exists but has not been stored yet;
          ``parse_again_function`` is remembered for ``source_box`` so the
          content can be parsed again later.
        - UNDEFINED: no such anchor; an error is logged and the ``content``
          style of ``source_box`` is set to ``'none'``.

        """
        item = self.items.get(anchor_name)
        if item is None:
            # Unknown target: hand back an item that is not stored.
            item = TargetLookupItem(TARGET_STATE.UNDEFINED)
        LOGGER.debug(
            'lookup_target %s %s', anchor_name, TARGET_STATE.name(item.state))
        if item.state == TARGET_STATE.PENDING:
            if anchor_name not in self.existing_anchors:
                item.state = TARGET_STATE.UNDEFINED
            else:
                self.had_peding_targets = True
                LOGGER.debug(' -> still pending. Keep infos.')
                item.pending_boxes.setdefault(
                    source_box, parse_again_function)
        if item.state == TARGET_STATE.UNDEFINED:
            LOGGER.error(
                'content discarded: target points to undefined anchor "%s"',
                anchor_name)
            # feedback to invoker: discard the parent_box
            # at the moment it's `build.before_after_to_box()` which cares
            source_box.style['content'] = 'none'
        return item

    def store_target(self, anchor_name, target_counter_values, target_box):
        """Fill the PENDING item of ``anchor_name``, if there is one.

        Called by every anchor element in build.element_to_box. Only
        previously collected anchor names are stored; an item that is no
        longer PENDING is left untouched.

        """
        item = self.items.get(anchor_name, None)
        if not item:
            LOGGER.debug(' -> achor %s not targetted', anchor_name)
            return
        LOGGER.debug(
            'store_target? %s %s', anchor_name,
            TARGET_STATE.name(item.state))
        if item.state != TARGET_STATE.PENDING:
            LOGGER.debug(
                ' -> duplicate anchor definition: %s', anchor_name)
            return
        LOGGER.debug(' -> update: %s', target_counter_values)
        item.state = TARGET_STATE.UPTODATE
        # A deep copy is required: the caller's counter values must not
        # stay connected to the stored ones.
        item.target_counter_values = copy.deepcopy(target_counter_values)
        item.target_box = target_box

    def check_pending_targets(self):
        """Re-run the stored parse functions of all pending boxes.

        Does nothing unless a lookup flagged a PENDING target since the
        last reset or check.

        """
        if not self.had_peding_targets:
            return
        LOGGER.info('Step 4.3 Reparsing pending targets')
        self.had_peding_targets = False
        # Iterate over snapshots: a parse function may call lookup_target()
        # again and register new pending boxes while we are looping.
        for item in list(self.items.values()):
            # UNDEFINED items never have a `parse_again` function.
            for parse_again in list(item.pending_boxes.values()):
                parse_again()

    # Original (misspelled) name, kept for existing callers.
    check_peding_targets = check_pending_targets


TARGET_COLLECTOR = _TargetCollector()

View File

@ -733,7 +733,10 @@ def clip(token):
@validator(wants_base_url=True)
def content(tokens, base_url):
"""``content`` property validation."""
"""``content`` property validation.
TODO: should become a @comma_separated_list to validate
CSS3 <content-replacement>
"""
keyword = get_single_keyword(tokens)
if keyword in ('normal', 'none'):
return keyword
@ -748,38 +751,8 @@ def validate_content_token(base_url, token):
Return (type, content) or False for invalid tokens.
"""
quote_type = CONTENT_QUOTE_KEYWORDS.get(get_keyword(token))
if quote_type is not None:
return ('QUOTE', quote_type)
type_ = token.type
if type_ == 'string':
return ('STRING', token.value)
if type_ == 'url':
return ('URI', safe_urljoin(base_url, token.value))
function = parse_function(token)
if function:
name, args = function
prototype = (name, [a.type for a in args])
args = [getattr(a, 'value', a) for a in args]
if prototype == ('attr', ['ident']):
return (name, args[0])
elif prototype in (('counter', ['ident']),
('counters', ['ident', 'string'])):
args.append('decimal')
return (name, args)
elif prototype in (('counter', ['ident', 'ident']),
('counters', ['ident', 'string', 'ident'])):
style = args[-1]
if style in ('none', 'decimal') or style in counters.STYLES:
return (name, args)
elif prototype in (('string', ['ident']),
('string', ['ident', 'ident'])):
if len(args) > 1:
args[1] = args[1].lower()
if args[1] not in ('first', 'start', 'last', 'first-except'):
raise InvalidValues()
return (name, args)
return validate_content_list_token(
base_url, token, for_content_box=True)
def parse_function(function_token):
@ -1614,10 +1587,11 @@ def lang(token):
return ('string', token.value)
@validator(unstable=True)
def bookmark_label(tokens):
@validator(unstable=True, wants_base_url=True)
def bookmark_label(tokens, base_url):
"""Validation for ``bookmark-label``."""
parsed_tokens = tuple(validate_content_list_token(v) for v in tokens)
parsed_tokens = tuple(validate_content_list_token(
base_url, v, for_content_box=False) for v in tokens)
if None not in parsed_tokens:
return parsed_tokens
@ -1634,50 +1608,212 @@ def bookmark_level(token):
return 'none'
@validator(unstable=True)
@validator(unstable=True, wants_base_url=True)
@comma_separated_list
def string_set(tokens):
def string_set(tokens, base_url):
"""Validation for ``string-set``."""
if len(tokens) >= 2:
var_name = get_keyword(tokens[0])
parsed_tokens = tuple(
validate_content_list_token(v) for v in tokens[1:])
validate_content_list_token(
base_url, v, for_content_box=False) for v in tokens[1:])
if None not in parsed_tokens:
return (var_name, parsed_tokens)
elif tokens and tokens[0].value == 'none':
return 'none'
def validate_content_list_token(token):
def validate_content_list_token(base_url, token, for_content_box):
"""Validation for a single token of <content-list> used in GCPM.
Not really.
GCPM <content-list> =
[ <string> | contents | <image> | <quote> | <target> | <leader()> ]+
(Draft, 24 January 2018. Really a DRAFT. Not an RFC. Not a SPEC.
BTW: The current Draft GCPM ``string-set`` value =
none | [ <custom-ident> <string>+ ]#
So. This is the validation for tokens that make sense in
css properties ``string-set``, ``bookmark-label`` and ``content``:
<modified-content-list> = [
<string> | attr() | <counter> | <target> |
<content> |
url() | <quote> | string() | leader()
]+
:param for_content_box: controls which tokens are valid
Valid tokens when ``for_content_box`` ==
- True (called from/for css property 'content':
<string> | attr() | <counter> | <target> |
url() | <quote> | string() | leader()
The final decision whether a token is valid is the job of
computed_values.content()
- False (called from/for css properties 'string-set', 'bookmark-label':
<string> | attr() | <counter> | <target> |
<content>
Return (type, content) or False for invalid tokens.
"""
def validate_target_token(token):
""" validate first parameter of ``target-*()``-token
returns ['attr', '<attrname>' ]
or ['STRING', '<anchorname>'] when valid
evaluation of the anchorname is job of compute()
"""
# TODO: what about ``attr(href url)`` ?
if isinstance(token, str):
# url() or "string" given
# verify #anchor is done in compute()
# if token.value.startswith('#'):
return ['STRING', token]
# parse_function takes token.type for granted!
if not hasattr(token, 'type'):
return
function = parse_function(token)
if function:
name, args = function
params = [a.type for a in args]
values = [getattr(a, 'value', a) for a in args]
if name == 'attr' and params == ['ident']:
return [name, values[0]]
if for_content_box:
quote_type = CONTENT_QUOTE_KEYWORDS.get(get_keyword(token))
if quote_type is not None:
return ('QUOTE', quote_type)
else:
if get_keyword(token) == 'contents':
return ('content', 'text')
type_ = token.type
if type_ == 'string':
return ('STRING', token.value)
if for_content_box:
if type_ == 'url':
return ('URI', safe_urljoin(base_url, token.value))
function = parse_function(token)
if function:
name, args = function
prototype = (name, tuple(a.type for a in args))
args = tuple(getattr(a, 'value', a) for a in args)
if prototype == ('attr', ('ident',)):
if not function:
# to pass unit test `test_boxes.test_before_after`
# the log string must contain "invalid value"
raise InvalidValues('invalid value/unsupported token ´%s\´' % (token,))
name, args = function
# known functions in 'content', 'string-set' and 'bookmark-label':
valid_functions = ['attr',
'counter', 'counters',
'target-counter', 'target-counters', 'target-text']
# 'content'
if for_content_box:
valid_functions += ['string',
'leader']
else:
valid_functions += ['content']
unsupported_functions = ['leader']
if name not in valid_functions:
# to pass unit test `test_boxes.test_before_after`
# the log string must contain "invalid value"
raise InvalidValues('invalid value: function `%s()`' % (name))
if name in unsupported_functions:
# suppress -- not (yet) implemented, no error
LOGGER.warn('\'%s()\' not (yet) supported', name)
return ('STRING', '')
prototype = (name, [a.type for a in args])
args = [getattr(a, 'value', a) for a in args]
if prototype == ('attr', ['ident']):
# TODO: what about ``attr(href url)`` ?
return (name, args[0])
elif prototype in (('content', []), ('content', ['ident', ])):
if not args:
return (name, 'text')
elif args[0] in ('text', 'after', 'before', 'first-letter'):
return (name, args[0])
elif prototype in (('content', ()), ('content', ('ident',))):
if not args:
return (name, 'text')
elif args[0] in ('text', 'after', 'before', 'first-letter'):
return (name, args[0])
elif prototype in (('counter', ('ident',)),
('counters', ('ident', 'string'))):
args += ('decimal',)
elif prototype in (('counter', ['ident']),
('counters', ['ident', 'string'])):
args.append('decimal')
return (name, args)
elif prototype in (('counter', ['ident', 'ident']),
('counters', ['ident', 'string', 'ident'])):
style = args[-1]
if style in ('none', 'decimal') or style in counters.STYLES:
return (name, args)
elif prototype in (('counter', ('ident', 'ident')),
('counters', ('ident', 'string', 'ident'))):
elif prototype in (('string', ['ident']),
('string', ['ident', 'ident'])):
if len(args) > 1:
args[1] = args[1].lower()
if args[1] not in ('first', 'start', 'last', 'first-except'):
raise InvalidValues()
return (name, args)
# target-counter() = target-counter(
# [ <string> | <url> ] , <custom-ident> ,
# <counter-style>? )
elif name == 'target-counter':
if prototype in ((name, ['url', 'ident']),
(name, ['url', 'ident', 'ident']),
(name, ['string', 'ident']),
(name, ['string', 'ident', 'ident']),
(name, ['function', 'ident']),
(name, ['function', 'ident', 'ident'])):
# default style
if len(args) == 2:
args.append('decimal')
# accept "#anchorname" and attr(x)
retval = validate_target_token(args.pop(0))
if retval is None:
raise InvalidValues()
style = args[-1]
if style in ('none', 'decimal') or style in counters.STYLES:
return (name, args)
return (name, retval + args)
# target-counters() = target-counters(
# [ <string> | <url> ] , <custom-ident> , <string> ,
# <counter-style>? )
elif name == 'target-counters':
if prototype in ((name, ['url', 'ident', 'string']),
(name, ['url', 'ident', 'string', 'ident']),
(name, ['string', 'ident', 'string']),
(name, ['string', 'ident', 'string', 'ident']),
(name, ['function', 'ident', 'string']),
(name, ['function', 'ident', 'string', 'ident'])):
# default style
if len(args) == 3:
args.append('decimal')
# accept "#anchorname" and attr(x)
retval = validate_target_token(args.pop(0))
if retval is None:
raise InvalidValues()
style = args[-1]
if style in ('none', 'decimal') or style in counters.STYLES:
return (name, retval + args)
# target-text() = target-text(
# [ <string> | <url> ] ,
# [ content | before | after | first-letter ]? )
elif name == 'target-text':
if prototype in ((name, ['url']),
(name, ['url', 'ident']),
(name, ['string']),
(name, ['string', 'ident']),
(name, ['function']),
(name, ['function', 'ident'])):
if len(args) == 1:
args.append('content')
# accept "#anchorname" and attr(x)
retval = validate_target_token(args.pop(0))
if retval is None:
raise InvalidValues()
style = args[-1]
# hint: the syntax isn't stable yet!
if style in ('content', 'after', 'before', 'first-letter'):
# build.TEXT_CONTENT_EXTRACTORS needs 'text'
# TODO: should we define
# TEXT_CONTENT_EXTRACTORS['content'] == box_text ?
if style == 'content':
args[-1] = 'text'
return (name, retval + args)
@validator(unstable=True)

View File

@ -16,6 +16,7 @@ import cairocffi as cairo
from . import CSS
from .css import get_all_computed_styles
from .css.targets import TARGET_COLLECTOR
from .draw import draw_page, stacked
from .fonts import FontConfiguration
from .formatting_structure import boxes
@ -295,6 +296,15 @@ class Document(object):
@classmethod
def _render(cls, html, stylesheets, enable_hinting,
presentational_hints=False, font_config=None):
# new Document needs fresh Target-Collection
# reset the TARGET_COLLECTOR before the Document's styles are parsed
# TODO: call reset at the end of this function to cleanup?
# - reset_target_collector Yes/No could be a useful option for users
# who want to combine several documents...
# - in the future each Document should create its own TargetCollector
# and hand it down to formatting_structure / pages / maybe css
TARGET_COLLECTOR.reset()
if font_config is None:
font_config = FontConfiguration()
page_rules = []

View File

@ -21,6 +21,8 @@ import tinycss2.color3
from . import boxes, counters
from .. import html
from ..css import properties
from ..css.targets import TARGET_COLLECTOR, TARGET_STATE
from ..logger import LOGGER
# Maps values of the ``display`` CSS property to box types.
BOX_TYPE_FROM_DISPLAY = {
@ -46,6 +48,13 @@ BOX_TYPE_FROM_DISPLAY = {
def build_formatting_structure(element_tree, style_for, get_image_from_uri,
base_url):
"""Build a formatting structure (box tree) from an element tree."""
LOGGER.info('Step 4.1 - Verifying collected targets')
# NOTE: this step is *not* required. Tagging UNDEFINED targets in advance
# probably doesn't speed things up much.
TARGET_COLLECTOR.verify_collection()
LOGGER.info('Step 4.2 - Building basic boxes')
box_list = element_to_box(
element_tree, style_for, get_image_from_uri, base_url)
if box_list:
@ -63,6 +72,10 @@ def build_formatting_structure(element_tree, style_for, get_image_from_uri,
return style
box, = element_to_box(
element_tree, root_style_for, get_image_from_uri, base_url)
TARGET_COLLECTOR.check_peding_targets()
# State now: no more pending targets in pseudo-elements' content boxes.
box.is_for_root_element = True
# If this is changed, maybe update weasy.layout.pages.make_margin_boxes()
process_whitespace(box)
@ -144,6 +157,13 @@ def element_to_box(element, style_for, get_image_from_uri, base_url,
children.extend(before_after_to_box(
element, 'before', state, style_for, get_image_from_uri))
# Collect the anchor's counter_values; it may be a target.
# To get spec-conformant counter_values we must do it here,
# after ::before is parsed and before ::after is.
if style['anchor']:
TARGET_COLLECTOR.store_target(style['anchor'], counter_values, box)
text = element.text
if text:
children.append(boxes.TextBox.anonymous_from(box, text))
@ -168,6 +188,7 @@ def element_to_box(element, style_for, get_image_from_uri, base_url,
counter_values.pop(name)
box.children = children
# calculate string-set and bookmark-label
set_content_lists(element, box, style, counter_values)
# Specific handling for the element. (eg. replaced element)
@ -195,6 +216,10 @@ def before_after_to_box(element, pseudo_type, state, style_for,
quote_depth, counter_values, _counter_scopes = state
update_counters(state, style)
# pseudo-elements can't be anchors, no need to call
# TARGET_COLLECTOR.store_target(...)
children = []
if display == 'list-item':
children.extend(add_box_marker(
@ -202,25 +227,57 @@ def before_after_to_box(element, pseudo_type, state, style_for,
children.extend(content_to_boxes(
style, box, quote_depth, counter_values, get_image_from_uri))
# content_to_boxes detected an UNDEFINED target, discard the box
if style['content'] == 'none':
return
box.children = children
yield box
def content_to_boxes(style, parent_box, quote_depth, counter_values,
get_image_from_uri, context=None, page=None):
"""Takes the value of a ``content`` property and yield boxes."""
def compute_content_list(return_a_string,
content_list, parent_box, counter_values,
parse_again_func,
get_image_from_uri=None,
quote_depth=None, quote_style=None,
context=None, page=None):
"""
Compute and return the string or the boxes corresponding
to the content_list.
:param return_a_string:
True for string-set-string and bookmark-label,
otherwise (content) a list of anonymous InlineBox(es) is returned
:param parse_again_func:
function to compute the content_list again
when TARGET_COLLECTOR.lookup_target() detected a TARGET_STATE.PENDING
build_formatting_structure calls
TARGET_COLLECTOR.check_pending_targets
after the first pass to do required reparsing
"""
boxlist = []
texts = []
for type_, value in style['content']:
for type_, value in content_list:
if type_ == 'STRING':
texts.append(value)
elif type_ == 'URI':
elif type_ == 'URI' and not return_a_string and \
get_image_from_uri is not None:
image = get_image_from_uri(value)
if image is not None:
text = ''.join(texts)
if text:
yield boxes.TextBox.anonymous_from(parent_box, text)
boxlist.append(
boxes.TextBox.anonymous_from(parent_box, text))
texts = []
yield boxes.InlineReplacedBox.anonymous_from(parent_box, image)
boxlist.append(
boxes.InlineReplacedBox.anonymous_from(parent_box, image))
elif type_ == 'content' and return_a_string:
added_text = TEXT_CONTENT_EXTRACTORS[value](parent_box)
# Simulate the step of white space processing
# (normally done during the layout)
added_text = added_text.strip()
texts.append(added_text)
elif type_ == 'counter':
counter_name, counter_style = value
counter_value = counter_values.get(counter_name, [0])[-1]
@ -232,49 +289,133 @@ def content_to_boxes(style, parent_box, quote_depth, counter_values,
for counter_value in counter_values.get(counter_name, [0])
))
elif type_ == 'string' and context is not None and page is not None:
# string() is only valid in @page context
text = context.get_string_set_for(page, *value)
texts.append(text)
else:
assert type_ == 'QUOTE'
elif type_ == 'target-counter':
target_name, counter_name, counter_style = value
lookup_target = TARGET_COLLECTOR.lookup_target(
target_name, parent_box, parse_again_func)
if lookup_target.state == TARGET_STATE.UPTODATE:
counter_value = lookup_target.target_counter_values.get(
counter_name, [0])[-1]
texts.append(counters.format(counter_value, counter_style))
else:
texts = []
break
elif type_ == 'target-counters':
target_name, counter_name, separator, counter_style = value
lookup_target = TARGET_COLLECTOR.lookup_target(
target_name, parent_box, parse_again_func)
if lookup_target.state == TARGET_STATE.UPTODATE:
target_counter_values = lookup_target.target_counter_values
texts.append(separator.join(
counters.format(counter_value, counter_style)
for counter_value in target_counter_values.get(
counter_name, [0])
))
else:
texts = []
break
elif type_ == 'target-text':
target_name, text_style = value
lookup_target = TARGET_COLLECTOR.lookup_target(
target_name, parent_box, parse_again_func)
if lookup_target.state == TARGET_STATE.UPTODATE:
target_box = lookup_target.target_box
text = TEXT_CONTENT_EXTRACTORS[text_style](target_box)
# Simulate the step of white space processing
# (normally done during the layout)
texts.append(text.strip())
else:
texts = []
break
elif type_ == 'QUOTE' and not return_a_string and \
quote_depth is not None and quote_style is not None:
is_open, insert = value
if not is_open:
quote_depth[0] = max(0, quote_depth[0] - 1)
if insert:
open_quotes, close_quotes = style['quotes']
open_quotes, close_quotes = quote_style
quotes = open_quotes if is_open else close_quotes
texts.append(quotes[min(quote_depth[0], len(quotes) - 1)])
if is_open:
quote_depth[0] += 1
else:
# TODO: in previous versions an AssertionError was raised!
pass
text = ''.join(texts)
if return_a_string:
return text
if text:
yield boxes.TextBox.anonymous_from(parent_box, text)
boxlist.append(boxes.TextBox.anonymous_from(parent_box, text))
return boxlist
def compute_content_list_string(element, box, counter_values, content_list):
    """Build the text described by ``content_list``.

    Each ``(type, value)`` entry contributes one piece of text; entries of
    an unknown type contribute nothing.

    """
    parts = []
    for type_, value in content_list:
        if type_ == 'STRING':
            parts.append(value)
        elif type_ == 'attr':
            parts.append(element.get(value, ''))
        elif type_ == 'content':
            extracted = TEXT_CONTENT_EXTRACTORS[value](box)
            # Simulate the step of white space processing
            # (normally done during the layout)
            parts.append(extracted.strip())
        elif type_ == 'counter':
            counter_name, counter_style = value
            # Only the innermost counter value is shown.
            innermost = counter_values.get(counter_name, [0])[-1]
            parts.append(counters.format(innermost, counter_style))
        elif type_ == 'counters':
            counter_name, separator, counter_style = value
            formatted = (
                counters.format(counter_value, counter_style)
                for counter_value in counter_values.get(counter_name, [0]))
            parts.append(separator.join(formatted))
    return ''.join(parts)
def content_to_boxes(style, parent_box, quote_depth, counter_values,
                     get_image_from_uri, context=None, page=None):
    """Take the value of a ``content`` property and return boxes.

    :param style: style mapping; ``content``, ``quotes`` and ``display``
        are read.
    :param parent_box: box the generated boxes are created for.
    :param quote_depth: quote depth handed to ``compute_content_list``.
    :param counter_values: counter values used for ``counter()`` entries.
    :param get_image_from_uri: image loader handed to
        ``compute_content_list``.
    :param context: passed through, needed for ``string()`` entries.
    :param page: passed through, needed for ``string()`` entries.
    :returns: the value returned by ``compute_content_list`` when asked for
        boxes rather than a string.

    """
    def parse_again():
        """Rebuild the children of ``parent_box``.

        Registered with TARGET_COLLECTOR.lookup_target() when a target is
        still PENDING. The closure keeps the arguments of the first call.
        NOTE(review): ``counter_values`` and ``quote_depth`` are captured by
        reference, not copied -- confirm that later mutation cannot change
        the re-parsed result.

        """
        local_children = []
        if style['display'] == 'list-item':
            local_children.extend(add_box_marker(
                parent_box, counter_values, get_image_from_uri))
        # Forward context and page as well, so that string() entries are
        # computed exactly as in the first pass.
        local_children.extend(content_to_boxes(
            style, parent_box,
            quote_depth, counter_values,
            get_image_from_uri, context, page))
        parent_box.children = local_children

    # Can't use `yield`! Must `return` the boxes, otherwise
    # set_content_lists, calling compute_content_list for `contents()`,
    # will fail.
    return compute_content_list(
        False,
        style['content'],
        parent_box, counter_values,
        parse_again,
        get_image_from_uri, quote_depth, style['quotes'],
        context, page)
def compute_string_set_string(box, string_name, content_list, counter_values):
    """Compute one named string of the ``string-set`` property.

    The content list is turned into text; a non-empty result is appended to
    ``box.string_set`` as a ``(string_name, text)`` pair.

    """
    def reparse():
        """Compute the same string again.

        Handed to compute_content_list for the case where
        TARGET_COLLECTOR.lookup_target() detects a TARGET_STATE.PENDING.

        """
        compute_string_set_string(
            box, string_name, content_list, counter_values)

    text = compute_content_list(
        return_a_string=True,
        content_list=content_list,
        parent_box=box,
        counter_values=counter_values,
        parse_again_func=reparse)
    if not text:
        return
    box.string_set.append((string_name, text))
def compute_bookmark_label(box, content_list, counter_values):
    """Compute the ``bookmark-label`` text and store it on the box.

    The content list is turned into text, which is assigned to
    ``box.bookmark_label``.

    """
    def reparse():
        """Compute the label again, for targets that were still pending."""
        compute_bookmark_label(box, content_list, counter_values)

    label = compute_content_list(
        return_a_string=True,
        content_list=content_list,
        parent_box=box,
        counter_values=counter_values,
        parse_again_func=reparse)
    box.bookmark_label = label
def set_content_lists(element, box, style, counter_values):
@ -282,20 +423,17 @@ def set_content_lists(element, box, style, counter_values):
These content-lists are used in GCPM properties like ``string-set`` and
``bookmark-label``.
"""
string_set = []
box.string_set = []
if style['string_set'] != 'none':
for i, (string_name, string_values) in enumerate(style['string_set']):
string_set.append((string_name, compute_content_list_string(
element, box, counter_values, string_values)))
box.string_set = string_set
compute_string_set_string(
box, string_name, string_values, counter_values)
if style['bookmark_label'] == 'none':
box.bookmark_label = ''
else:
box.bookmark_label = compute_content_list_string(
element, box, counter_values, style['bookmark_label'])
compute_bookmark_label(
box, style['bookmark_label'], counter_values)
def update_counters(state, style):

View File

@ -638,24 +638,24 @@ def test_string_set():
assert expand_to_dict('string-set: test attr(class)') == {
'string_set': (('test', (('attr', 'class'),)),)}
assert expand_to_dict('string-set: test counter(count)') == {
'string_set': (('test', (('counter', ('count', 'decimal')),)),)}
'string_set': (('test', (('counter', ['count', 'decimal']),)),)}
assert expand_to_dict(
'string-set: test counter(count, upper-roman)') == {
'string_set': (
('test', (('counter', ('count', 'upper-roman')),)),)}
('test', (('counter', ['count', 'upper-roman']),)),)}
assert expand_to_dict('string-set: test counters(count, ".")') == {
'string_set': (('test', (('counters', ('count', '.', 'decimal')),)),)}
'string_set': (('test', (('counters', ['count', '.', 'decimal']),)),)}
assert expand_to_dict(
'string-set: test counters(count, ".", upper-roman)') == {
'string_set': (
('test', (('counters', ('count', '.', 'upper-roman')),)),)}
('test', (('counters', ['count', '.', 'upper-roman']),)),)}
assert expand_to_dict(
'string-set: test content(text) "string" '
'attr(title) attr(title) counter(count)') == {
'string_set': (('test', (
('content', 'text'), ('STRING', 'string'),
('attr', 'title'), ('attr', 'title'),
('counter', ('count', 'decimal')),)),)}
('counter', ['count', 'decimal']))),)}
assert_invalid('string-set: test')
assert_invalid('string-set: test test1')