# Mirror of https://github.com/Kozea/WeasyPrint.git
# Synced 2024-10-05 16:37:47 +03:00
# 249 lines, 9.6 KiB, Python
"""
    weasyprint.formatting_structure.targets
    ---------------------------------------

    Handle target-counter, target-counters and target-text.

    The TargetCollector is a structure providing required targets'
    counter_values and stuff needed to build pending targets later,
    when the layout of all targeted anchors has been done.

    :copyright: Copyright 2011-2019 Simon Sapin and contributors, see AUTHORS.
    :license: BSD, see LICENSE for details.

"""

import copy

from ..logger import LOGGER


class TargetLookupItem(object):
    """Item controlling pending targets and page based target counters.

    Collected in the TargetCollector's ``target_lookup_items``, keyed by
    anchor name.

    ``state`` is the item's lifecycle marker. The values used in this module
    are ``'pending'`` (the default), ``'up-to-date'`` and ``'undefined'``.

    """
    def __init__(self, state='pending'):
        self.state = state

        # Required by target-counter and target-counters to access the
        # target's .cached_counter_values.
        # Needed for target-text via TEXT_CONTENT_EXTRACTORS.
        self.target_box = None

        # Functions that have to be called to check pending targets.
        # Keys are (source_box, css_token).
        self.parse_again_functions = {}

        # Anchor position during pagination (page_number - 1)
        self.page_maker_index = None

        # target_box's page_counters during pagination
        self.cached_page_counter_values = {}


class CounterLookupItem(object):
    """Item controlling page based counters.

    Collected in the TargetCollector's ``counter_lookup_items``, keyed by
    ``(parent_box, css_token)``.

    The three constructor arguments are stored as given (no copy is made).

    """
    def __init__(self, parse_again, missing_counters, missing_target_counters):
        # Function that has to be called to check pending counters.
        self.parse_again = parse_again

        # Missing counters and target counters
        self.missing_counters = missing_counters
        self.missing_target_counters = missing_target_counters

        # Box position during pagination (page_number - 1)
        self.page_maker_index = None

        # Marker for remake_page
        self.pending = False

        # Targeting box's page_counters during pagination
        self.cached_page_counter_values = {}


class TargetCollector(object):
    """Collector of HTML targets used by CSS content with ``target-*``."""

    def __init__(self):
        # Lookup items for targets and page counters
        self.target_lookup_items = {}
        self.counter_lookup_items = {}

        # When collecting is True, compute_content_list() collects missing
        # page counters in CounterLookupItems. Otherwise, it mixes in the
        # TargetLookupItem's cached_page_counter_values.
        # Is switched to False in check_pending_targets().
        self.collecting = True

        # had_pending_targets is set to True when a target is needed but has
        # not been seen yet. check_pending_targets then uses this information
        # to call the needed parse_again functions.
        self.had_pending_targets = False

        # List of anchors that have already been seen during parsing.
        self.existing_anchors = []

    def anchor_name_from_token(self, anchor_token):
        """Get anchor name from string or uri token.

        ``anchor_token`` is a ``(type, value)`` pair. A ``'string'`` token
        whose value starts with '#' yields the value without the '#'. A
        ``'url'`` token whose value is ``('internal', name)`` yields ``name``.
        Any other token yields ``None``.

        """
        if anchor_token[0] == 'string' and anchor_token[1].startswith('#'):
            return anchor_token[1][1:]
        if anchor_token[0] == 'url' and anchor_token[1][0] == 'internal':
            return anchor_token[1][1]
        return None

    def collect_anchor(self, anchor_name):
        """Store ``anchor_name`` in ``existing_anchors``.

        Empty and non-string names are ignored. A name that has already been
        stored is not added again; a warning is logged instead.

        """
        if anchor_name and isinstance(anchor_name, str):
            if anchor_name in self.existing_anchors:
                LOGGER.warning('Anchor defined twice: %s', anchor_name)
            else:
                self.existing_anchors.append(anchor_name)

    def collect_computed_target(self, anchor_token):
        """Store a computed internal target's anchor name.

        The name is extracted from ``anchor_token`` with
        ``anchor_name_from_token``. If a name is found and no lookup item
        exists for it yet, a new pending TargetLookupItem is registered.

        """
        anchor_name = self.anchor_name_from_token(anchor_token)
        if anchor_name:
            self.target_lookup_items.setdefault(
                anchor_name, TargetLookupItem())

    def lookup_target(self, anchor_token, source_box, css_token, parse_again):
        """Get a TargetLookupItem corresponding to ``anchor_token``.

        If it is already filled by a previous anchor-element, the status is
        'up-to-date'. Otherwise, it is 'pending', we must parse the whole
        tree again: ``parse_again`` is then registered on the item under the
        ``(source_box, css_token)`` key, unless that key is already taken.

        A pending item whose anchor is not in ``existing_anchors`` becomes
        'undefined'. When no item is registered for the anchor, a fresh
        'undefined' item is returned without being stored. In both cases an
        error is logged.

        """
        anchor_name = self.anchor_name_from_token(anchor_token)
        item = self.target_lookup_items.get(
            anchor_name, TargetLookupItem('undefined'))

        if item.state == 'pending':
            if anchor_name in self.existing_anchors:
                self.had_pending_targets = True
                item.parse_again_functions.setdefault(
                    (source_box, css_token), parse_again)
            else:
                item.state = 'undefined'

        if item.state == 'undefined':
            LOGGER.error(
                'Content discarded: target points to undefined anchor "%s"',
                anchor_token)

        return item

    def store_target(self, anchor_name, target_counter_values, target_box):
        """Store a target called ``anchor_name``.

        If there is a pending TargetLookupItem, it is updated. Only previously
        collected anchors are stored.

        """
        item = self.target_lookup_items.get(anchor_name)
        if item and item.state == 'pending':
            item.state = 'up-to-date'
            item.target_box = target_box
            # Store the counter_values in the target_box like
            # compute_content_list does. A deep copy is stored so that later
            # changes to target_counter_values do not affect the target.
            # TODO: remove attribute or set a default value in Box class
            if not hasattr(target_box, 'cached_counter_values'):
                target_box.cached_counter_values = copy.deepcopy(
                    target_counter_values)

    def collect_missing_counters(self, parent_box, css_token,
                                 parse_again_function, missing_counters,
                                 missing_target_counters):
        """Collect missing (probably page-based) counters during formatting.

        The ``missing_counters`` are re-used during pagination.

        The ``missing_link`` attribute added to the parent_box is required to
        connect the paginated boxes to their originating ``parent_box``.

        Nothing is collected once ``collecting`` is False, or when both
        ``missing_counters`` and ``missing_target_counters`` are empty. An
        item already stored for ``(parent_box, css_token)`` is kept.

        """
        # No counter collection during pagination
        if not self.collecting:
            return

        # No need to add empty miss-lists
        if not (missing_counters or missing_target_counters):
            return

        # TODO: remove attribute or set a default value in Box class
        if not hasattr(parent_box, 'missing_link'):
            parent_box.missing_link = parent_box
        counter_lookup_item = CounterLookupItem(
            parse_again_function, missing_counters,
            missing_target_counters)
        self.counter_lookup_items.setdefault(
            (parent_box, css_token), counter_lookup_item)

    def check_pending_targets(self):
        """Check pending targets if needed.

        Call every registered parse_again function when a pending target has
        been looked up, then switch the collector to pagination mode.

        """
        if self.had_pending_targets:
            for item in self.target_lookup_items.values():
                for function in item.parse_again_functions.values():
                    function()
            self.had_pending_targets = False
        # Ready for pagination
        self.collecting = False

    def cache_target_page_counters(self, anchor_name, page_counter_values,
                                   page_maker_index, page_maker):
        """Store target's current ``page_maker_index`` and page counter values.

        Eventually update associated targeting boxes: when the stored page
        counter values of the target change, each CounterLookupItem of a
        'content' property that is missing counters of ``anchor_name`` is
        either flagged as ``pending`` (its page is unknown or not in
        ``page_maker`` yet) or re-parsed, with the ``'content_changed'`` flag
        of its page's remake state (the last element of the ``page_maker``
        entry) set to True.

        """
        # Only store page counters when paginating
        if self.collecting:
            return

        target_item = self.target_lookup_items.get(anchor_name)
        if not target_item or target_item.state != 'up-to-date':
            return

        target_item.page_maker_index = page_maker_index
        if target_item.cached_page_counter_values == page_counter_values:
            # Nothing changed, no box has to be updated
            return
        target_item.cached_page_counter_values = copy.deepcopy(
            page_counter_values)

        # Spread the news: update boxes affected by a change in the
        # anchor's page counter values.
        for (_, css_token), counter_item in self.counter_lookup_items.items():
            # Only update items that need counters in their content
            if css_token != 'content':
                continue

            # Don't update if item has no missing target counter
            missing_counters = counter_item.missing_target_counters.get(
                anchor_name)
            if missing_counters is None:
                continue

            # Pending marker for remake_page
            if (counter_item.page_maker_index is None or
                    counter_item.page_maker_index >= len(page_maker)):
                counter_item.pending = True
                continue

            # TODO: Is the item at all interested in the new
            # page_counter_values? It probably is and this check is a
            # brake.
            if any(page_counter_values.get(counter_name) is not None
                   for counter_name in missing_counters):
                remake_state = page_maker[counter_item.page_maker_index][-1]
                remake_state['content_changed'] = True
                counter_item.parse_again(
                    counter_item.cached_page_counter_values)
            # Hint: the box's own cached page counters trigger a
            # separate 'content_changed'.