debian-weasyprint/weasyprint/css/targets.py

249 lines
9.6 KiB
Python

"""
weasyprint.css.targets
----------------------
Handle target-counter, target-counters and target-text.
The TargetCollector is a structure providing required targets'
counter_values and stuff needed to build pending targets later,
when the layout of all targeted anchors has been done.
:copyright: Copyright 2011-2019 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import copy
from ..logger import LOGGER
class TargetLookupItem(object):
    """Item controlling pending targets and page based target counters.

    Collected in the TargetCollector's ``target_lookup_items``.

    The ``state`` values used in this module are ``'pending'`` (the
    default), ``'up-to-date'`` and ``'undefined'``.

    """

    def __init__(self, state='pending'):
        self.state = state

        # Required by target-counter and target-counters to access the
        # target's .cached_counter_values.
        # Needed for target-text via TEXT_CONTENT_EXTRACTORS.
        self.target_box = None

        # Functions that have to be called to check pending targets.
        # Keys are (source_box, css_token).
        self.parse_again_functions = {}

        # Anchor position during pagination (page_number - 1)
        self.page_maker_index = None

        # target_box's page_counters during pagination
        self.cached_page_counter_values = {}
class CounterLookupItem(object):
    """Item controlling page based counters.

    Collected in the TargetCollector's ``counter_lookup_items``.

    :param parse_again:
        Function that has to be called to check the pending counter. The
        collector calls it with this item's ``cached_page_counter_values``.
    :param missing_counters:
        Missing counters of the box owning the content.
    :param missing_target_counters:
        Missing target counters. The collector looks them up by anchor name
        and iterates the result as counter names.

    """

    def __init__(self, parse_again, missing_counters, missing_target_counters):
        # Function that has to be called to check the pending counter.
        self.parse_again = parse_again

        # Missing counters and target counters
        self.missing_counters = missing_counters
        self.missing_target_counters = missing_target_counters

        # Box position during pagination (page_number - 1)
        self.page_maker_index = None

        # Marker for remake_page
        self.pending = False

        # Targeting box's page_counters during pagination
        self.cached_page_counter_values = {}
class TargetCollector(object):
    """Collector of HTML targets used by CSS content with ``target-*``.

    Anchors and targets are collected while ``collecting`` is True. After
    ``check_pending_targets()`` has run, ``collecting`` is False and only
    page counter values are cached.

    """

    def __init__(self):
        # Lookup items for targets and page counters
        self.target_lookup_items = {}
        self.counter_lookup_items = {}

        # When collecting is True, compute_content_list() collects missing
        # page counters in CounterLookupItems. Otherwise, it mixes in the
        # TargetLookupItem's cached_page_counter_values.
        # Is switched to False in check_pending_targets().
        self.collecting = True

        # had_pending_targets is set to True when a target is needed but has
        # not been seen yet. check_pending_targets then uses this information
        # to call the needed parse_again functions.
        self.had_pending_targets = False

        # List of anchors that have already been seen during parsing.
        self.existing_anchors = []

    def anchor_name_from_token(self, anchor_token):
        """Get anchor name from string or uri token.

        A ``'string'`` token starting with ``#`` gives the text after the
        ``#``. A ``'url'`` token whose value starts with ``'internal'`` gives
        the second element of that value. Anything else gives ``None``.

        """
        if anchor_token[0] == 'string' and anchor_token[1].startswith('#'):
            return anchor_token[1][1:]
        elif anchor_token[0] == 'url' and anchor_token[1][0] == 'internal':
            return anchor_token[1][1]
        return None

    def collect_anchor(self, anchor_name):
        """Store ``anchor_name`` in ``existing_anchors``.

        Empty or non-string names are ignored. A name that is already known
        is not stored twice, a warning is logged instead.

        """
        if anchor_name and isinstance(anchor_name, str):
            if anchor_name in self.existing_anchors:
                LOGGER.warning('Anchor defined twice: %s', anchor_name)
            else:
                self.existing_anchors.append(anchor_name)

    def collect_computed_target(self, anchor_token):
        """Store a computed internal target's ``anchor_name``.

        ``anchor_name`` must not start with '#' and be already unquoted.

        A pending TargetLookupItem is registered for the name unless an item
        already exists for it. Tokens that yield no name are ignored.

        """
        anchor_name = self.anchor_name_from_token(anchor_token)
        if anchor_name:
            self.target_lookup_items.setdefault(
                anchor_name, TargetLookupItem())

    def lookup_target(self, anchor_token, source_box, css_token, parse_again):
        """Get a TargetLookupItem corresponding to ``anchor_token``.

        If it is already filled by a previous anchor-element, the status is
        'up-to-date'. Otherwise, it is 'pending', we must parse the whole
        tree again.

        A pending item whose anchor has not been collected, or a token with
        no registered item at all, gives an 'undefined' item and an error is
        logged.

        """
        anchor_name = self.anchor_name_from_token(anchor_token)
        item = self.target_lookup_items.get(
            anchor_name, TargetLookupItem('undefined'))

        if item.state == 'pending':
            if anchor_name in self.existing_anchors:
                # The anchor exists but its box has not been stored yet:
                # remember how to build the content again later.
                self.had_pending_targets = True
                item.parse_again_functions.setdefault(
                    (source_box, css_token), parse_again)
            else:
                item.state = 'undefined'

        if item.state == 'undefined':
            LOGGER.error(
                'Content discarded: target points to undefined anchor "%s"',
                anchor_token)

        return item

    def store_target(self, anchor_name, target_counter_values, target_box):
        """Store a target called ``anchor_name``.

        If there is a pending TargetLookupItem, it is updated. Only previously
        collected anchors are stored.

        """
        item = self.target_lookup_items.get(anchor_name)
        if item and item.state == 'pending':
            item.state = 'up-to-date'
            item.target_box = target_box
            # Store the counter_values in the target_box like
            # compute_content_list does.
            # TODO: remove attribute or set a default value in Box class
            if not hasattr(target_box, 'cached_counter_values'):
                target_box.cached_counter_values = copy.deepcopy(
                    target_counter_values)

    def collect_missing_counters(self, parent_box, css_token,
                                 parse_again_function, missing_counters,
                                 missing_target_counters):
        """Collect missing (probably page-based) counters during formatting.

        The ``missing_counters`` are re-used during pagination.

        The ``missing_link`` attribute added to the parent_box is required to
        connect the paginated boxes to their originating ``parent_box``.

        """
        # No counter collection during pagination
        if not self.collecting:
            return

        # No need to add empty miss-lists
        if not (missing_counters or missing_target_counters):
            return

        # TODO: remove attribute or set a default value in Box class
        if not hasattr(parent_box, 'missing_link'):
            parent_box.missing_link = parent_box
        counter_lookup_item = CounterLookupItem(
            parse_again_function, missing_counters,
            missing_target_counters)
        # The first item registered for a (box, token) pair is kept.
        self.counter_lookup_items.setdefault(
            (parent_box, css_token), counter_lookup_item)

    def check_pending_targets(self):
        """Check pending targets if needed.

        Every registered parse_again function is called when a pending
        target has been looked up. Collecting is switched off in any case.

        """
        if self.had_pending_targets:
            for item in self.target_lookup_items.values():
                for function in item.parse_again_functions.values():
                    function()
            self.had_pending_targets = False
        # Ready for pagination
        self.collecting = False

    def cache_target_page_counters(self, anchor_name, page_counter_values,
                                   page_maker_index, page_maker):
        """Store target's current ``page_maker_index`` and page counter values.

        Eventually update associated targeting boxes.

        """
        # Only store page counters when paginating
        if self.collecting:
            return

        target_item = self.target_lookup_items.get(anchor_name)
        if not target_item or target_item.state != 'up-to-date':
            return

        target_item.page_maker_index = page_maker_index
        if target_item.cached_page_counter_values == page_counter_values:
            # Nothing changed, no box has to be updated.
            return
        target_item.cached_page_counter_values = copy.deepcopy(
            page_counter_values)

        # Spread the news: update boxes affected by a change in the
        # anchor's page counter values.
        for (_, css_token), counter_item in self.counter_lookup_items.items():
            # Only update items that need counters in their content
            if css_token != 'content':
                continue

            # Don't update if item has no missing target counter
            missing_counters = counter_item.missing_target_counters.get(
                anchor_name)
            if missing_counters is None:
                continue

            # Pending marker for remake_page
            if (counter_item.page_maker_index is None or
                    counter_item.page_maker_index >= len(page_maker)):
                counter_item.pending = True
                continue

            # TODO: Is the item at all interested in the new
            # page_counter_values? It probably is and this check is a
            # brake.
            for counter_name in missing_counters:
                counter_value = page_counter_values.get(counter_name)
                if counter_value is not None:
                    # The remake state is the last element of the page
                    # maker entry of the targeting box.
                    remake_state = (
                        page_maker[counter_item.page_maker_index][-1])
                    remake_state['content_changed'] = True
                    counter_item.parse_again(
                        counter_item.cached_page_counter_values)
                    break
            # Hint: the box's own cached page counters trigger a
            # separate 'content_changed'.