2015-11-25 08:38:01 +01:00
|
|
|
|
# coding: utf-8
|
2012-03-21 23:19:27 +01:00
|
|
|
|
"""
|
|
|
|
|
weasyprint.formatting_structure.build
|
|
|
|
|
-------------------------------------
|
2011-06-29 21:59:29 +02:00
|
|
|
|
|
2012-03-21 23:19:27 +01:00
|
|
|
|
Turn an element tree with associated CSS style (computed values)
|
|
|
|
|
into a "before layout" formatting structure / box tree.
|
2011-06-29 21:59:29 +02:00
|
|
|
|
|
2012-03-21 23:19:27 +01:00
|
|
|
|
This includes creating anonymous boxes and processing whitespace
|
|
|
|
|
as necessary.
|
2011-08-24 10:05:44 +02:00
|
|
|
|
|
2014-01-10 15:27:02 +01:00
|
|
|
|
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
|
2012-03-21 23:19:27 +01:00
|
|
|
|
:license: BSD, see LICENSE for details.
|
2011-06-29 21:59:29 +02:00
|
|
|
|
|
2011-08-24 10:05:44 +02:00
|
|
|
|
"""
|
2011-06-29 21:59:29 +02:00
|
|
|
|
|
2012-02-17 18:49:58 +01:00
|
|
|
|
from __future__ import division, unicode_literals
|
|
|
|
|
|
2011-06-29 21:59:29 +02:00
|
|
|
|
import re
|
2012-07-11 14:11:26 +02:00
|
|
|
|
|
2017-03-26 11:42:50 +02:00
|
|
|
|
import tinycss2.color3
|
2012-07-11 14:11:26 +02:00
|
|
|
|
|
2011-12-07 17:09:59 +01:00
|
|
|
|
from . import boxes, counters
|
2011-08-25 12:48:00 +02:00
|
|
|
|
from .. import html
|
2017-03-25 00:33:36 +01:00
|
|
|
|
from ..compat import basestring, xrange
|
2017-07-28 22:28:29 +02:00
|
|
|
|
from ..css import properties
|
2011-08-20 15:07:14 +02:00
|
|
|
|
|
2011-11-14 14:29:40 +01:00
|
|
|
|
# Lookup table from the computed value of the ``display`` CSS property to
# the box class instantiated for it (see ``make_box``). Several display
# values share a class: ``list-item`` is built as a plain block box and the
# three ``table-*-group`` row group values all use ``TableRowGroupBox``.
# ``none`` is deliberately absent from this table.
BOX_TYPE_FROM_DISPLAY = {
    'block': boxes.BlockBox,
    'list-item': boxes.BlockBox,
    'inline': boxes.InlineBox,
    'inline-block': boxes.InlineBlockBox,
    'table': boxes.TableBox,
    'inline-table': boxes.InlineTableBox,
    'table-row': boxes.TableRowBox,
    'table-row-group': boxes.TableRowGroupBox,
    'table-header-group': boxes.TableRowGroupBox,
    'table-footer-group': boxes.TableRowGroupBox,
    'table-column': boxes.TableColumnBox,
    'table-column-group': boxes.TableColumnGroupBox,
    'table-cell': boxes.TableCellBox,
    'table-caption': boxes.TableCaptionBox,
}
|
|
|
|
|
|
|
|
|
|
|
2017-07-03 15:19:05 +02:00
|
|
|
|
def build_formatting_structure(element_tree, style_for, get_image_from_uri,
                               base_url):
    """Build a formatting structure (box tree) from an element tree.

    :param element_tree: root element of the document.
    :param style_for:
        callable ``style_for(element, pseudo_type=None)`` returning the
        style of an element or pseudo-element.
    :param get_image_from_uri: callable forwarded to the box builders.
    :param base_url: forwarded to :func:`element_to_box`.
    :returns:
        the root box, after whitespace processing and the anonymous box
        fix-ups have been applied.

    """
    root_boxes = element_to_box(
        element_tree, style_for, get_image_from_uri, base_url)
    if not root_boxes:
        # The root element generated no box. Build again with a style
        # lookup that forces ``display`` to ``block`` on the ``html``
        # element and to ``none`` on everything else.
        def root_style_for(element, pseudo_type=None):
            style = style_for(element, pseudo_type)
            if style:
                # TODO: we should check that the element has a parent
                # instead.
                style.display = 'block' if element.tag == 'html' else 'none'
            return style

        root_boxes = element_to_box(
            element_tree, root_style_for, get_image_from_uri, base_url)

    # Exactly one box is expected here; unpacking raises otherwise.
    box, = root_boxes
    box.is_for_root_element = True

    # If this is changed, maybe update weasy.layout.pages.make_margin_boxes()
    process_whitespace(box)
    for fix_up in (anonymous_table_boxes, inline_in_block, block_in_inline,
                   set_viewport_overflow):
        box = fix_up(box)
    return box
|
2011-06-29 21:59:29 +02:00
|
|
|
|
|
|
|
|
|
|
2017-07-01 00:28:14 +02:00
|
|
|
|
def make_box(element_tag, style, content, get_image_from_uri):
    """Instantiate the box class matching ``style.display``.

    The class is looked up in ``BOX_TYPE_FROM_DISPLAY`` (a ``KeyError`` is
    raised for a display value that is not in the table) and called with
    ``(element_tag, style, content)``. ``get_image_from_uri`` is accepted
    for signature compatibility but is not used here.

    """
    box_type = BOX_TYPE_FROM_DISPLAY[style.display]
    return box_type(element_tag, style, content)
|
2012-07-11 16:20:43 +02:00
|
|
|
|
|
2013-04-11 12:08:53 +02:00
|
|
|
|
|
2017-07-03 15:19:05 +02:00
|
|
|
|
def element_to_box(element, style_for, get_image_from_uri, base_url,
                   state=None):
    """Convert an element and its children into a box with children.

    Return a list of boxes. Most of the time the list will have one item but
    may have zero or more than one.

    Eg.::

        <p>Some <em>emphasised</em> text.</p>

    gives (not actual syntax)::

        BlockBox[
            TextBox['Some '],
            InlineBox[
                TextBox['emphasised'],
            ],
            TextBox[' text.'],
        ]

    ``TextBox``es are anonymous inline boxes:
    See http://www.w3.org/TR/CSS21/visuren.html#anonymous

    ``state`` is the ``(quote_depth, counter_values, counter_scopes)`` tuple
    shared by the whole recursion; it is created on the outermost call when
    left to ``None``.

    """
    if not isinstance(element.tag, basestring):
        # lxml.html already converts HTML entities to text.
        # Here we ignore comments and XML processing instructions.
        return []

    style = style_for(element)

    # TODO: should be the used value. When does the used value for `display`
    # differ from the computed value?
    display = style.display
    if display == 'none':
        return []

    box = make_box(element.tag, style, [], get_image_from_uri)

    if state is None:
        # The tuple itself is never rebound; its three items are mutable
        # objects shared (and modified in place) by every recursive call.
        state = (
            [0],  # quote_depth: single integer, boxed in a list
            {},  # counter_values: name -> stacked/scoped values
            [set()],  # counter_scopes: element tree depths -> counter names
        )
    _unused_quote_depth, counter_values, counter_scopes = state

    update_counters(state, style)

    children = []
    if display == 'list-item':
        for marker in add_box_marker(box, counter_values, get_image_from_uri):
            children.append(marker)

    # If this element's direct children create new scopes, the counter
    # names will be in this new set.
    counter_scopes.append(set())

    box.first_letter_style = style_for(element, 'first-letter')
    box.first_line_style = style_for(element, 'first-line')

    children.extend(before_after_to_box(
        element, 'before', state, style_for, get_image_from_uri))

    leading_text = element.text
    if leading_text:
        children.append(boxes.TextBox.anonymous_from(box, leading_text))

    for child_element in element:
        children.extend(element_to_box(
            child_element, style_for, get_image_from_uri, base_url, state))
        tail = child_element.tail
        if tail:
            tail_box = boxes.TextBox.anonymous_from(box, tail)
            previous = children[-1] if children else None
            if isinstance(previous, boxes.TextBox):
                # Merge into the preceding text box rather than adding a
                # second, adjacent one.
                previous.text += tail_box.text
            else:
                children.append(tail_box)

    children.extend(before_after_to_box(
        element, 'after', state, style_for, get_image_from_uri))

    # Scopes created by this element's children stop here.
    for name in counter_scopes.pop():
        values = counter_values[name]
        values.pop()
        if not values:
            del counter_values[name]

    box.children = children
    set_content_lists(element, box, style, counter_values)

    # Specific handling for the element. (eg. replaced element)
    return html.handle_element(element, box, get_image_from_uri, base_url)
|
2011-12-05 18:03:25 +01:00
|
|
|
|
|
|
|
|
|
|
2016-12-04 16:12:22 +01:00
|
|
|
|
def before_after_to_box(element, pseudo_type, state, style_for,
                        get_image_from_uri):
    """Yield the box for ::before or ::after pseudo-element if there is one.

    Nothing is yielded when the pseudo-element has no style, when its
    ``display`` or ``content`` is ``'none'``, or when its ``content`` is
    ``'normal'``.

    ``state`` is the shared ``(quote_depth, counter_values, counter_scopes)``
    tuple; counters are updated for the pseudo-element before its content
    is generated.

    """
    style = style_for(element, pseudo_type)
    if pseudo_type and style is None:
        # Pseudo-elements with no style at all do not get a StyleDict.
        # Their initial content property computes to 'none'.
        return

    # TODO: should be the used value. When does the used value for `display`
    # differ from the computed value?
    display = style.display
    content = style.content
    if display == 'none' or content == 'none' or content == 'normal':
        return

    tag = '%s::%s' % (element.tag, pseudo_type)
    box = make_box(tag, style, [], get_image_from_uri)

    quote_depth, counter_values, _counter_scopes = state
    update_counters(state, style)

    children = []
    if display == 'list-item':
        children.extend(
            add_box_marker(box, counter_values, get_image_from_uri))
    children.extend(content_to_boxes(
        style, box, quote_depth, counter_values, get_image_from_uri))

    box.children = children
    yield box
|
2011-12-28 15:34:30 +01:00
|
|
|
|
|
|
|
|
|
|
2012-07-12 15:54:22 +02:00
|
|
|
|
def content_to_boxes(style, parent_box, quote_depth, counter_values,
                     get_image_from_uri, context=None):
    """Take the value of a ``content`` property and yield boxes.

    ``style.content`` is iterated as ``(type, value)`` pairs. Consecutive
    textual items (strings, counters, quotes, named strings) are gathered
    into a single anonymous ``TextBox``; each image that could be loaded
    flushes the pending text and yields an anonymous ``InlineReplacedBox``.

    ``quote_depth`` is a one-item list updated in place. ``counter_values``
    maps counter names to their stack of values. ``context`` is only used
    for ``'string'`` items; without it such an item reaches the final
    branch and trips its assertion.

    """
    pending = []
    for type_, value in style.content:
        if type_ == 'STRING':
            pending.append(value)
        elif type_ == 'URI':
            image = get_image_from_uri(value)
            if image is None:
                # Image not available: ignore this item, keep the text.
                continue
            text = ''.join(pending)
            if text:
                yield boxes.TextBox.anonymous_from(parent_box, text)
            pending = []
            yield boxes.InlineReplacedBox.anonymous_from(parent_box, image)
        elif type_ == 'counter':
            counter_name, counter_style = value
            # Innermost value of the counter, or 0 when it does not exist.
            innermost = counter_values.get(counter_name, [0])[-1]
            pending.append(counters.format(innermost, counter_style))
        elif type_ == 'counters':
            counter_name, separator, counter_style = value
            formatted = (
                counters.format(counter_value, counter_style)
                for counter_value in counter_values.get(counter_name, [0]))
            pending.append(separator.join(formatted))
        elif type_ == 'string' and context is not None:
            pending.append(context.get_string_set_for(*value))
        else:
            assert type_ == 'QUOTE'
            is_open, insert = value
            if not is_open:
                # Closing quote: leave one nesting level, never below 0.
                quote_depth[0] = max(0, quote_depth[0] - 1)
            if insert:
                open_quotes, close_quotes = style.quotes
                quotes = open_quotes if is_open else close_quotes
                # Levels deeper than the list reuse its last item.
                level = min(quote_depth[0], len(quotes) - 1)
                pending.append(quotes[level])
            if is_open:
                quote_depth[0] += 1

    text = ''.join(pending)
    if text:
        yield boxes.TextBox.anonymous_from(parent_box, text)
|
2011-06-29 21:59:29 +02:00
|
|
|
|
|
|
|
|
|
|
2015-05-01 21:14:10 +02:00
|
|
|
|
def compute_content_list_string(element, box, counter_values, content_list):
    """Compute the string corresponding to the content-list.

    :param element: element whose attributes are read for ``attr`` items.
    :param box: box passed to the text extractor for ``content`` items.
    :param counter_values: mapping of counter names to stacks of values.
    :param content_list: iterable of ``(type, value)`` pairs.
    :returns:
        the concatenation of the text produced by every item. Items with an
        unrecognised type contribute nothing.

    """
    # Collect the pieces and join once instead of growing a string with
    # repeated ``+=``.
    parts = []
    for type_, value in content_list:
        if type_ == 'STRING':
            parts.append(value)
        elif type_ == 'content':
            added_text = TEXT_CONTENT_EXTRACTORS[value](box)
            # Simulate the step of white space processing
            # (normally done during the layout)
            parts.append(added_text.strip())
        elif type_ == 'counter':
            counter_name, counter_style = value
            # Innermost value of the counter, or 0 when it does not exist.
            counter_value = counter_values.get(counter_name, [0])[-1]
            parts.append(counters.format(counter_value, counter_style))
        elif type_ == 'counters':
            counter_name, separator, counter_style = value
            parts.append(separator.join(
                counters.format(counter_value, counter_style)
                for counter_value
                in counter_values.get(counter_name, [0])))
        elif type_ == 'attr':
            # A missing attribute contributes an empty string.
            parts.append(element.get(value, ''))
    return ''.join(parts)
|
|
|
|
|
|
|
|
|
|
|
2017-07-22 13:51:03 +02:00
|
|
|
|
def set_content_lists(element, box, style, counter_values):
    """Set the content-lists by strings.

    These content-lists are used in GCPM properties like ``string-set`` and
    ``bookmark-label``.

    Sets ``box.string_set`` to a list of ``(name, text)`` pairs (empty when
    ``style['string_set']`` is ``'none'``) and ``box.bookmark_label`` to a
    string (empty when ``style['bookmark_label']`` is ``'none'``).

    """
    if style['string_set'] == 'none':
        box.string_set = []
    else:
        box.string_set = [
            (string_name, compute_content_list_string(
                element, box, counter_values, string_values))
            for string_name, string_values in style['string_set']]

    if style['bookmark_label'] == 'none':
        box.bookmark_label = ''
    else:
        box.bookmark_label = compute_content_list_string(
            element, box, counter_values, style['bookmark_label'])
|
2015-05-01 20:28:42 +02:00
|
|
|
|
|
|
|
|
|
|
2011-12-07 17:09:59 +01:00
|
|
|
|
def update_counters(state, style):
    """Handle the ``counter-*`` properties.

    ``state`` is the shared ``(quote_depth, counter_values, counter_scopes)``
    tuple; ``counter_values`` and the innermost set of ``counter_scopes``
    are modified in place according to ``style.counter_reset`` and
    ``style.counter_increment``.

    """
    _quote_depth, values_by_name, scopes = state
    current_scope = scopes[-1]

    for name, value in style.counter_reset:
        if name not in current_scope:
            current_scope.add(name)
        else:
            # Already reset by a sibling in this scope: replace that value
            # instead of nesting a new one.
            values_by_name[name].pop()
        values_by_name.setdefault(name, []).append(value)

    # XXX Disabled for now, only exists in Lists3's editor's draft.
    # for name, value in style.counter_set:
    #     values = counter_values.setdefault(name, [])
    #     if not values:
    #         assert name not in sibling_scopes
    #         sibling_scopes.add(name)
    #         values.append(0)
    #     values[-1] = value

    increments = style.counter_increment
    if increments == 'auto':
        # 'auto' is the initial value but is not valid in stylesheet:
        # there was no counter-increment declaration for this element.
        # (Or the winning value was 'initial'.)
        # http://dev.w3.org/csswg/css3-lists/#declaring-a-list-item
        increments = (
            [('list-item', 1)] if style.display == 'list-item' else [])

    for name, value in increments:
        stack = values_by_name.setdefault(name, [])
        if not stack:
            # Incrementing a counter that does not exist yet creates it in
            # the current scope, starting from 0.
            assert name not in current_scope
            current_scope.add(name)
            stack.append(0)
        stack[-1] += value
|
|
|
|
|
|
|
|
|
|
|
2012-07-12 15:54:22 +02:00
|
|
|
|
def add_box_marker(box, counter_values, get_image_from_uri):
    """Add a list marker to boxes for elements with ``display: list-item``,
    and yield children to add at the start of the box.

    An ``inside`` marker is yielded; an ``outside`` marker is stored on
    ``box.outside_list_marker`` and nothing is yielded. No marker at all is
    created when there is no usable image and ``list-style-type`` is
    ``none``.

    See http://www.w3.org/TR/CSS21/generate.html#lists

    """
    style = box.style
    image_type, image = style.list_style_image
    if image_type == 'url':
        # The result may be None, in case the image is not available.
        image = get_image_from_uri(image)

    if image is not None:
        marker_box = boxes.InlineReplacedBox.anonymous_from(box, image)
        # Only image markers are flagged; text markers are not.
        marker_box.is_list_marker = True
    else:
        marker_type = style.list_style_type
        if marker_type == 'none':
            return
        # Innermost 'list-item' counter, or 0 when it does not exist.
        counter_value = counter_values.get('list-item', [0])[-1]
        marker_text = counters.format_list_marker(counter_value, marker_type)
        marker_box = boxes.TextBox.anonymous_from(box, marker_text)
    marker_box.element_tag += '::marker'

    position = style.list_style_position
    if position == 'inside':
        yield marker_box
    elif position == 'outside':
        box.outside_list_marker = marker_box
|
|
|
|
|
|
|
|
|
|
|
2011-11-14 14:29:40 +01:00
|
|
|
|
def is_whitespace(box, _has_non_whitespace=re.compile(r'\S').search):
    """Return True if ``box`` is a TextBox with only whitespace.

    ``_has_non_whitespace`` is a private default holding the bound search
    method of a pattern compiled once at definition time; callers are not
    expected to pass it.

    """
    # The pattern is a raw string: ``\S`` is not a valid string escape and
    # must reach the regex engine untouched.
    return isinstance(box, boxes.TextBox) and not _has_non_whitespace(box.text)
|
2011-11-14 14:29:40 +01:00
|
|
|
|
|
|
|
|
|
|
2011-11-17 17:05:55 +01:00
|
|
|
|
def wrap_improper(box, children, wrapper_type, test=None):
    """
    Wrap consecutive children that do not pass ``test`` in a box of type
    ``wrapper_type``.

    ``test`` defaults to children being of the same type as ``wrapper_type``.

    This is a generator: children passing ``test`` are yielded unchanged and
    in order, and each run of failing children is replaced by one anonymous
    wrapper created from ``box``.

    """
    if test is None:
        def test(child):
            return isinstance(child, wrapper_type)

    def wrap(run):
        """Build an anonymous wrapper holding the children in ``run``."""
        wrapper = wrapper_type.anonymous_from(box, children=[])
        # Apply the rules again on the new wrapper
        return table_boxes_children(wrapper, run)

    run = []
    for child in children:
        if not test(child):
            # Whitespace either fail the test or were removed earlier,
            # so there is no need to take special care with the definition
            # of "consecutive".
            run.append(child)
            continue
        if run:
            yield wrap(run)
            run = []
        yield child
    if run:
        yield wrap(run)
|
2011-11-14 14:29:40 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def anonymous_table_boxes(box):
    """Remove and add boxes according to the table model.

    Take and return a ``Box`` object. Boxes that are not ``ParentBox``
    instances are returned untouched; otherwise the children are processed
    first (depth-first) and the table rules are then applied to ``box``.

    See http://www.w3.org/TR/CSS21/tables.html#anonymous-boxes

    """
    if isinstance(box, boxes.ParentBox):
        # Do recursion.
        fixed_children = []
        for child in box.children:
            fixed_children.append(anonymous_table_boxes(child))
        return table_boxes_children(box, fixed_children)
    return box
|
2011-11-14 17:42:15 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def table_boxes_children(box, children):
    """Internal implementation of anonymous_table_boxes().

    Apply the numbered rules of
    http://www.w3.org/TR/CSS21/tables.html#anonymous-boxes to ``box``, whose
    future children are given as the list ``children`` (which may be
    modified in place).

    Return ``box`` with its ``children`` set, or, for a ``TableBox``, the
    result of :func:`wrap_table`.

    """
    if isinstance(box, boxes.TableColumnBox):  # rule 1.1
        # Remove all children.
        children = []
    elif isinstance(box, boxes.TableColumnGroupBox):  # rule 1.2
        # Remove children other than table-column.
        children = [
            column for column in children
            if isinstance(column, boxes.TableColumnBox)]
        # Rule XXX (not in the spec): column groups have at least
        # one column child.
        if not children:
            children = [boxes.TableColumnBox.anonymous_from(box, [])
                        for _i in xrange(box.span)]

    # rule 1.3
    if box.tabular_container and len(children) >= 2:
        # TODO: Maybe only remove text if internal is also
        #       a proper table descendant of box.
        # This is what the spec says, but maybe not what browsers do:
        # http://lists.w3.org/Archives/Public/www-style/2011Oct/0567

        # Last child
        before_last, last = children[-2:]
        if before_last.internal_table_or_caption and is_whitespace(last):
            children.pop()

        # First child
        if len(children) >= 2:
            first, second = children[:2]
            if second.internal_table_or_caption and is_whitespace(first):
                children.pop(0)

        # Children other than first and last that would be removed by
        # rule 1.3 are also removed by rule 1.4 below.

    # Ignore some whitespace: rule 1.4
    # A whitespace text box is dropped when both of its neighbours are
    # internal table boxes or captions.
    kept = []
    last_index = len(children) - 1
    for index, child in enumerate(children):
        prev_child = children[index - 1] if index > 0 else None
        next_child = children[index + 1] if index < last_index else None
        between_internals = (
            prev_child and prev_child.internal_table_or_caption and
            next_child and next_child.internal_table_or_caption and
            is_whitespace(child))
        if not between_internals:
            kept.append(child)
    children = kept

    # From here on ``children`` is a chain of lazy generators; it is only
    # consumed at the very end of this function.
    if isinstance(box, boxes.TableBox):
        # Rule 2.1
        def is_proper_table_child(child):
            return child.proper_table_child

        children = wrap_improper(
            box, children, boxes.TableRowBox, is_proper_table_child)
    elif isinstance(box, boxes.TableRowGroupBox):
        # Rule 2.2
        children = wrap_improper(box, children, boxes.TableRowBox)

    if isinstance(box, boxes.TableRowBox):
        # Rule 2.3
        children = wrap_improper(box, children, boxes.TableCellBox)
    else:
        # Rule 3.1
        def is_not_cell(child):
            return not isinstance(child, boxes.TableCellBox)

        children = wrap_improper(
            box, children, boxes.TableRowBox, is_not_cell)

    # Rule 3.2
    if isinstance(box, boxes.InlineBox):
        def is_not_proper_table_child(child):
            return not child.proper_table_child

        children = wrap_improper(
            box, children, boxes.InlineTableBox, is_not_proper_table_child)
    else:
        parent_type = type(box)

        def is_properly_parented(child):
            return (not child.proper_table_child or
                    parent_type in child.proper_parents)

        children = wrap_improper(
            box, children, boxes.TableBox, is_properly_parented)

    if isinstance(box, boxes.TableBox):
        return wrap_table(box, children)

    box.children = list(children)
    return box
|
2011-11-15 18:22:00 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def wrap_table(box, children):
    """Take a table box and return it in its table wrapper box.

    Also re-order children and assign grid positions to each column and cell.

    Because of how colspan/rowspan work, grid_y is implicitly the index of
    a row, but grid_x is an explicit attribute on cells, columns and column
    groups.

    http://www.w3.org/TR/CSS21/tables.html#model
    http://www.w3.org/TR/CSS21/tables.html#table-layout

    """
    # Group table children by type
    columns = []
    rows = []
    all_captions = []
    target_for_type = {
        boxes.TableColumnBox: columns,
        boxes.TableColumnGroupBox: columns,
        boxes.TableRowBox: rows,
        boxes.TableRowGroupBox: rows,
        boxes.TableCaptionBox: all_captions,
    }
    for child in children:
        # Exact type lookup: a child of any other type raises KeyError.
        target_for_type[type(child)].append(child)

    # Split top and bottom captions
    captions = {'top': [], 'bottom': []}
    for caption in all_captions:
        captions[caption.style.caption_side].append(caption)

    # Assign X positions on the grid to column boxes
    column_groups = list(wrap_improper(
        box, columns, boxes.TableColumnGroupBox))
    next_x = 0
    for group in column_groups:
        group.grid_x = next_x
        if not group.children:
            next_x += group.span
            continue
        for column in group.children:
            # There's no need to take care of group's span, as "span=x"
            # already generates x TableColumnBox children
            column.grid_x = next_x
            next_x += 1
        group.span = len(group.children)
    grid_width = next_x

    # Extract the optional header and footer groups: only the first group
    # of each kind counts, later ones are treated as body groups.
    header = None
    footer = None
    body_row_groups = []
    for group in wrap_improper(box, rows, boxes.TableRowGroupBox):
        display = group.style.display
        if display == 'table-header-group' and header is None:
            group.is_header = True
            header = group
        elif display == 'table-footer-group' and footer is None:
            group.is_footer = True
            footer = group
        else:
            body_row_groups.append(group)
    row_groups = []
    if header is not None:
        row_groups.append(header)
    row_groups.extend(body_row_groups)
    if footer is not None:
        row_groups.append(footer)

    # Assign a (x,y) position in the grid to each cell.
    # rowspan can not extend beyond a row group, so each row group
    # is independent.
    # http://www.w3.org/TR/CSS21/tables.html#table-layout
    # Column 0 is on the left if direction is ltr, right if rtl.
    # This algorithm does not change.
    grid_height = 0
    for group in row_groups:
        group_rows = group.children
        # Indexes: row number in the group.
        # Values: set of cells already occupied by row-spanning cells.
        occupied_by_row = [set() for _row in group_rows]
        for row_index, row in enumerate(group_rows):
            occupied_here = occupied_by_row[row_index]
            # Same set objects as in ``occupied_by_row``, restricted to the
            # rows after this one.
            following_rows = occupied_by_row[row_index + 1:]
            grid_x = 0
            for cell in row.children:
                # Make sure that the first grid cell is free.
                while grid_x in occupied_here:
                    grid_x += 1
                cell.grid_x = grid_x
                cell_end_x = grid_x + cell.colspan
                # http://www.w3.org/TR/html401/struct/tables.html#adef-rowspan
                if cell.rowspan != 1:
                    max_rowspan = len(following_rows) + 1
                    if cell.rowspan == 0:
                        # All rows until the end of the group
                        spanned_rows = following_rows
                        cell.rowspan = max_rowspan
                    else:
                        cell.rowspan = min(cell.rowspan, max_rowspan)
                        spanned_rows = following_rows[:cell.rowspan - 1]
                    spanned_columns = range(grid_x, cell_end_x)
                    for occupied_cells in spanned_rows:
                        occupied_cells.update(spanned_columns)
                grid_x = cell_end_x
                grid_width = max(grid_width, grid_x)
        grid_height += len(group_rows)

    table = box.copy_with_children(row_groups)
    table.column_groups = tuple(column_groups)
    if table.style.border_collapse == 'collapse':
        table.collapsed_border_grid = collapse_table_borders(
            table, grid_width, grid_height)

    wrapper_type = (
        boxes.InlineBlockBox if isinstance(box, boxes.InlineTableBox)
        else boxes.BlockBox)
    wrapper = wrapper_type.anonymous_from(
        box, captions['top'] + [table] + captions['bottom'])
    # Work on a copy of the style before modifying it below.
    wrapper.style = wrapper.style.copy()
    wrapper.is_table_wrapper = True
    if not table.style.anonymous:
        # Non-inherited properties of the table element apply to one
        # of the wrapper and the table. The other get the initial value.
        # TODO: put this in a method of the table object
        for name in properties.TABLE_WRAPPER_BOX_PROPERTIES:
            wrapper.style[name] = table.style[name]
            table.style[name] = properties.INITIAL_VALUES[name]
    # else: non-inherited properties already have their initial values

    return wrapper
|
2011-11-14 14:29:40 +01:00
|
|
|
|
|
|
|
|
|
|
2017-03-26 11:42:50 +02:00
|
|
|
|
# The ``transparent`` color, parsed once at import time. Used as the color
# of the null border in ``collapse_table_borders``.
TRANSPARENT = tinycss2.color3.parse_color('transparent')
|
|
|
|
|
|
|
|
|
|
|
2012-07-11 14:11:26 +02:00
|
|
|
|
def collapse_table_borders(table, grid_width, grid_height):
    """Resolve border conflicts for a table in the collapsing border model.

    Take a :class:`TableBox`; set appropriate border widths on the table,
    column group, column, row group, row, and cell boxes; and return
    a data structure for the resolved collapsed border grid.

    :param table:
        The table box. Its ``children`` are row groups, whose children are
        rows, whose children are cells (with ``grid_x``, ``colspan`` and
        ``rowspan``). ``table.column_groups`` must already be set.
    :param grid_width: Number of columns in the table grid.
    :param grid_height: Number of rows in the table grid.
    :returns:
        A ``(vertical_borders, horizontal_borders)`` tuple.
        ``vertical_borders`` has ``grid_height`` rows of ``grid_width + 1``
        entries, ``horizontal_borders`` has ``grid_height + 1`` rows of
        ``grid_width`` entries. Each entry is
        ``(score, (style, width, color))``.
        Two empty lists are returned for a table without rows or columns.

    """
    if not (grid_width and grid_height):
        # Don't bother with empty tables
        return [], []

    # The later a style appears in this list, the lower its score.
    style_scores = dict((v, i) for i, v in enumerate(reversed([
        'hidden', 'double', 'solid', 'dashed', 'dotted', 'ridge',
        'outset', 'groove', 'inset', 'none'])))
    # Styles that are stored in the grid under another name.
    style_map = {'inset': 'ridge', 'outset': 'groove'}
    transparent = TRANSPARENT
    # Initial value of every grid entry: any real border has a higher score.
    weak_null_border = (
        (0, 0, style_scores['none']), ('none', 0, transparent))
    vertical_borders = [[weak_null_border for x in xrange(grid_width + 1)]
                        for y in xrange(grid_height)]
    horizontal_borders = [[weak_null_border for x in xrange(grid_width)]
                          for y in xrange(grid_height + 1)]

    def set_one_border(border_grid, box_style, side, grid_x, grid_y):
        """Store one border of a box in the grid if it beats the current."""
        style = box_style['border_%s_style' % side]
        width = box_style['border_%s_width' % side]
        color = box_style.get_color('border_%s_color' % side)

        # http://www.w3.org/TR/CSS21/tables.html#border-conflict-resolution
        score = ((1 if style == 'hidden' else 0), width, style_scores[style])

        style = style_map.get(style, style)
        previous_score, _ = border_grid[grid_y][grid_x]
        # Strict < so that the earlier call wins in case of a tie.
        if previous_score < score:
            border_grid[grid_y][grid_x] = (score, (style, width, color))

    def set_borders(box, x, y, w, h):
        """Store the four borders of a box covering a w x h grid area."""
        style = box.style
        for yy in xrange(y, y + h):
            set_one_border(vertical_borders, style, 'left', x, yy)
            set_one_border(vertical_borders, style, 'right', x + w, yy)
        for xx in xrange(x, x + w):
            set_one_border(horizontal_borders, style, 'top', xx, y)
            set_one_border(horizontal_borders, style, 'bottom', xx, y + h)

    # The order is important here:
    # "A style set on a cell wins over one on a row, which wins over a
    #  row group, column, column group and, lastly, table"
    # See http://www.w3.org/TR/CSS21/tables.html#border-conflict-resolution
    # Its score starts with 1, so no border of a box can replace it.
    strong_null_border = (
        (1, 0, style_scores['hidden']), ('hidden', 0, transparent))
    grid_y = 0
    for row_group in table.children:
        for row in row_group.children:
            for cell in row.children:
                # No border inside of a cell with rowspan or colspan
                for xx in xrange(cell.grid_x + 1, cell.grid_x + cell.colspan):
                    for yy in xrange(grid_y, grid_y + cell.rowspan):
                        vertical_borders[yy][xx] = strong_null_border
                for xx in xrange(cell.grid_x, cell.grid_x + cell.colspan):
                    for yy in xrange(grid_y + 1, grid_y + cell.rowspan):
                        horizontal_borders[yy][xx] = strong_null_border
                # The cell's own borders
                set_borders(cell, x=cell.grid_x, y=grid_y,
                            w=cell.colspan, h=cell.rowspan)
            grid_y += 1

    grid_y = 0
    for row_group in table.children:
        for row in row_group.children:
            set_borders(row, x=0, y=grid_y, w=grid_width, h=1)
            grid_y += 1

    grid_y = 0
    for row_group in table.children:
        rowspan = len(row_group.children)
        set_borders(row_group, x=0, y=grid_y, w=grid_width, h=rowspan)
        grid_y += rowspan

    for column_group in table.column_groups:
        for column in column_group.children:
            set_borders(column, x=column.grid_x, y=0, w=1, h=grid_height)

    for column_group in table.column_groups:
        set_borders(column_group, x=column_group.grid_x, y=0,
                    w=column_group.span, h=grid_height)

    set_borders(table, x=0, y=0, w=grid_width, h=grid_height)

    # Now that all conflicts are resolved, set transparent borders of
    # the correct widths on each box. The actual border grid will be
    # painted separately.
    def set_transparent_border(box, side, twice_width):
        """Give one side of a box a transparent border of half the width."""
        # The style must be the plain keyword string, not a 1-tuple.
        box.style['border_%s_style' % side] = 'solid'
        box.style['border_%s_width' % side] = twice_width / 2
        box.style['border_%s_color' % side] = transparent

    def remove_borders(box):
        """Set zero-width transparent borders on the four sides of a box."""
        set_transparent_border(box, 'top', 0)
        set_transparent_border(box, 'right', 0)
        set_transparent_border(box, 'bottom', 0)
        set_transparent_border(box, 'left', 0)

    def max_vertical_width(x, y, h):
        """Widest resolved vertical border at column x, rows y to y + h."""
        return max(
            width for grid_row in vertical_borders[y:y + h]
            for _, (_, width, _) in [grid_row[x]])

    def max_horizontal_width(x, y, w):
        """Widest resolved horizontal border at row y, columns x to x + w."""
        return max(
            width for _, (_, width, _) in horizontal_borders[y][x:x + w])

    grid_y = 0
    for row_group in table.children:
        remove_borders(row_group)
        for row in row_group.children:
            remove_borders(row)
            for cell in row.children:
                set_transparent_border(cell, 'top', max_horizontal_width(
                    x=cell.grid_x, y=grid_y, w=cell.colspan))
                set_transparent_border(cell, 'bottom', max_horizontal_width(
                    x=cell.grid_x, y=grid_y + cell.rowspan, w=cell.colspan))
                set_transparent_border(cell, 'left', max_vertical_width(
                    x=cell.grid_x, y=grid_y, h=cell.rowspan))
                set_transparent_border(cell, 'right', max_vertical_width(
                    x=cell.grid_x + cell.colspan, y=grid_y, h=cell.rowspan))
            grid_y += 1

    for column_group in table.column_groups:
        remove_borders(column_group)
        for column in column_group.children:
            remove_borders(column)

    set_transparent_border(table, 'top', max_horizontal_width(
        x=0, y=0, w=grid_width))
    set_transparent_border(table, 'bottom', max_horizontal_width(
        x=0, y=grid_height, w=grid_width))
    # "UAs must compute an initial left and right border width for the table
    #  by examining the first and last cells in the first row of the table."
    # http://www.w3.org/TR/CSS21/tables.html#collapsing-borders
    # ... so h=1, not grid_height:
    set_transparent_border(table, 'left', max_vertical_width(
        x=0, y=0, h=1))
    set_transparent_border(table, 'right', max_vertical_width(
        x=grid_width, y=0, h=1))

    return vertical_borders, horizontal_borders
|
|
|
|
|
|
|
|
|
|
|
2012-04-03 18:58:55 +02:00
|
|
|
|
def process_whitespace(box, following_collapsible_space=False):
    """Apply the first part of the CSS 'white-space' processing model.

    The text of text boxes is rewritten in place according to the value of
    their ``white-space`` property; parent boxes are walked recursively.

    ``following_collapsible_space`` tells whether the text preceding
    ``box`` ended with a collapsible space. The returned boolean is the
    same information for what comes after ``box``.

    See http://www.w3.org/TR/CSS21/text.html#white-space-model
    http://dev.w3.org/csswg/css3-text/#white-space-rules

    """
    if not isinstance(box, boxes.TextBox):
        if isinstance(box, boxes.ParentBox):
            for child in box.children:
                if isinstance(child, (boxes.TextBox, boxes.InlineBox)):
                    # Collapsing goes on across text and inline boxes.
                    following_collapsible_space = process_whitespace(
                        child, following_collapsible_space)
                    continue
                # Other children are processed on their own.
                process_whitespace(child)
                if child.is_in_normal_flow():
                    following_collapsible_space = False
        return following_collapsible_space

    text = box.text
    if not text:
        return following_collapsible_space

    # Normalize line feeds
    text = re.sub('\r\n?', '\n', text)

    white_space = box.style.white_space
    collapse_new_lines = white_space in ('normal', 'nowrap')
    collapse_spaces = white_space in ('normal', 'nowrap', 'pre-line')

    if collapse_spaces:
        # \r characters were removed/converted earlier
        text = re.sub('[\t ]*\n[\t ]*', '\n', text)

    if collapse_new_lines:
        # TODO: this should be language-specific
        # Could also replace with a zero width space character (U+200B),
        # or no character
        # CSS3: http://www.w3.org/TR/css3-text/#line-break-transform
        text = text.replace('\n', ' ')

    if not collapse_spaces:
        box.text = text
        return False

    collapsed_text = re.sub(' +', ' ', text.replace('\t', ' '))
    if following_collapsible_space and collapsed_text.startswith(' '):
        # The leading space collapses with the previous one: drop it, but
        # remember that it was there.
        box.leading_collapsible_space = True
        box.text = collapsed_text[1:]
    else:
        box.text = collapsed_text
    return collapsed_text.endswith(' ')
|
2011-06-29 21:59:29 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def inline_in_block(box):
    """Build the structure of lines inside blocks.

    Runs of consecutive inline-level children of a block container box are
    put into a line box, which is itself put into an anonymous block box
    when the container also has other children. The line box will be
    broken into multiple lines later.

    Empty text boxes are dropped on the way. ``box`` is modified in place
    and returned; boxes that are not parent boxes are returned untouched.

    This is the first case in
    http://www.w3.org/TR/CSS21/visuren.html#anonymous-block-level

    Eg.::

        BlockBox[
            TextBox['Some '],
            InlineBox[TextBox['text']],
            BlockBox[
                TextBox['More text'],
            ]
        ]

    is turned into::

        BlockBox[
            AnonymousBlockBox[
                LineBox[
                    TextBox['Some '],
                    InlineBox[TextBox['text']],
                ]
            ]
            BlockBox[
                LineBox[
                    TextBox['More text'],
                ]
            ]
        ]

    """
    if not isinstance(box, boxes.ParentBox):
        return box

    original_children = list(box.children)

    if original_children and box.leading_collapsible_space is False:
        box.leading_collapsible_space = (
            original_children[0].leading_collapsible_space)

    # Keep track of removed collapsing spaces for wrap opportunities, and
    # remove empty text boxes.
    # (They may have been emptied by process_whitespace().)
    kept_children = []
    pending_space = False
    for child in original_children:
        if pending_space:
            child.leading_collapsible_space = True

        if isinstance(child, boxes.TextBox) and not child.text:
            pending_space = child.leading_collapsible_space
            continue

        pending_space = False
        kept_children.append(inline_in_block(child))

    if box.trailing_collapsible_space is False:
        box.trailing_collapsible_space = pending_space

    if not isinstance(box, boxes.BlockContainerBox):
        box.children = kept_children
        return box

    line_children = []
    result = []
    for child in kept_children:
        assert not isinstance(child, boxes.LineBox)

        if line_children and child.is_absolutely_positioned():
            line_children.append(child)
            continue

        if isinstance(child, boxes.InlineLevelBox) or (
                line_children and child.is_floated()):
            # Do not append white space at the start of a line:
            # It would be removed during layout.
            # (A sequence of white-space was collapsed to a single space
            # by process_whitespace().)
            is_leading_space = (
                not line_children and
                isinstance(child, boxes.TextBox) and
                child.text == ' ' and
                child.style.white_space in (
                    'normal', 'nowrap', 'pre-line'))
            if not is_leading_space:
                line_children.append(child)
            continue

        if line_children:
            # Inlines are consecutive no more: add this line box
            # and start a new one.
            line_box = boxes.LineBox.anonymous_from(box, line_children)
            result.append(boxes.BlockBox.anonymous_from(box, [line_box]))
            line_children = []
        result.append(child)

    if line_children:
        # There were inlines at the end
        line_box = boxes.LineBox.anonymous_from(box, line_children)
        if result:
            result.append(boxes.BlockBox.anonymous_from(box, [line_box]))
        else:
            # Only inline-level children: one line box
            result.append(line_box)

    box.children = result
    return box
|
2011-06-29 21:59:29 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def block_in_inline(box):
    """Build the structure of blocks inside lines.

    A line box whose inline boxes contain block-level boxes is broken
    around each of these block-level boxes. The pieces of line are each
    wrapped in an anonymous block box, and the block-level boxes are put
    between them as siblings.

    ``box`` is returned. Its children are only replaced when at least one
    of them changed. Boxes that are not parent boxes are returned as-is.

    This is the second case in
    http://www.w3.org/TR/CSS21/visuren.html#anonymous-block-level

    Eg. if this is given::

        BlockBox[
            LineBox[
                InlineBox[
                    TextBox['Some '],
                    BlockBox[LineBox[TextBox['More text']]],
                    TextBox['and the end.'],
                ]
            ]
        ]

    this is returned::

        BlockBox[
            AnonymousBlockBox[
                LineBox[
                    InlineBox[
                        TextBox['Some '],
                    ]
                ]
            ],
            BlockBox[LineBox[TextBox['More text']]],
            AnonymousBlockBox[
                LineBox[
                    InlineBox[
                        TextBox['and the end.'],
                    ]
                ]
            ],
        ]

    """
    if not isinstance(box, boxes.ParentBox):
        return box

    result = []
    has_changes = False

    for child in box.children:
        if not isinstance(child, boxes.LineBox):
            # Not in an inline formatting context.
            replacement = block_in_inline(child)
        else:
            assert len(box.children) == 1, (
                'Line boxes should have no '
                'siblings at this stage, got %r.' % box.children)
            resume_at = None
            while True:
                # Each turn handles the same line box, starting after the
                # block-level box found by the previous turn.
                line_part, block, resume_at = _inner_block_in_inline(
                    child, skip_stack=resume_at)
                if block is None:
                    break
                result.append(
                    boxes.BlockBox.anonymous_from(box, [line_part]))
                result.append(block_in_inline(block))
            if result:
                # Some children were already added, this became a block
                # context.
                replacement = boxes.BlockBox.anonymous_from(
                    box, [line_part])
            else:
                # Keep the single line box as-is, without anonymous blocks.
                replacement = line_part

        if replacement is not child:
            has_changes = True
        result.append(replacement)

    if has_changes:
        box.children = result
    return box
|
2011-06-29 21:59:29 +02:00
|
|
|
|
|
|
|
|
|
|
2012-06-21 16:03:29 +02:00
|
|
|
|
def _inner_block_in_inline(box, skip_stack=None):
    """Look for a block-level box in an inline formatting context.

    When an in-flow block-level box is found, the returned value is
    ``(new_box, block_level_box, resume_at)``: ``new_box`` is a copy of
    ``box`` with the content that comes before the block-level box, and
    ``resume_at`` can be given as ``skip_stack`` to another call in order
    to go on with the search just after the block-level box.

    When there is no block-level box after the position given by
    ``skip_stack``, the returned value is ``(new_box, None, None)``.

    """
    collected = []
    found_block = None
    resume_at = None
    has_changes = False

    is_start = skip_stack is None
    if is_start:
        skip = 0
    else:
        skip, skip_stack = skip_stack

    for index, child in box.enumerate_skip(skip):
        if (isinstance(child, boxes.BlockLevelBox) and
                child.is_in_normal_flow()):
            assert skip_stack is None  # Should not skip here
            # Resume *after* the block, hence index + 1.
            before_block = box.copy_with_children(
                collected, is_start=is_start, is_end=False)
            return before_block, child, (index + 1, resume_at)

        if isinstance(child, boxes.InlineBox):
            new_child, found_block, resume_at = _inner_block_in_inline(
                child, skip_stack)
            # The stack is only meaningful for the first child visited.
            skip_stack = None
        else:
            assert skip_stack is None  # Should not skip here
            new_child = block_in_inline(child)
            # found_block is still None.

        if new_child is not child:
            has_changes = True
        collected.append(new_child)

        if found_block is not None:
            # A block was found deeper: stop here and resume in this child.
            before_block = box.copy_with_children(
                collected, is_start=is_start, is_end=False)
            return before_block, found_block, (index, resume_at)

    # No block-level box was found: copy the box only if needed.
    if has_changes or skip:
        box = box.copy_with_children(
            collected, is_start=is_start, is_end=True)

    return box, found_block, resume_at
|
2012-01-27 16:53:16 +01:00
|
|
|
|
|
|
|
|
|
|
2012-02-07 16:59:22 +01:00
|
|
|
|
def set_viewport_overflow(root_box):
    """Set a ``viewport_overflow`` attribute on the box for the root element.

    Like backgrounds, ``overflow`` on the root element must be propagated
    to the viewport. When the root element is ``html`` with a ``visible``
    overflow, the value of its first ``body`` child is used instead, if
    there is one.

    The overflow of the box whose value was used is then reset to
    ``visible``, and ``root_box`` is returned.

    See http://www.w3.org/TR/CSS21/visufx.html#overflow

    """
    source_box = root_box
    look_at_body = (
        root_box.element_tag.lower() == 'html' and
        root_box.style.overflow == 'visible')
    if look_at_body:
        body = next(
            (child for child in root_box.children
             if child.element_tag.lower() == 'body'),
            None)
        if body is not None:
            source_box = body

    root_box.viewport_overflow = source_box.style.overflow
    source_box.style['overflow'] = 'visible'
    return root_box
|
2012-05-15 19:29:54 +02:00
|
|
|
|
|
|
|
|
|
|
2015-04-30 21:54:19 +02:00
|
|
|
|
def box_text(box):
    """Return the text of a box, without its ::before and ::after parts.

    For a text box this is its own text. For a parent box this is the
    joined text of the descendant text boxes whose ``element_tag`` does
    not end with ``::before`` or ``::after``. Other boxes give ``''``.

    """
    if isinstance(box, boxes.TextBox):
        return box.text
    if not isinstance(box, boxes.ParentBox):
        return ''

    pieces = []
    for descendant in box.descendants():
        tag = descendant.element_tag
        if tag.endswith('::before') or tag.endswith('::after'):
            continue
        if isinstance(descendant, boxes.TextBox):
            pieces.append(descendant.text)
    return ''.join(pieces)
|
|
|
|
|
|
|
|
|
|
|
2018-01-23 00:45:28 +01:00
|
|
|
|
def box_text_first_letter(box):
    """Return the first character of ``box_text(box)``, or ``''`` if empty."""
    whole_text = box_text(box)
    if not whole_text:
        return ''
    return whole_text[0]
|
|
|
|
|
|
|
|
|
|
|
2015-04-30 21:54:19 +02:00
|
|
|
|
def box_text_before(box):
    """Return the text of the ::before parts of a parent box.

    This is the joined ``box_text()`` of the descendants that are not
    parent boxes and whose ``element_tag`` ends with ``::before``.
    Boxes that are not parent boxes give ``''``.

    """
    if not isinstance(box, boxes.ParentBox):
        return ''

    pieces = []
    for descendant in box.descendants():
        if not descendant.element_tag.endswith('::before'):
            continue
        if isinstance(descendant, boxes.ParentBox):
            continue
        pieces.append(box_text(descendant))
    return ''.join(pieces)
|
|
|
|
|
|
|
|
|
|
|
2015-04-30 21:54:19 +02:00
|
|
|
|
def box_text_after(box):
    """Return the text of the ::after parts of a parent box.

    This is the joined ``box_text()`` of the descendants that are not
    parent boxes and whose ``element_tag`` ends with ``::after``.
    Boxes that are not parent boxes give ``''``.

    """
    if not isinstance(box, boxes.ParentBox):
        return ''

    pieces = []
    for descendant in box.descendants():
        if not descendant.element_tag.endswith('::after'):
            continue
        if isinstance(descendant, boxes.ParentBox):
            continue
        pieces.append(box_text(descendant))
    return ''.join(pieces)
|
|
|
|
|
|
|
|
|
|
|
2015-04-30 21:54:19 +02:00
|
|
|
|
# Maps a keyword to the function extracting the matching text from a box.
# NOTE(review): the keys look like the arguments accepted by the CSS
# ``content()`` function -- confirm against the callers.
TEXT_CONTENT_EXTRACTORS = {
    'text': box_text,
    'before': box_text_before,
    'after': box_text_after,
    'first-letter': box_text_first_letter,
}
|