debian-weasyprint/weasyprint/css/utils.py

"""
    weasyprint.css.utils
    --------------------

    Utils for CSS properties.
    See http://www.w3.org/TR/CSS21/propidx.html and various CSS3 modules.

    :copyright: Copyright 2011-2019 Simon Sapin and contributors, see AUTHORS.
    :license: BSD, see LICENSE for details.

"""

import functools
import math
from urllib.parse import unquote, urljoin

from tinycss2.color3 import parse_color

from ..formatting_structure import counters
from ..urls import iri_to_uri, url_is_absolute
from .properties import Dimension

# http://dev.w3.org/csswg/css3-values/#angles
# 1<unit> is this many radians.
ANGLE_TO_RADIANS = {
    'rad': 1,
    'turn': 2 * math.pi,
    'deg': math.pi / 180,
    'grad': math.pi / 200,
}

# How many CSS pixels is one <unit>?
# http://www.w3.org/TR/CSS21/syndata.html#length-units
LENGTHS_TO_PIXELS = {
    'px': 1,
    'pt': 1. / 0.75,
    'pc': 16.,  # LENGTHS_TO_PIXELS['pt'] * 12
    'in': 96.,  # LENGTHS_TO_PIXELS['pt'] * 72
    'cm': 96. / 2.54,  # LENGTHS_TO_PIXELS['in'] / 2.54
    'mm': 96. / 25.4,  # LENGTHS_TO_PIXELS['in'] / 25.4
    'q': 96. / 25.4 / 4.,  # LENGTHS_TO_PIXELS['mm'] / 4
}

# http://dev.w3.org/csswg/css-values/#resolution
RESOLUTION_TO_DPPX = {
    'dppx': 1,
    'dpi': 1 / LENGTHS_TO_PIXELS['in'],
    'dpcm': 1 / LENGTHS_TO_PIXELS['cm'],
}

# Sets of possible length units
LENGTH_UNITS = set(LENGTHS_TO_PIXELS) | set(['ex', 'em', 'ch', 'rem'])

# Constants about background positions
ZERO_PERCENT = Dimension(0, '%')
FIFTY_PERCENT = Dimension(50, '%')
HUNDRED_PERCENT = Dimension(100, '%')
BACKGROUND_POSITION_PERCENTAGES = {
    'top': ZERO_PERCENT,
    'left': ZERO_PERCENT,
    'center': FIFTY_PERCENT,
    'bottom': HUNDRED_PERCENT,
    'right': HUNDRED_PERCENT,
}

# Direction keywords used for gradients
DIRECTION_KEYWORDS = {
    # ('angle', radians)  0 upwards, then clockwise
    ('to', 'top'): ('angle', 0),
    ('to', 'right'): ('angle', math.pi / 2),
    ('to', 'bottom'): ('angle', math.pi),
    ('to', 'left'): ('angle', math.pi * 3 / 2),
    # ('corner', keyword)
    ('to', 'top', 'left'): ('corner', 'top_left'),
    ('to', 'left', 'top'): ('corner', 'top_left'),
    ('to', 'top', 'right'): ('corner', 'top_right'),
    ('to', 'right', 'top'): ('corner', 'top_right'),
    ('to', 'bottom', 'left'): ('corner', 'bottom_left'),
    ('to', 'left', 'bottom'): ('corner', 'bottom_left'),
    ('to', 'bottom', 'right'): ('corner', 'bottom_right'),
    ('to', 'right', 'bottom'): ('corner', 'bottom_right'),
}

# Default fallback values used in attr() functions
ATTR_FALLBACKS = {
    'string': ('string', ''),
    'color': ('ident', 'currentcolor'),
    'url': ('external', 'about:invalid'),
    'integer': ('number', 0),
    'number': ('number', 0),
    '%': ('number', 0),
}
for unit in LENGTH_UNITS:
    ATTR_FALLBACKS[unit] = ('length', Dimension('0', unit))
for unit in ANGLE_TO_RADIANS:
    ATTR_FALLBACKS[unit] = ('angle', Dimension('0', unit))


class InvalidValues(ValueError):
    """Invalid or unsupported values for a known CSS property."""


class CenterKeywordFakeToken(object):
    type = 'ident'
    lower_value = 'center'
    unit = None


def split_on_comma(tokens):
    """Split a list of tokens on commas, ie ``LiteralToken(',')``.

    Only "top-level" comma tokens are splitting points, not commas inside a
    function or blocks.

    """
    parts = []
    this_part = []
    for token in tokens:
        if token.type == 'literal' and token.value == ',':
            parts.append(this_part)
            this_part = []
        else:
            this_part.append(token)
    parts.append(this_part)
    return tuple(parts)


def split_on_optional_comma(tokens):
    """Split a list of tokens on optional commas, ie ``LiteralToken(',')``."""
    parts = []
    for split_part in split_on_comma(tokens):
        if not split_part:
            # Happens when there's a comma at the beginning, at the end, or
            # when two commas are next to each other.
            return
        for part in split_part:
            parts.append(part)
    return parts


def remove_whitespace(tokens):
    """Remove any top-level whitespace and comments in a token list."""
    return tuple(
        token for token in tokens
        if token.type not in ('whitespace', 'comment'))


def safe_urljoin(base_url, url):
    if url_is_absolute(url):
        return iri_to_uri(url)
    elif base_url:
        return iri_to_uri(urljoin(base_url, url))
    else:
        raise InvalidValues(
            'Relative URI reference without a base URI: %r' % url)


def comma_separated_list(function):
    """Decorator for validators that accept a comma separated list."""
    @functools.wraps(function)
    def wrapper(tokens, *args):
        results = []
        for part in split_on_comma(tokens):
            result = function(remove_whitespace(part), *args)
            if result is None:
                return None
            results.append(result)
        return tuple(results)
    wrapper.single_value = function
    return wrapper


def get_keyword(token):
    """If ``token`` is a keyword, return its lowercase name.

    Otherwise return ``None``.

    """
    if token.type == 'ident':
        return token.lower_value


def get_custom_ident(token):
    """If ``token`` is a keyword, return its name.

    Otherwise return ``None``.

    """
    if token.type == 'ident':
        return token.value


def get_single_keyword(tokens):
    """If ``values`` is a 1-element list of keywords, return its name.

    Otherwise return ``None``.

    """
    if len(tokens) == 1:
        token = tokens[0]
        if token.type == 'ident':
            return token.lower_value


def single_keyword(function):
    """Decorator for validators that only accept a single keyword."""
    @functools.wraps(function)
    def keyword_validator(tokens):
        """Wrap a validator to call get_single_keyword on tokens."""
        keyword = get_single_keyword(tokens)
        if function(keyword):
            return keyword
    return keyword_validator


def single_token(function):
    """Decorator for validators that only accept a single token."""
    @functools.wraps(function)
    def single_token_validator(tokens, *args):
        """Validate a property whose token is single."""
        if len(tokens) == 1:
            return function(tokens[0], *args)
    single_token_validator.__func__ = function
    return single_token_validator


def parse_linear_gradient_parameters(arguments):
    first_arg = arguments[0]
    if len(first_arg) == 1:
        angle = get_angle(first_arg[0])
        if angle is not None:
            return ('angle', angle), arguments[1:]
    else:
        result = DIRECTION_KEYWORDS.get(tuple(map(get_keyword, first_arg)))
        if result is not None:
            return result, arguments[1:]
    return ('angle', math.pi), arguments  # Default direction is 'to bottom'


def parse_2d_position(tokens):
    """Common syntax of background-position and transform-origin."""
    if len(tokens) == 1:
        tokens = [tokens[0], CenterKeywordFakeToken]
    elif len(tokens) != 2:
        return None

    token_1, token_2 = tokens
    length_1 = get_length(token_1, percentage=True)
    length_2 = get_length(token_2, percentage=True)
    if length_1 and length_2:
        return length_1, length_2
    keyword_1, keyword_2 = map(get_keyword, tokens)
    if length_1 and keyword_2 in ('top', 'center', 'bottom'):
        return length_1, BACKGROUND_POSITION_PERCENTAGES[keyword_2]
    elif length_2 and keyword_1 in ('left', 'center', 'right'):
        return BACKGROUND_POSITION_PERCENTAGES[keyword_1], length_2
    elif (keyword_1 in ('left', 'center', 'right') and
          keyword_2 in ('top', 'center', 'bottom')):
        return (BACKGROUND_POSITION_PERCENTAGES[keyword_1],
                BACKGROUND_POSITION_PERCENTAGES[keyword_2])
    elif (keyword_1 in ('top', 'center', 'bottom') and
          keyword_2 in ('left', 'center', 'right')):
        # Swap tokens. They need to be in (horizontal, vertical) order.
        return (BACKGROUND_POSITION_PERCENTAGES[keyword_2],
                BACKGROUND_POSITION_PERCENTAGES[keyword_1])


def parse_position(tokens):
    """Parse background-position and object-position.

    See http://dev.w3.org/csswg/css3-background/#the-background-position
    https://drafts.csswg.org/css-images-3/#propdef-object-position

    """
    result = parse_2d_position(tokens)
    if result is not None:
        pos_x, pos_y = result
        return 'left', pos_x, 'top', pos_y

    if len(tokens) == 4:
        keyword_1 = get_keyword(tokens[0])
        keyword_2 = get_keyword(tokens[2])
        length_1 = get_length(tokens[1], percentage=True)
        length_2 = get_length(tokens[3], percentage=True)
        if length_1 and length_2:
            if (keyword_1 in ('left', 'right') and
                    keyword_2 in ('top', 'bottom')):
                return keyword_1, length_1, keyword_2, length_2
            if (keyword_2 in ('left', 'right') and
                    keyword_1 in ('top', 'bottom')):
                return keyword_2, length_2, keyword_1, length_1

    if len(tokens) == 3:
        length = get_length(tokens[2], percentage=True)
        if length is not None:
            keyword = get_keyword(tokens[1])
            other_keyword = get_keyword(tokens[0])
        else:
            length = get_length(tokens[1], percentage=True)
            other_keyword = get_keyword(tokens[2])
            keyword = get_keyword(tokens[0])

        if length is not None:
            if other_keyword == 'center':
                if keyword in ('top', 'bottom'):
                    return 'left', FIFTY_PERCENT, keyword, length
                if keyword in ('left', 'right'):
                    return keyword, length, 'top', FIFTY_PERCENT
            elif (keyword in ('left', 'right') and
                    other_keyword in ('top', 'bottom')):
                return keyword, length, other_keyword, ZERO_PERCENT
            elif (keyword in ('top', 'bottom') and
                    other_keyword in ('left', 'right')):
                return other_keyword, ZERO_PERCENT, keyword, length


def parse_radial_gradient_parameters(arguments):
    shape = None
    position = None
    size = None
    size_shape = None
    stack = arguments[0][::-1]
    while stack:
        token = stack.pop()
        keyword = get_keyword(token)
        if keyword == 'at':
            position = parse_position(stack[::-1])
            if position is None:
                return
            break
        elif keyword in ('circle', 'ellipse') and shape is None:
            shape = keyword
        elif keyword in ('closest-corner', 'farthest-corner',
                         'closest-side', 'farthest-side') and size is None:
            size = 'keyword', keyword
        else:
            if stack and size is None:
                length_1 = get_length(token, percentage=True)
                length_2 = get_length(stack[-1], percentage=True)
                if None not in (length_1, length_2):
                    size = 'explicit', (length_1, length_2)
                    size_shape = 'ellipse'
                    stack.pop()
            if size is None:
                length_1 = get_length(token)
                if length_1 is not None:
                    size = 'explicit', (length_1, length_1)
                    size_shape = 'circle'
            if size is None:
                return
    if (shape, size_shape) in (('circle', 'ellipse'), ('circle', 'ellipse')):
        return
    return (
        shape or size_shape or 'ellipse',
        size or ('keyword', 'farthest-corner'),
        position or ('left', FIFTY_PERCENT, 'top', FIFTY_PERCENT),
        arguments[1:])


def parse_color_stop(tokens):
    if len(tokens) == 1:
        color = parse_color(tokens[0])
        if color == 'currentColor':
            # TODO: return the current color instead
            return parse_color('black'), None
        if color is not None:
            return color, None
    elif len(tokens) == 2:
        color = parse_color(tokens[0])
        position = get_length(tokens[1], negative=True, percentage=True)
        if color is not None and position is not None:
            return color, position
    raise InvalidValues


def parse_function(function_token):
    """Parse functional notation.

    Return ``(name, args)`` if the given token is a function with comma- or
    space-separated arguments. Return ``None`` otherwise.

    """
    if not getattr(function_token, 'type', None) == 'function':
        return

    content = list(remove_whitespace(function_token.arguments))
    arguments = []
    last_is_comma = False
    while content:
        token = content.pop(0)
        is_comma = token.type == 'literal' and token.value == ','
        if last_is_comma and is_comma:
            return
        if is_comma:
            last_is_comma = True
        else:
            last_is_comma = False
            if token.type == 'function':
                argument_function = parse_function(token)
                if argument_function is None:
                    return
            arguments.append(token)
    if last_is_comma:
        return
    return function_token.lower_name, arguments


def check_attr_function(token, allowed_type=None):
    function = parse_function(token)
    if function is None:
        return
    name, args = function
    if name == 'attr' and len(args) in (1, 2, 3):
        if args[0].type != 'ident':
            return
        attr_name = args[0].value
        if len(args) == 1:
            type_or_unit = 'string'
            fallback = ''
        else:
            if args[1].type != 'ident':
                return
            type_or_unit = args[1].value
            if type_or_unit not in ATTR_FALLBACKS:
                return
            if len(args) == 2:
                fallback = ATTR_FALLBACKS[type_or_unit]
            else:
                fallback_type = args[2].type
                if fallback_type == 'string':
                    fallback = args[2].value
                else:
                    # TODO: handle other fallback types
                    return
        if allowed_type in (None, type_or_unit):
            return ('attr()', (attr_name, type_or_unit, fallback))


def check_counter_function(token, allowed_type=None):
    function = parse_function(token)
    if function is None:
        return
    name, args = function
    arguments = []
    if (name == 'counter' and len(args) in (1, 2)) or (
            name == 'counters' and len(args) in (2, 3)):
        ident = args.pop(0)
        if ident.type != 'ident':
            return
        arguments.append(ident.value)

        if name == 'counters':
            string = args.pop(0)
            if string.type != 'string':
                return
            arguments.append(string.value)

        if args:
            counter_style = get_keyword(args.pop(0))
            if counter_style not in ['none'] + list(counters.STYLES):
                return
            arguments.append(counter_style)
        else:
            arguments.append('decimal')

        return ('%s()' % name, tuple(arguments))


def check_content_function(token):
    function = parse_function(token)
    if function is None:
        return
    name, args = function
    if name == 'content':
        if len(args) == 0:
            return ('content()', 'text')
        elif len(args) == 1:
            ident = args.pop(0)
            if ident.type == 'ident' and ident.lower_value in (
                    'text', 'before', 'after', 'first-letter', 'marker'):
                return ('content()', ident.lower_value)


def check_string_or_element_function(string_or_element, token):
    function = parse_function(token)
    if function is None:
        return
    name, args = function
    if name == string_or_element and len(args) in (1, 2):
        custom_ident = args.pop(0)
        if custom_ident.type != 'ident':
            return
        custom_ident = custom_ident.value

        if args:
            ident = args.pop(0)
            if ident.type != 'ident' or ident.lower_value not in (
                    'first', 'start', 'last', 'first-except'):
                return
            ident = ident.lower_value
        else:
            ident = 'first'

        return ('%s()' % string_or_element, (custom_ident, ident))


def check_var_function(token):
    function = parse_function(token)
    if function is None:
        return
    name, args = function
    if name == 'var' and args:
        ident = args.pop(0)
        if ident.type != 'ident' or not ident.value.startswith('--'):
            return

        # TODO: we should check authorized tokens
        # https://drafts.csswg.org/css-syntax-3/#typedef-declaration-value
        return ('var()', (ident.value.replace('-', '_'), args or None))


def get_string(token):
    """Parse a <string> token."""
    if token.type == 'string':
        return ('string', token.value)
    if token.type == 'function':
        if token.name == 'attr':
            return check_attr_function(token, 'string')
        elif token.name in ('counter', 'counters'):
            return check_counter_function(token)
        elif token.name == 'content':
            return check_content_function(token)
        elif token.name == 'string':
            return check_string_or_element_function('string', token)


def get_length(token, negative=True, percentage=False):
    """Parse a <length> token."""
    if percentage and token.type == 'percentage':
        if negative or token.value >= 0:
            return Dimension(token.value, '%')
    if token.type == 'dimension' and token.unit in LENGTH_UNITS:
        if negative or token.value >= 0:
            return Dimension(token.value, token.unit)
    if token.type == 'number' and token.value == 0:
        return Dimension(0, None)


def get_angle(token):
    """Parse an <angle> token in radians."""
    if token.type == 'dimension':
        factor = ANGLE_TO_RADIANS.get(token.unit)
        if factor is not None:
            return token.value * factor


def get_resolution(token):
    """Parse a <resolution> token in ddpx."""
    if token.type == 'dimension':
        factor = RESOLUTION_TO_DPPX.get(token.unit)
        if factor is not None:
            return token.value * factor


def get_image(token, base_url):
    """Parse an <image> token."""
    from ..images import LinearGradient, RadialGradient

    if token.type != 'function':
        parsed_url = get_url(token, base_url)
        if parsed_url:
            assert parsed_url[0] == 'url'
            if parsed_url[1][0] == 'external':
                return 'url', parsed_url[1][1]
        return
    arguments = split_on_comma(remove_whitespace(token.arguments))
    name = token.lower_name
    if name in ('linear-gradient', 'repeating-linear-gradient'):
        direction, color_stops = parse_linear_gradient_parameters(arguments)
        if color_stops:
            return 'linear-gradient', LinearGradient(
                [parse_color_stop(stop) for stop in color_stops],
                direction, 'repeating' in name)
    elif name in ('radial-gradient', 'repeating-radial-gradient'):
        result = parse_radial_gradient_parameters(arguments)
        if result is not None:
            shape, size, position, color_stops = result
        else:
            shape = 'ellipse'
            size = 'keyword', 'farthest-corner'
            position = 'left', FIFTY_PERCENT, 'top', FIFTY_PERCENT
            color_stops = arguments
        if color_stops:
            return 'radial-gradient', RadialGradient(
                [parse_color_stop(stop) for stop in color_stops],
                shape, size, position, 'repeating' in name)


def get_url(token, base_url):
    """Parse an <url> token."""
    if token.type == 'url':
        if token.value.startswith('#'):
            return ('url', ('internal', unquote(token.value[1:])))
        else:
            return ('url', ('external', safe_urljoin(base_url, token.value)))
    elif token.type == 'function':
        if token.name == 'attr':
            return check_attr_function(token, 'url')


def get_quote(token):
    """Parse a <quote> token."""
    keyword = get_keyword(token)
    if keyword in (
            'open-quote', 'close-quote',
            'no-open-quote', 'no-close-quote'):
        return keyword


def get_target(token, base_url):
    """Parse a <target> token."""
    function = parse_function(token)
    if function is None:
        return
    name, args = function
    args = split_on_optional_comma(args)
    if not args:
        return

    if name == 'target-counter':
        if len(args) not in (2, 3):
            return
    elif name == 'target-counters':
        if len(args) not in (3, 4):
            return
    elif name == 'target-text':
        if len(args) not in (1, 2):
            return
    else:
        return

    values = []

    link = args.pop(0)
    string_link = get_string(link)
    if string_link is None:
        url = get_url(link, base_url)
        if url is None:
            return
        values.append(url)
    else:
        values.append(string_link)

    if name.startswith('target-counter'):
        if not args:
            return

        ident = args.pop(0)
        if ident.type != 'ident':
            return
        values.append(ident.value)

        if name == 'target-counters':
            string = get_string(args.pop(0))
            if string is None:
                return
            values.append(string)

        if args:
            counter_style = get_keyword(args.pop(0))
            if counter_style not in counters.STYLES:
                return
        else:
            counter_style = 'decimal'
        values.append(counter_style)
    else:
        if args:
            content = get_keyword(args.pop(0))
            if content not in ('content', 'before', 'after', 'first-letter'):
                return
        else:
            content = 'content'
        values.append(content)

    return ('%s()' % name, tuple(values))


def get_content_list(tokens, base_url):
    """Parse <content-list> tokens."""
    # See https://www.w3.org/TR/css-content-3/#typedef-content-list
    parsed_tokens = [
        get_content_list_token(token, base_url) for token in tokens]
    if None not in parsed_tokens:
        return parsed_tokens


def get_content_list_token(token, base_url):
    """Parse one of the <content-list> tokens."""
    # See https://www.w3.org/TR/css-content-3/#typedef-content-list

    # <string>
    string = get_string(token)
    if string is not None:
        return string

    # contents
    if get_keyword(token) == 'contents':
        return ('content', 'text')

    # <uri>
    url = get_url(token, base_url)
    if url is not None:
        return url

    # <quote>
    quote = get_quote(token)
    if quote is not None:
        return ('quote', quote)

    # <target>
    target = get_target(token, base_url)
    if target is not None:
        return target

    # <leader>
    function = parse_function(token)
    if function is None:
        return
    name, args = function
    if name == 'leader':
        if len(args) != 1:
            return
        arg, = args
        if arg.type == 'ident':
            if arg.value == 'dotted':
                string = '.'
            elif arg.value == 'solid':
                string = '_'
            elif arg.value == 'space':
                string = ' '
            else:
                return
        elif arg.type == 'string':
            string = arg.value
        return ('leader()', ('string', string))
    elif name == 'element':
        return check_string_or_element_function('element', token)