debian-weasyprint/weasyprint/tests/test_api.py

# coding: utf-8
"""
    weasyprint.tests.test_api
    -------------------------

    Test the public API.

    :copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
    :license: BSD, see LICENSE for details.

"""

from __future__ import division, unicode_literals

import contextlib
import gzip
import io
import math
import os
import sys
import threading
import unicodedata
import zlib

import cairocffi as cairo
import pytest
from pdfrw import PdfReader

from .. import CSS, HTML, __main__, default_url_fetcher, navigator
from ..compat import iteritems, urlencode, urljoin, urlparse_uses_relative
from ..urls import path2url
from .test_draw import image_to_pixels
from .testing_utils import (
    FakeHTML, assert_no_logs, capture_logs, http_server, resource_filename,
    temp_directory)

CHDIR_LOCK = threading.Lock()


@contextlib.contextmanager
def chdir(path):
    """Change the current directory in a context manager."""
    with CHDIR_LOCK:
        old_dir = os.getcwd()
        try:
            os.chdir(path)
            yield
        finally:
            os.chdir(old_dir)


def read_file(filename):
    """Shortcut for reading a file."""
    with open(filename, 'rb') as fd:
        return fd.read()


def write_file(filename, content):
    """Shortcut for reading a file."""
    with open(filename, 'wb') as fd:
        fd.write(content)


def _test_resource(class_, basename, check, **kwargs):
    """Common code for testing the HTML and CSS classes."""
    absolute_filename = resource_filename(basename)
    url = path2url(absolute_filename)
    check(class_(absolute_filename, **kwargs))
    check(class_(guess=absolute_filename, **kwargs))
    check(class_(filename=absolute_filename, **kwargs))
    check(class_(url, **kwargs))
    check(class_(guess=url, **kwargs))
    check(class_(url=url, **kwargs))
    with open(absolute_filename, 'rb') as fd:
        check(class_(fd, **kwargs))
    with open(absolute_filename, 'rb') as fd:
        check(class_(guess=fd, **kwargs))
    with open(absolute_filename, 'rb') as fd:
        check(class_(file_obj=fd, **kwargs))
    with open(absolute_filename, 'rb') as fd:
        content = fd.read()
    with chdir(os.path.dirname(__file__)):
        relative_filename = os.path.join('resources', basename)
        check(class_(relative_filename, **kwargs))
        check(class_(string=content, base_url=relative_filename, **kwargs))
        encoding = kwargs.get('encoding') or 'utf8'
        check(class_(string=content.decode(encoding),  # unicode
                     base_url=relative_filename, **kwargs))
    with pytest.raises(TypeError):
        class_(filename='foo', url='bar')


def _assert_equivalent_pdf(pdf_bytes1, pdf_bytes2):
    """Assert that 2 pdf bytestrings are equivalent.

    We have to compare various PDF objects to compare PDF files as pdfrw
    doesn't produce the same PDF files from the same input.

    """
    pdf1, pdf2 = PdfReader(fdata=pdf_bytes1), PdfReader(fdata=pdf_bytes2)
    assert pdf1.Size == pdf2.Size
    assert len(pdf1.Root.Pages.Kids) == len(pdf2.Root.Pages.Kids)
    for page1, page2 in zip(pdf1.Root.Pages.Kids, pdf2.Root.Pages.Kids):
        assert page1.MediaBox == page2.MediaBox
        assert page1.TrimBox == page2.TrimBox
        assert page1.BleedBox == page2.BleedBox


@assert_no_logs
def test_html_parsing():
    """Test the constructor for the HTML class."""
    def check_doc1(html, has_base_url=True):
        """Check that a parsed HTML document looks like resources/doc1.html"""
        root = html.etree_element
        assert root.tag == 'html'
        assert [child.tag for child in root] == ['head', 'body']
        _head, body = root
        assert [child.tag for child in body] == ['h1', 'p', 'ul', 'div']
        h1, p, ul, div = body
        assert h1.text == 'WeasyPrint test document (with Ünicōde)'
        if has_base_url:
            url = urljoin(html.base_url, 'pattern.png')
            assert url.startswith('file:')
            assert url.endswith('weasyprint/tests/resources/pattern.png')
        else:
            assert html.base_url is None

    _test_resource(FakeHTML, 'doc1.html', check_doc1)
    _test_resource(FakeHTML, 'doc1_UTF-16BE.html', check_doc1,
                   encoding='UTF-16BE')

    with chdir(os.path.dirname(__file__)):
        filename = os.path.join('resources', 'doc1.html')
        with open(filename) as fd:
            string = fd.read()
        check_doc1(FakeHTML(string=string, base_url=filename))
        check_doc1(FakeHTML(string=string), has_base_url=False)
        string_with_meta = string.replace(
            '<meta', '<base href="resources/"><meta')
        check_doc1(FakeHTML(string=string_with_meta, base_url='.'))


@assert_no_logs
def test_css_parsing():
    """Test the constructor for the CSS class."""
    def check_css(css):
        """Check that a parsed stylsheet looks like resources/utf8-test.css"""
        # Using 'encoding' adds a CSSCharsetRule
        h1_rule, = css.matcher.lower_local_name_selectors['h1']
        assert h1_rule[3] == 'before'
        assert h1_rule[4][0][0] == 'content'
        assert h1_rule[4][0][1][0][1] == 'I løvë Unicode'
        assert h1_rule[4][1][0] == 'background_image'
        assert h1_rule[4][1][1][0][0] == 'url'
        assert h1_rule[4][1][1][0][1].startswith('file:')
        assert h1_rule[4][1][1][0][1].endswith(
            'weasyprint/tests/resources/pattern.png')

    _test_resource(CSS, 'utf8-test.css', check_css)
    _test_resource(CSS, 'latin1-test.css', check_css, encoding='latin1')


def check_png_pattern(png_bytes, x2=False, blank=False, rotated=False):
    from .test_draw import _, r, B, assert_pixels_equal
    if blank:
        expected_pixels = [
            _ + _ + _ + _ + _ + _ + _ + _,
            _ + _ + _ + _ + _ + _ + _ + _,
            _ + _ + _ + _ + _ + _ + _ + _,
            _ + _ + _ + _ + _ + _ + _ + _,
            _ + _ + _ + _ + _ + _ + _ + _,
            _ + _ + _ + _ + _ + _ + _ + _,
            _ + _ + _ + _ + _ + _ + _ + _,
            _ + _ + _ + _ + _ + _ + _ + _,
        ]
        size = 8
    elif x2:
        expected_pixels = [
            _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _,
            _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _,
            _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _,
            _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _,
            _ + _ + _ + _ + r + r + B + B + B + B + B + B + _ + _ + _ + _,
            _ + _ + _ + _ + r + r + B + B + B + B + B + B + _ + _ + _ + _,
            _ + _ + _ + _ + B + B + B + B + B + B + B + B + _ + _ + _ + _,
            _ + _ + _ + _ + B + B + B + B + B + B + B + B + _ + _ + _ + _,
            _ + _ + _ + _ + B + B + B + B + B + B + B + B + _ + _ + _ + _,
            _ + _ + _ + _ + B + B + B + B + B + B + B + B + _ + _ + _ + _,
            _ + _ + _ + _ + B + B + B + B + B + B + B + B + _ + _ + _ + _,
            _ + _ + _ + _ + B + B + B + B + B + B + B + B + _ + _ + _ + _,
            _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _,
            _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _,
            _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _,
            _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _ + _,
        ]
        size = 16
    elif rotated:
        expected_pixels = [
            _ + _ + _ + _ + _ + _ + _ + _,
            _ + _ + _ + _ + _ + _ + _ + _,
            _ + _ + B + B + B + B + _ + _,
            _ + _ + B + B + B + B + _ + _,
            _ + _ + B + B + B + B + _ + _,
            _ + _ + r + B + B + B + _ + _,
            _ + _ + _ + _ + _ + _ + _ + _,
            _ + _ + _ + _ + _ + _ + _ + _,
        ]
        size = 8
    else:
        expected_pixels = [
            _ + _ + _ + _ + _ + _ + _ + _,
            _ + _ + _ + _ + _ + _ + _ + _,
            _ + _ + r + B + B + B + _ + _,
            _ + _ + B + B + B + B + _ + _,
            _ + _ + B + B + B + B + _ + _,
            _ + _ + B + B + B + B + _ + _,
            _ + _ + _ + _ + _ + _ + _ + _,
            _ + _ + _ + _ + _ + _ + _ + _,
        ]
        size = 8
    surface = cairo.ImageSurface.create_from_png(io.BytesIO(png_bytes))
    assert_pixels_equal('api_png', size, size,
                        image_to_pixels(surface, size, size),
                        b''.join(expected_pixels))


@assert_no_logs
def test_python_render():
    """Test rendering with the Python API."""
    base_url = resource_filename('dummy.html')
    html_string = '<body><img src=pattern.png>'
    css_string = '''
        @page { margin: 2px; size: 8px; background: #fff }
        body { margin: 0; font-size: 0 }
        img { image-rendering: pixelated }

        @media screen { img { transform: rotate(-90deg) } }
    '''
    html = FakeHTML(string=html_string, base_url=base_url)
    css = CSS(string=css_string)

    png_bytes = html.write_png(stylesheets=[css])
    pdf_bytes = html.write_pdf(stylesheets=[css])
    assert png_bytes.startswith(b'\211PNG\r\n\032\n')
    assert pdf_bytes.startswith(b'%PDF')

    check_png_pattern(png_bytes)
    # TODO: check PDF content? How?

    class fake_file(object):
        def __init__(self):
            self.chunks = []

        def write(self, data):
            self.chunks.append(bytes(data[:]))

        def getvalue(self):
            return b''.join(self.chunks)
    png_file = fake_file()
    html.write_png(png_file, stylesheets=[css])
    assert png_file.getvalue() == png_bytes
    pdf_file = fake_file()
    html.write_pdf(pdf_file, stylesheets=[css])
    _assert_equivalent_pdf(pdf_file.getvalue(), pdf_bytes)

    with temp_directory() as temp:
        png_filename = os.path.join(temp, '1.png')
        pdf_filename = os.path.join(temp, '1.pdf')
        html.write_png(png_filename, stylesheets=[css])
        html.write_pdf(pdf_filename, stylesheets=[css])
        assert read_file(png_filename) == png_bytes
        _assert_equivalent_pdf(read_file(pdf_filename), pdf_bytes)

        png_filename = os.path.join(temp, '2.png')
        pdf_filename = os.path.join(temp, '2.pdf')
        with open(png_filename, 'wb') as png_file:
            html.write_png(png_file, stylesheets=[css])
        with open(pdf_filename, 'wb') as pdf_file:
            html.write_pdf(pdf_file, stylesheets=[css])
        assert read_file(png_filename) == png_bytes
        _assert_equivalent_pdf(read_file(pdf_filename), pdf_bytes)

    x2_png_bytes = html.write_png(stylesheets=[css], resolution=192)
    check_png_pattern(x2_png_bytes, x2=True)

    screen_css = CSS(string=css_string, media_type='screen')
    rotated_png_bytes = html.write_png(stylesheets=[screen_css])
    check_png_pattern(rotated_png_bytes, rotated=True)

    assert FakeHTML(
        string=html_string, base_url=base_url, media_type='screen'
    ).write_png(
        stylesheets=[io.BytesIO(css_string.encode('utf8'))]
    ) == rotated_png_bytes
    assert FakeHTML(
        string='<style>%s</style>%s' % (css_string, html_string),
        base_url=base_url, media_type='screen'
    ).write_png() == rotated_png_bytes


@assert_no_logs
def test_command_line_render():
    """Test rendering with the command-line API."""
    css = b'''
        @page { margin: 2px; size: 8px; background: #fff }
        @media screen { img { transform: rotate(-90deg) } }
        body { margin: 0; font-size: 0 }
    '''
    html = b'<body><img src=pattern.png>'
    combined = b'<style>' + css + b'</style>' + html
    linked = b'<link rel=stylesheet href=style.css>' + html

    with chdir(resource_filename('')):
        # Reference
        html_obj = FakeHTML(string=combined, base_url='dummy.html')
        pdf_bytes = html_obj.write_pdf()
        png_bytes = html_obj.write_png()
        x2_png_bytes = html_obj.write_png(resolution=192)
        rotated_png_bytes = FakeHTML(string=combined, base_url='dummy.html',
                                     media_type='screen').write_png()
        empty_png_bytes = FakeHTML(
            string=b'<style>' + css + b'</style>').write_png()
    check_png_pattern(png_bytes)
    check_png_pattern(rotated_png_bytes, rotated=True)
    check_png_pattern(empty_png_bytes, blank=True)

    def run(args, stdin=b''):
        stdin = io.BytesIO(stdin)
        stdout = io.BytesIO()
        try:
            __main__.HTML = FakeHTML
            __main__.main(args.split(), stdin=stdin, stdout=stdout)
        finally:
            __main__.HTML = HTML
        return stdout.getvalue()

    with temp_directory() as temp:
        with chdir(temp):
            pattern_bytes = read_file(resource_filename('pattern.png'))
            write_file('pattern.png', pattern_bytes)
            write_file('no_css.html', html)
            write_file('combined.html', combined)
            write_file('combined-UTF-16BE.html',
                       combined.decode('ascii').encode('UTF-16BE'))
            write_file('linked.html', linked)
            write_file('style.css', css)

            run('combined.html out1.png')
            run('combined.html out2.pdf')
            assert read_file('out1.png') == png_bytes
            _assert_equivalent_pdf(read_file('out2.pdf'), pdf_bytes)

            run('combined-UTF-16BE.html out3.png --encoding UTF-16BE')
            assert read_file('out3.png') == png_bytes

            combined_absolute = os.path.join(temp, 'combined.html')
            run(combined_absolute + ' out4.png')
            assert read_file('out4.png') == png_bytes

            combined_url = path2url(os.path.join(temp, 'combined.html'))
            run(combined_url + ' out5.png')
            assert read_file('out5.png') == png_bytes

            run('linked.html out6.png')  # test relative URLs
            assert read_file('out6.png') == png_bytes

            run('combined.html out7 -f png')
            run('combined.html out8 --format pdf')
            assert read_file('out7') == png_bytes
            _assert_equivalent_pdf(read_file('out8'), pdf_bytes)

            run('no_css.html out9.png')
            run('no_css.html out10.png -s style.css')
            assert read_file('out9.png') != png_bytes
            assert read_file('out10.png') == png_bytes

            stdout = run('--format png combined.html -')
            assert stdout == png_bytes

            run('- out11.png', stdin=combined)
            check_png_pattern(read_file('out11.png'))
            assert read_file('out11.png') == png_bytes

            stdout = run('--format png - -', stdin=combined)
            assert stdout == png_bytes

            run('combined.html out13.png --media-type screen')
            run('combined.html out12.png -m screen')
            run('linked.html out14.png -m screen')
            assert read_file('out12.png') == rotated_png_bytes
            assert read_file('out13.png') == rotated_png_bytes
            assert read_file('out14.png') == rotated_png_bytes

            stdout = run('-f pdf combined.html -')
            assert stdout.count(b'attachment') == 0
            stdout = run('-f pdf -a pattern.png combined.html -')
            assert stdout.count(b'attachment') == 1
            stdout = run('-f pdf -a style.css -a pattern.png combined.html -')
            assert stdout.count(b'attachment') == 2

            stdout = run('-f png -r 192 linked.html -')
            assert stdout == x2_png_bytes
            stdout = run('-f png --resolution 192 linked.html -')
            assert run('linked.html - -f png --resolution 192') == x2_png_bytes
            assert stdout == x2_png_bytes

            os.mkdir('subdirectory')
            os.chdir('subdirectory')
            with capture_logs() as logs:
                stdout = run('--format png - -', stdin=combined)
            assert len(logs) == 1
            assert logs[0].startswith('ERROR: Failed to load image')
            assert stdout == empty_png_bytes

            stdout = run('--format png --base-url .. - -', stdin=combined)
            assert stdout == png_bytes


@assert_no_logs
def test_unicode_filenames():
    """Test non-ASCII filenames both in Unicode or bytes form."""
    # Replicate pattern.png in CSS so that base_url does not matter.
    html = b'''
        <style>
            @page { margin: 2px; size: 8px; background: #fff }
            html { background: #00f; }
            body { background: #f00; width: 1px; height: 1px }
        </style>
        <body>
    '''
    png_bytes = FakeHTML(string=html).write_png()
    check_png_pattern(png_bytes)
    # Remember we have __future__.unicode_literals
    unicode_filename = 'Unicödé'
    if sys.platform.startswith('darwin'):
        unicode_filename = unicodedata.normalize('NFD', unicode_filename)
    with temp_directory() as temp:
        with chdir(temp):
            write_file(unicode_filename, html)
            assert os.listdir('.') == [unicode_filename]
            # This should be independent of the encoding used by the filesystem
            bytes_filename, = os.listdir(b'.')

            assert FakeHTML(unicode_filename).write_png() == png_bytes
            assert FakeHTML(bytes_filename).write_png() == png_bytes

            os.remove(unicode_filename)
            assert os.listdir('.') == []

            FakeHTML(string=html).write_png(unicode_filename)
            assert read_file(bytes_filename) == png_bytes

            # Surface.write_to_png does not accept bytes filenames
            # on Python 3
            if sys.version_info[0] < 3:
                os.remove(unicode_filename)
                assert os.listdir('.') == []

                FakeHTML(string=html).write_png(bytes_filename)
                assert read_file(unicode_filename) == png_bytes


@assert_no_logs
def test_low_level_api():
    html = FakeHTML(string='<body>')
    css = CSS(string='''
        @page { margin: 2px; size: 8px; background: #fff }
        html { background: #00f; }
        body { background: #f00; width: 1px; height: 1px }
    ''')
    pdf_bytes = html.write_pdf(stylesheets=[css])
    assert pdf_bytes.startswith(b'%PDF')
    _assert_equivalent_pdf(html.render([css]).write_pdf(), pdf_bytes)

    png_bytes = html.write_png(stylesheets=[css])
    document = html.render([css], enable_hinting=True)
    page, = document.pages
    assert page.width == 8
    assert page.height == 8
    assert document.write_png() == (png_bytes, 8, 8)
    assert document.copy([page]).write_png() == (png_bytes, 8, 8)

    surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, 8, 8)
    page.paint(cairo.Context(surface))
    file_obj = io.BytesIO()
    surface.write_to_png(file_obj)
    check_png_pattern(file_obj.getvalue())

    surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, 8, 8)
    context = cairo.Context(surface)
    # Rotate at the center
    context.translate(4, 4)
    context.rotate(-math.pi / 2)
    context.translate(-4, -4)
    page.paint(context)
    file_obj = io.BytesIO()
    surface.write_to_png(file_obj)
    check_png_pattern(file_obj.getvalue(), rotated=True)

    document = html.render([css], enable_hinting=True)
    page, = document.pages
    assert (page.width, page.height) == (8, 8)
    png_bytes, width, height = document.write_png(resolution=192)
    assert (width, height) == (16, 16)
    check_png_pattern(png_bytes, x2=True)

    def png_size(result):
        png_bytes, width, height = result
        surface = cairo.ImageSurface.create_from_png(io.BytesIO(png_bytes))
        assert (surface.get_width(), surface.get_height()) == (width, height)
        return width, height

    document = html.render([css], enable_hinting=True)
    page, = document.pages
    assert (page.width, page.height) == (8, 8)
    # A resolution that is not multiple of 96:
    assert png_size(document.write_png(resolution=145.2)) == (13, 13)

    document = FakeHTML(string='''
        <style>
            @page:first { size: 5px 10px } @page { size: 6px 4px }
            p { page-break-before: always }
        </style>
        <p></p>
        <p></p>
    ''').render()
    page_1, page_2 = document.pages
    assert (page_1.width, page_1.height) == (5, 10)
    assert (page_2.width, page_2.height) == (6, 4)

    result = document.write_png()
    # (Max of both widths, Sum of both heights)
    assert png_size(result) == (6, 14)
    assert document.copy([page_1, page_2]).write_png() == result
    assert png_size(document.copy([page_1]).write_png()) == (5, 10)
    assert png_size(document.copy([page_2]).write_png()) == (6, 4)


def round_meta(pages):
    """Eliminate errors of floating point arithmetic for metadata.
    (eg. 49.99999999999994 instead of 50)

    """
    for page in pages:
        anchors = page.anchors
        for anchor_name, (pos_x, pos_y) in iteritems(anchors):
            anchors[anchor_name] = round(pos_x, 6), round(pos_y, 6)
        links = page.links
        for i, link in enumerate(links):
            link_type, target, (pos_x, pos_y, width, height) = link
            link = (
                link_type, target, (round(pos_x, 6), round(pos_y, 6),
                                    round(width, 6), round(height, 6)))
            links[i] = link
        bookmarks = page.bookmarks
        for i, (level, label, (pos_x, pos_y)) in enumerate(bookmarks):
            bookmarks[i] = level, label, (round(pos_x, 6), round(pos_y, 6))


@assert_no_logs
def test_bookmarks():
    def assert_bookmarks(html, expected_by_page, expected_tree, round=False):
        document = FakeHTML(string=html).render()
        if round:
            round_meta(document.pages)
        assert [p.bookmarks for p in document.pages] == expected_by_page
        assert document.make_bookmark_tree() == expected_tree
    assert_bookmarks('''
        <style>* { height: 10px }</style>
        <h1>a</h1>
        <h4 style="page-break-after: always">b</h4>
        <h3 style="position: relative; top: 2px; left: 3px">c</h3>
        <h2>d</h2>
        <h1>e</h1>
    ''', [
        [(1, 'a', (0, 0)), (4, 'b', (0, 10))],
        [(3, 'c', (3, 2)), (2, 'd', (0, 10)), (1, 'e', (0, 20))],
    ], [
        ('a', (0, 0, 0), [
            ('b', (0, 0, 10), []),
            ('c', (1, 3, 2), []),
            ('d', (1, 0, 10), [])]),
        ('e', (1, 0, 20), []),
    ])
    assert_bookmarks('''
        <style>
            * { height: 90px; margin: 0 0 10px 0 }
        </style>
        <h1>Title 1</h1>
        <h1>Title 2</h1>
        <h2 style="position: relative; left: 20px">Title 3</h2>
        <h2>Title 4</h2>
        <h3>Title 5</h3>
        <span style="display: block; page-break-before: always"></span>
        <h2>Title 6</h2>
        <h1>Title 7</h1>
        <h2>Title 8</h2>
        <h3>Title 9</h3>
        <h1>Title 10</h1>
        <h2>Title 11</h2>
    ''', [
        [
            (1, 'Title 1', (0, 0)),
            (1, 'Title 2', (0, 100)),
            (2, 'Title 3', (20, 200)),
            (2, 'Title 4', (0, 300)),
            (3, 'Title 5', (0, 400))
        ], [
            (2, 'Title 6', (0, 100)),
            (1, 'Title 7', (0, 200)),
            (2, 'Title 8', (0, 300)),
            (3, 'Title 9', (0, 400)),
            (1, 'Title 10', (0, 500)),
            (2, 'Title 11', (0, 600))
        ],
    ], [
        ('Title 1', (0, 0, 0), []),
        ('Title 2', (0, 0, 100), [
            ('Title 3', (0, 20, 200), []),
            ('Title 4', (0, 0, 300), [
                ('Title 5', (0, 0, 400), [])]),
            ('Title 6', (1, 0, 100), [])]),
        ('Title 7', (1, 0, 200), [
            ('Title 8', (1, 0, 300), [
                ('Title 9', (1, 0, 400), [])])]),
        ('Title 10', (1, 0, 500), [
            ('Title 11', (1, 0, 600), [])]),
    ])
    assert_bookmarks('''
        <style>* { height: 10px }</style>
        <h2>A</h2> <p>depth 1</p>
        <h4>B</h4> <p>depth 2</p>
        <h2>C</h2> <p>depth 1</p>
        <h3>D</h3> <p>depth 2</p>
        <h4>E</h4> <p>depth 3</p>
    ''', [[
        (2, 'A', (0, 0)),
        (4, 'B', (0, 20)),
        (2, 'C', (0, 40)),
        (3, 'D', (0, 60)),
        (4, 'E', (0, 80)),
    ]], [
        ('A', (0, 0, 0), [
            ('B', (0, 0, 20), [])]),
        ('C', (0, 0, 40), [
            ('D', (0, 0, 60), [
                ('E', (0, 0, 80), [])])]),
    ])
    assert_bookmarks('''
        <style>* { height: 10px; font-size: 0 }</style>
        <h2>A</h2> <p>h2 depth 1</p>
        <h4>B</h4> <p>h4 depth 2</p>
        <h3>C</h3> <p>h3 depth 2</p>
        <h5>D</h5> <p>h5 depth 3</p>
        <h1>E</h1> <p>h1 depth 1</p>
        <h2>F</h2> <p>h2 depth 2</p>
        <h2>G</h2> <p>h2 depth 2</p>
        <h4>H</h4> <p>h4 depth 3</p>
        <h1>I</h1> <p>h1 depth 1</p>
    ''', [[
        (2, 'A', (0, 0)),
        (4, 'B', (0, 20)),
        (3, 'C', (0, 40)),
        (5, 'D', (0, 60)),
        (1, 'E', (0, 70)),
        (2, 'F', (0, 90)),
        (2, 'G', (0, 110)),
        (4, 'H', (0, 130)),
        (1, 'I', (0, 150)),
    ]], [
        ('A', (0, 0, 0), [
            ('B', (0, 0, 20), []),
            ('C', (0, 0, 40), [
                ('D', (0, 0, 60), [])])]),
        ('E', (0, 0, 70), [
            ('F', (0, 0, 90), []),
            ('G', (0, 0, 110), [
                ('H', (0, 0, 130), [])])]),
        ('I', (0, 0, 150), []),
    ])
    assert_bookmarks('<h1>é', [[(1, 'é', (0, 0))]], [('é', (0, 0, 0), [])])
    assert_bookmarks('''
        <h1 style="transform: translateX(50px)">!
    ''', [[(1, '!', (50, 0))]], [('!', (0, 50, 0), [])])
    assert_bookmarks('''
        <h1 style="transform-origin: 0 0;
                   transform: rotate(90deg) translateX(50px)">!
    ''', [[(1, '!', (0, 50))]], [('!', (0, 0, 50), [])], round=True)
    assert_bookmarks('''
        <body style="transform-origin: 0 0; transform: rotate(90deg)">
        <h1 style="transform: translateX(50px)">!
    ''', [[(1, '!', (0, 50))]], [('!', (0, 0, 50), [])], round=True)


@assert_no_logs
def test_links():
    def assert_links(html, expected_links_by_page, expected_anchors_by_page,
                     expected_resolved_links,
                     base_url=resource_filename('<inline HTML>'),
                     warnings=(), round=False):
        with capture_logs() as logs:
            document = FakeHTML(string=html, base_url=base_url).render()
            if round:
                round_meta(document.pages)
            resolved_links = list(document.resolve_links())
        assert len(logs) == len(warnings)
        for message, expected in zip(logs, warnings):
            assert expected in message
        assert [p.links for p in document.pages] == expected_links_by_page
        assert [p.anchors for p in document.pages] == expected_anchors_by_page
        assert resolved_links == expected_resolved_links

    assert_links('''
        <style>
            body { font-size: 10px; line-height: 2; width: 200px }
            p { height: 90px; margin: 0 0 10px 0 }
            img { width: 30px; vertical-align: top }
        </style>
        <p><a href="http://weasyprint.org"><img src=pattern.png></a></p>
        <p style="padding: 0 10px"><a
            href="#lipsum"><img style="border: solid 1px"
                                src=pattern.png></a></p>
        <p id=hello>Hello, World</p>
        <p id=lipsum>
            <a style="display: block; page-break-before: always; height: 30px"
               href="#hel%6Co"></a>
        </p>
    ''', [
        [
            ('external', 'http://weasyprint.org', (0, 0, 30, 20)),
            ('external', 'http://weasyprint.org', (0, 0, 30, 30)),
            ('internal', 'lipsum', (10, 100, 32, 20)),
            ('internal', 'lipsum', (10, 100, 32, 32))
        ],
        [('internal', 'hello', (0, 0, 200, 30))],
    ], [
        {'hello': (0, 200)},
        {'lipsum': (0, 0)}
    ], [
        [
            ('external', 'http://weasyprint.org', (0, 0, 30, 20)),
            ('external', 'http://weasyprint.org', (0, 0, 30, 30)),
            ('internal', (1, 0, 0), (10, 100, 32, 20)),
            ('internal', (1, 0, 0), (10, 100, 32, 32))
        ],
        [('internal', (0, 0, 200), (0, 0, 200, 30))],
    ])

    assert_links(
        '''
            <body style="width: 200px">
            <a href="../lipsum/é_%E9" style="display: block; margin: 10px 5px">
        ''', [[('external', 'http://weasyprint.org/foo/lipsum/%C3%A9_%E9',
                (5, 10, 190, 0))]],
        [{}], [[('external', 'http://weasyprint.org/foo/lipsum/%C3%A9_%E9',
                 (5, 10, 190, 0))]],
        base_url='http://weasyprint.org/foo/bar/')
    assert_links(
        '''
            <body style="width: 200px">
            <div style="display: block; margin: 10px 5px;
                        -weasy-link: url(../lipsum/é_%E9)">
        ''', [[('external', 'http://weasyprint.org/foo/lipsum/%C3%A9_%E9',
                (5, 10, 190, 0))]],
        [{}], [[('external', 'http://weasyprint.org/foo/lipsum/%C3%A9_%E9',
                 (5, 10, 190, 0))]],
        base_url='http://weasyprint.org/foo/bar/')

    # Relative URI reference without a base URI: allowed for links
    assert_links(
        '''
            <body style="width: 200px">
            <a href="../lipsum" style="display: block; margin: 10px 5px">
        ''', [[('external', '../lipsum', (5, 10, 190, 0))]], [{}],
        [[('external', '../lipsum', (5, 10, 190, 0))]], base_url=None)

    # Relative URI reference without a base URI: not supported for -weasy-link
    assert_links(
        '''
            <body style="width: 200px">
            <div style="-weasy-link: url(../lipsum);
                        display: block; margin: 10px 5px">
        ''', [[]], [{}], [[]], base_url=None, warnings=[
            'WARNING: Ignored `-weasy-link: url("../lipsum")` at 1:1, '
            'Relative URI reference without a base URI'])

    # Internal or absolute URI reference without a base URI: OK
    assert_links(
        '''
            <body style="width: 200px">
            <a href="#lipsum" id="lipsum"
                style="display: block; margin: 10px 5px"></a>
            <a href="http://weasyprint.org/" style="display: block"></a>
        ''', [[('internal', 'lipsum', (5, 10, 190, 0)),
               ('external', 'http://weasyprint.org/', (0, 10, 200, 0))]],
        [{'lipsum': (5, 10)}],
        [[('internal', (0, 5, 10), (5, 10, 190, 0)),
          ('external', 'http://weasyprint.org/', (0, 10, 200, 0))]],
        base_url=None)

    assert_links(
        '''
            <body style="width: 200px">
            <div style="-weasy-link: url(#lipsum);
                        margin: 10px 5px" id="lipsum">
        ''',
        [[('internal', 'lipsum', (5, 10, 190, 0))]],
        [{'lipsum': (5, 10)}],
        [[('internal', (0, 5, 10), (5, 10, 190, 0))]],
        base_url=None)

    assert_links(
        '''
            <style> a { display: block; height: 15px } </style>
            <body style="width: 200px">
                <a href="#lipsum"></a>
                <a href="#missing" id="lipsum"></a>
        ''',
        [[('internal', 'lipsum', (0, 0, 200, 15)),
          ('internal', 'missing', (0, 15, 200, 15))]],
        [{'lipsum': (0, 15)}],
        [[('internal', (0, 0, 15), (0, 0, 200, 15))]],
        base_url=None,
        warnings=[
            'ERROR: No anchor #missing for internal URI reference'])

    assert_links(
        '''
            <body style="width: 100px; transform: translateY(100px)">
            <a href="#lipsum" id="lipsum" style="display: block; height: 20px;
                transform: rotate(90deg) scale(2)">
        ''',
        [[('internal', 'lipsum', (30, 10, 40, 200))]],
        [{'lipsum': (70, 10)}],
        [[('internal', (0, 70, 10), (30, 10, 40, 200))]],
        round=True)


def wsgi_client(path_info, qs_args=None):
    start_response_calls = []

    def start_response(status, headers):
        start_response_calls.append((status, headers))
    environ = {'PATH_INFO': path_info,
               'QUERY_STRING': urlencode(qs_args or {})}
    response = b''.join(navigator.app(environ, start_response))
    assert len(start_response_calls) == 1
    status, headers = start_response_calls[0]
    return status, dict(headers), response


@assert_no_logs
def test_navigator():
    with temp_directory() as temp:
        status, headers, body = wsgi_client('/favicon.ico')
        assert status == '200 OK'
        assert headers['Content-Type'] == 'image/x-icon'
        assert body == read_file(navigator.FAVICON)

        status, headers, body = wsgi_client('/lipsum')
        assert status == '404 Not Found'

        status, headers, body = wsgi_client('/')
        body = body.decode('utf8')
        assert status == '200 OK'
        assert headers['Content-Type'].startswith('text/html;')
        assert '<title>WeasyPrint Navigator</title>' in body
        assert '<img' not in body
        assert '></a>' not in body

        filename = os.path.join(temp, 'test.html')
        write_file(filename, b'''
            <h1 id=foo><a href="http://weasyprint.org">Lorem ipsum</a></h1>
            <h2><a href="#foo">bar</a></h2>
        ''')

        url = path2url(filename)
        for status, headers, body in [
            wsgi_client('/view/' + url),
            wsgi_client('/', {'url': url}),
        ]:
            body = body.decode('utf8')
            assert status == '200 OK'
            assert headers['Content-Type'].startswith('text/html;')
            assert '<title>WeasyPrint Navigator</title>' in body
            assert '<img src="data:image/png;base64,' in body
            assert ' name="foo"></a>' in body
            assert ' href="#foo"></a>' in body
            assert ' href="/view/http://weasyprint.org"></a>' in body

        status, headers, body = wsgi_client('/pdf/' + url)
        assert status == '200 OK'
        assert headers['Content-Type'] == 'application/pdf'
        pdf = PdfReader(fdata=body)
        assert pdf.Root.Pages.Kids[0].Annots[0].A == {
            '/Type': '/Action', '/URI': '(http://weasyprint.org)',
            '/S': '/URI'}
        assert pdf.Root.Outlines.First.Title == '(Lorem ipsum)'
        assert pdf.Root.Outlines.Last.Title == '(Lorem ipsum)'


# Make relative URL references work with our custom URL scheme.
urlparse_uses_relative.append('weasyprint-custom')


@assert_no_logs
def test_url_fetcher():
    pattern_png = read_file(resource_filename('pattern.png'))

    def fetcher(url):
        if url == 'weasyprint-custom:foo/%C3%A9_%e9_pattern':
            return dict(string=pattern_png, mime_type='image/png')
        elif url == 'weasyprint-custom:foo/bar.css':
            return dict(string='body { background: url(é_%e9_pattern)',
                        mime_type='text/css')
        else:
            return default_url_fetcher(url)

    base_url = resource_filename('dummy.html')
    css = CSS(string='''
        @page { size: 8px; margin: 2px; background: #fff }
        body { margin: 0; font-size: 0 }
    ''', base_url=base_url)

    def test(html, blank=False):
        html = FakeHTML(string=html, url_fetcher=fetcher, base_url=base_url)
        check_png_pattern(html.write_png(stylesheets=[css]), blank=blank)

    test('<body><img src="pattern.png">')  # Test a "normal" URL
    test('<body><img src="weasyprint-custom:foo/é_%e9_pattern">')
    test('<body style="background: url(weasyprint-custom:foo/é_%e9_pattern)">')
    test('<body><li style="list-style: inside '
         'url(weasyprint-custom:foo/é_%e9_pattern)">')
    test('<link rel=stylesheet href="weasyprint-custom:foo/bar.css"><body>')
    test('<style>@import "weasyprint-custom:foo/bar.css";</style><body>')

    with capture_logs() as logs:
        test('<body><img src="custom:foo/bar">', blank=True)
    assert len(logs) == 1
    assert logs[0].startswith(
        'ERROR: Failed to load image at "custom:foo/bar"')

    def fetcher_2(url):
        assert url == 'weasyprint-custom:%C3%A9_%e9.css'
        return dict(string='', mime_type='text/css')
    FakeHTML(string='<link rel=stylesheet href="weasyprint-custom:'
                    'é_%e9.css"><body>', url_fetcher=fetcher_2).render()


@assert_no_logs
def test_html_meta():
    def assert_meta(html, **meta):
        meta.setdefault('title', None)
        meta.setdefault('authors', [])
        meta.setdefault('keywords', [])
        meta.setdefault('generator', None)
        meta.setdefault('description', None)
        meta.setdefault('created', None)
        meta.setdefault('modified', None)
        meta.setdefault('attachments', [])
        assert vars(FakeHTML(string=html).render().metadata) == meta

    assert_meta('<body>')
    assert_meta(
        '''
            <meta name=author content="I Me &amp; Myself">
            <meta name=author content="Smith, John">
            <title>Test document</title>
            <h1>Another title</h1>
            <meta name=generator content="Human after all">
            <meta name=dummy content=ignored>
            <meta name=dummy>
            <meta content=ignored>
            <meta>
            <meta name=keywords content="html ,\tcss,
                                         pdf,css">
            <meta name=dcterms.created content=2011-04>
            <meta name=dcterms.created content=2011-05>
            <meta name=dcterms.modified content=2013>
            <meta name=keywords content="Python; cairo">
            <meta name=description content="Blah… ">
        ''',
        authors=['I Me & Myself', 'Smith, John'],
        title='Test document',
        generator='Human after all',
        keywords=['html', 'css', 'pdf', 'Python; cairo'],
        description="Blah… ",
        created='2011-04',
        modified='2013')
    assert_meta(
        '''
            <title>One</title>
            <meta name=Author>
            <title>Two</title>
            <title>Three</title>
            <meta name=author content=Me>
        ''',
        title='One',
        authors=['', 'Me'])


@assert_no_logs
def test_http():
    def gzip_compress(data):
        file_obj = io.BytesIO()
        gzip_file = gzip.GzipFile(fileobj=file_obj, mode='wb')
        gzip_file.write(data)
        gzip_file.close()
        return file_obj.getvalue()

    with http_server({
        '/gzip': lambda env: (
            (gzip_compress(b'<html test=ok>'), [('Content-Encoding', 'gzip')])
            if 'gzip' in env.get('HTTP_ACCEPT_ENCODING', '') else
            (b'<html test=accept-encoding-header-fail>', [])
        ),
        '/deflate': lambda env: (
            (zlib.compress(b'<html test=ok>'),
             [('Content-Encoding', 'deflate')])
            if 'deflate' in env.get('HTTP_ACCEPT_ENCODING', '') else
            (b'<html test=accept-encoding-header-fail>', [])
        ),
        '/raw-deflate': lambda env: (
            # Remove zlib header and checksum
            (zlib.compress(b'<html test=ok>')[2:-4],
             [('Content-Encoding', 'deflate')])
            if 'deflate' in env.get('HTTP_ACCEPT_ENCODING', '') else
            (b'<html test=accept-encoding-header-fail>', [])
        ),
    }) as root_url:
        assert HTML(root_url + '/gzip').etree_element.get('test') == 'ok'
        assert HTML(root_url + '/deflate').etree_element.get('test') == 'ok'
        assert HTML(
            root_url + '/raw-deflate').etree_element.get('test') == 'ok'