commit 43611e54adbe6116d6b7c7b458e8d41f2e4060a1 Author: Scott Kitterman Date: Wed Dec 25 07:16:42 2019 +0100 Import weasyprint_51.orig.tar.gz [dgit import orig weasyprint_51.orig.tar.gz] diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..6824298e --- /dev/null +++ b/LICENSE @@ -0,0 +1,29 @@ +BSD 3-Clause License + +Copyright (c) 2011-2019, Simon Sapin and contributors (see AUTHORS). +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/PKG-INFO b/PKG-INFO new file mode 100644 index 00000000..eecc5fb5 --- /dev/null +++ b/PKG-INFO @@ -0,0 +1,58 @@ +Metadata-Version: 2.1 +Name: WeasyPrint +Version: 51 +Summary: The Awesome Document Factory +Home-page: https://weasyprint.org/ +Author: Simon Sapin +Author-email: community@kozea.fr +License: BSD +Project-URL: Documentation, https://weasyprint.readthedocs.io/ +Project-URL: Code, https://github.com/Kozea/WeasyPrint/ +Project-URL: Issue tracker, https://github.com/Kozea/WeasyPrint/issues +Project-URL: Donation, https://www.patreon.com/kozea +Description: ========== + WeasyPrint + ========== + + **The Awesome Document Factory** + + WeasyPrint is a smart solution helping web developers to create PDF + documents. It turns simple HTML pages into gorgeous statistical reports, + invoices, tickets… + + From a technical point of view, WeasyPrint is a visual rendering engine for + HTML and CSS that can export to PDF and PNG. It aims to support web standards + for printing. WeasyPrint is free software made available under a BSD license. + + It is based on various libraries but *not* on a full rendering engine like + WebKit or Gecko. The CSS layout engine is written in Python, designed for + pagination, and meant to be easy to hack on. 
+ + * Free software: BSD licensed + * Python 3.5+ + * Website: https://weasyprint.org/ + * Documentation: https://weasyprint.readthedocs.io/ + * Source code and issue tracker: https://github.com/Kozea/WeasyPrint + * Tests: https://travis-ci.org/Kozea/WeasyPrint + * Support: https://www.patreon.com/kozea + +Keywords: html,css,pdf,converter +Platform: Linux +Platform: macOS +Platform: Windows +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: BSD License +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Topic :: Internet :: WWW/HTTP +Classifier: Topic :: Text Processing :: Markup :: HTML +Classifier: Topic :: Multimedia :: Graphics :: Graphics Conversion +Classifier: Topic :: Printing +Requires-Python: >=3.5 +Description-Content-Type: text/x-rst +Provides-Extra: doc +Provides-Extra: test diff --git a/README.rst b/README.rst new file mode 100644 index 00000000..e661b9fc --- /dev/null +++ b/README.rst @@ -0,0 +1,25 @@ +========== +WeasyPrint +========== + +**The Awesome Document Factory** + +WeasyPrint is a smart solution helping web developers to create PDF +documents. It turns simple HTML pages into gorgeous statistical reports, +invoices, tickets… + +From a technical point of view, WeasyPrint is a visual rendering engine for +HTML and CSS that can export to PDF and PNG. It aims to support web standards +for printing. WeasyPrint is free software made available under a BSD license. + +It is based on various libraries but *not* on a full rendering engine like +WebKit or Gecko. The CSS layout engine is written in Python, designed for +pagination, and meant to be easy to hack on. 
+ +* Free software: BSD licensed +* Python 3.5+ +* Website: https://weasyprint.org/ +* Documentation: https://weasyprint.readthedocs.io/ +* Source code and issue tracker: https://github.com/Kozea/WeasyPrint +* Tests: https://travis-ci.org/Kozea/WeasyPrint +* Support: https://www.patreon.com/kozea diff --git a/WeasyPrint.egg-info/PKG-INFO b/WeasyPrint.egg-info/PKG-INFO new file mode 100644 index 00000000..eecc5fb5 --- /dev/null +++ b/WeasyPrint.egg-info/PKG-INFO @@ -0,0 +1,58 @@ +Metadata-Version: 2.1 +Name: WeasyPrint +Version: 51 +Summary: The Awesome Document Factory +Home-page: https://weasyprint.org/ +Author: Simon Sapin +Author-email: community@kozea.fr +License: BSD +Project-URL: Documentation, https://weasyprint.readthedocs.io/ +Project-URL: Code, https://github.com/Kozea/WeasyPrint/ +Project-URL: Issue tracker, https://github.com/Kozea/WeasyPrint/issues +Project-URL: Donation, https://www.patreon.com/kozea +Description: ========== + WeasyPrint + ========== + + **The Awesome Document Factory** + + WeasyPrint is a smart solution helping web developers to create PDF + documents. It turns simple HTML pages into gorgeous statistical reports, + invoices, tickets… + + From a technical point of view, WeasyPrint is a visual rendering engine for + HTML and CSS that can export to PDF and PNG. It aims to support web standards + for printing. WeasyPrint is free software made available under a BSD license. + + It is based on various libraries but *not* on a full rendering engine like + WebKit or Gecko. The CSS layout engine is written in Python, designed for + pagination, and meant to be easy to hack on. 
+ + * Free software: BSD licensed + * Python 3.5+ + * Website: https://weasyprint.org/ + * Documentation: https://weasyprint.readthedocs.io/ + * Source code and issue tracker: https://github.com/Kozea/WeasyPrint + * Tests: https://travis-ci.org/Kozea/WeasyPrint + * Support: https://www.patreon.com/kozea + +Keywords: html,css,pdf,converter +Platform: Linux +Platform: macOS +Platform: Windows +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: BSD License +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Topic :: Internet :: WWW/HTTP +Classifier: Topic :: Text Processing :: Markup :: HTML +Classifier: Topic :: Multimedia :: Graphics :: Graphics Conversion +Classifier: Topic :: Printing +Requires-Python: >=3.5 +Description-Content-Type: text/x-rst +Provides-Extra: doc +Provides-Extra: test diff --git a/WeasyPrint.egg-info/SOURCES.txt b/WeasyPrint.egg-info/SOURCES.txt new file mode 100644 index 00000000..1f929b0f --- /dev/null +++ b/WeasyPrint.egg-info/SOURCES.txt @@ -0,0 +1,124 @@ +LICENSE +README.rst +setup.cfg +setup.py +WeasyPrint.egg-info/PKG-INFO +WeasyPrint.egg-info/SOURCES.txt +WeasyPrint.egg-info/dependency_links.txt +WeasyPrint.egg-info/entry_points.txt +WeasyPrint.egg-info/not-zip-safe +WeasyPrint.egg-info/requires.txt +WeasyPrint.egg-info/top_level.txt +weasyprint/VERSION +weasyprint/__init__.py +weasyprint/__main__.py +weasyprint/document.py +weasyprint/draw.py +weasyprint/fonts.py +weasyprint/html.py +weasyprint/images.py +weasyprint/logger.py +weasyprint/pdf.py +weasyprint/stacking.py +weasyprint/text.py +weasyprint/urls.py +weasyprint/css/__init__.py +weasyprint/css/computed_values.py +weasyprint/css/html5_ph.css +weasyprint/css/html5_ua.css 
+weasyprint/css/media_queries.py +weasyprint/css/properties.py +weasyprint/css/targets.py +weasyprint/css/tests_ua.css +weasyprint/css/utils.py +weasyprint/css/validation/__init__.py +weasyprint/css/validation/descriptors.py +weasyprint/css/validation/expanders.py +weasyprint/css/validation/properties.py +weasyprint/formatting_structure/__init__.py +weasyprint/formatting_structure/boxes.py +weasyprint/formatting_structure/build.py +weasyprint/formatting_structure/counters.py +weasyprint/layout/__init__.py +weasyprint/layout/absolute.py +weasyprint/layout/backgrounds.py +weasyprint/layout/blocks.py +weasyprint/layout/columns.py +weasyprint/layout/flex.py +weasyprint/layout/float.py +weasyprint/layout/inlines.py +weasyprint/layout/min_max.py +weasyprint/layout/pages.py +weasyprint/layout/percentages.py +weasyprint/layout/preferred.py +weasyprint/layout/replaced.py +weasyprint/layout/tables.py +weasyprint/tests/__init__.py +weasyprint/tests/test_acid2.py +weasyprint/tests/test_api.py +weasyprint/tests/test_boxes.py +weasyprint/tests/test_css.py +weasyprint/tests/test_css_descriptors.py +weasyprint/tests/test_css_validation.py +weasyprint/tests/test_float.py +weasyprint/tests/test_fonts.py +weasyprint/tests/test_pdf.py +weasyprint/tests/test_presentational_hints.py +weasyprint/tests/test_stacking.py +weasyprint/tests/test_target.py +weasyprint/tests/test_text.py +weasyprint/tests/test_tools.py +weasyprint/tests/test_unicode.py +weasyprint/tests/test_variables.py +weasyprint/tests/testing_utils.py +weasyprint/tests/resources/AHEM____.TTF +weasyprint/tests/resources/acid2-reference.html +weasyprint/tests/resources/acid2-test.html +weasyprint/tests/resources/blue.jpg +weasyprint/tests/resources/doc1.html +weasyprint/tests/resources/doc1_UTF-16BE.html +weasyprint/tests/resources/icon.png +weasyprint/tests/resources/latin1-test.css +weasyprint/tests/resources/logo_small.png +weasyprint/tests/resources/mini_ua.css +weasyprint/tests/resources/pattern.gif 
+weasyprint/tests/resources/pattern.palette.png +weasyprint/tests/resources/pattern.png +weasyprint/tests/resources/pattern.svg +weasyprint/tests/resources/really-a-png.svg +weasyprint/tests/resources/really-a-svg.png +weasyprint/tests/resources/sheet2.css +weasyprint/tests/resources/user.css +weasyprint/tests/resources/utf8-test.css +weasyprint/tests/resources/weasyprint.otf +weasyprint/tests/resources/sub_directory/sheet1.css +weasyprint/tests/test_draw/__init__.py +weasyprint/tests/test_draw/test_background.py +weasyprint/tests/test_draw/test_before_after.py +weasyprint/tests/test_draw/test_box.py +weasyprint/tests/test_draw/test_column.py +weasyprint/tests/test_draw/test_current_color.py +weasyprint/tests/test_draw/test_gradient.py +weasyprint/tests/test_draw/test_image.py +weasyprint/tests/test_draw/test_list.py +weasyprint/tests/test_draw/test_opacity.py +weasyprint/tests/test_draw/test_overflow.py +weasyprint/tests/test_draw/test_table.py +weasyprint/tests/test_draw/test_text.py +weasyprint/tests/test_draw/test_transform.py +weasyprint/tests/test_draw/test_visibility.py +weasyprint/tests/test_layout/__init__.py +weasyprint/tests/test_layout/test_block.py +weasyprint/tests/test_layout/test_column.py +weasyprint/tests/test_layout/test_flex.py +weasyprint/tests/test_layout/test_image.py +weasyprint/tests/test_layout/test_inline.py +weasyprint/tests/test_layout/test_inline_block.py +weasyprint/tests/test_layout/test_list.py +weasyprint/tests/test_layout/test_page.py +weasyprint/tests/test_layout/test_position.py +weasyprint/tests/test_layout/test_shrink_to_fit.py +weasyprint/tests/test_layout/test_table.py +weasyprint/tools/__init__.py +weasyprint/tools/navigator.py +weasyprint/tools/renderer.py \ No newline at end of file diff --git a/WeasyPrint.egg-info/dependency_links.txt b/WeasyPrint.egg-info/dependency_links.txt new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/WeasyPrint.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git 
a/WeasyPrint.egg-info/entry_points.txt b/WeasyPrint.egg-info/entry_points.txt new file mode 100644 index 00000000..66f42c36 --- /dev/null +++ b/WeasyPrint.egg-info/entry_points.txt @@ -0,0 +1,3 @@ +[console_scripts] +weasyprint = weasyprint.__main__:main + diff --git a/WeasyPrint.egg-info/not-zip-safe b/WeasyPrint.egg-info/not-zip-safe new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/WeasyPrint.egg-info/not-zip-safe @@ -0,0 +1 @@ + diff --git a/WeasyPrint.egg-info/requires.txt b/WeasyPrint.egg-info/requires.txt new file mode 100644 index 00000000..418964fb --- /dev/null +++ b/WeasyPrint.egg-info/requires.txt @@ -0,0 +1,18 @@ +setuptools>=39.2.0 +cffi>=0.6 +html5lib>=0.999999999 +cairocffi>=0.9.0 +tinycss2>=1.0.0 +cssselect2>=0.1 +CairoSVG>=2.4.0 +Pyphen>=0.9.1 + +[doc] +sphinx +sphinx_rtd_theme + +[test] +pytest-runner +pytest-cov +pytest-flake8 +pytest-isort diff --git a/WeasyPrint.egg-info/top_level.txt b/WeasyPrint.egg-info/top_level.txt new file mode 100644 index 00000000..b40cf4db --- /dev/null +++ b/WeasyPrint.egg-info/top_level.txt @@ -0,0 +1 @@ +weasyprint diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 00000000..f18b7090 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,111 @@ +[metadata] +name = WeasyPrint +url = https://weasyprint.org/ +version = file: weasyprint/VERSION +license = BSD +license_file = LICENSE +description = The Awesome Document Factory +long_description = file: README.rst +long_description_content_type = text/x-rst +author = Simon Sapin +author_email = community@kozea.fr +platforms = + Linux + macOS + Windows +keywords = + html + css + pdf + converter +classifiers = + Development Status :: 5 - Production/Stable + Intended Audience :: Developers + License :: OSI Approved :: BSD License + Programming Language :: Python :: 3 + Programming Language :: Python :: 3.5 + Programming Language :: Python :: 3.6 + Programming Language :: Python :: 3.7 + Programming Language :: Python :: 3.8 + Topic :: Internet :: WWW/HTTP + 
Topic :: Text Processing :: Markup :: HTML + Topic :: Multimedia :: Graphics :: Graphics Conversion + Topic :: Printing +project_urls = + Documentation = https://weasyprint.readthedocs.io/ + Code = https://github.com/Kozea/WeasyPrint/ + Issue tracker = https://github.com/Kozea/WeasyPrint/issues + Donation = https://www.patreon.com/kozea + +[options] +packages = find: +zip_safe = false +setup_requires = pytest-runner +install_requires = + setuptools>=39.2.0 + cffi>=0.6 + html5lib>=0.999999999 + cairocffi>=0.9.0 + tinycss2>=1.0.0 + cssselect2>=0.1 + CairoSVG>=2.4.0 + Pyphen>=0.9.1 +tests_require = + pytest-runner + pytest-cov + pytest-flake8 + pytest-isort +python_requires = >= 3.5 + +[options.entry_points] +console-scripts = weasyprint = weasyprint.__main__:main + +[options.package_data] +weasyprint = VERSION +weasyprint.tests = resources/*.*, resources/*/* +weasyprint.css = *.css + +[options.extras_require] +doc = + sphinx + sphinx_rtd_theme +test = + pytest-runner + pytest-cov + pytest-flake8 + pytest-isort + +[bdist_wheel] +python-tag = py3 + +[build_sphinx] +source-dir = docs +build-dir = docs/_build + +[aliases] +test = pytest + +[tool:pytest] +addopts = --flake8 --isort +norecursedirs = build dist .cache .eggs .git + +[coverage:run] +branch = True +include = weasyprint/* + +[coverage:report] +exclude_lines = + pragma: no cover + def __repr__ + raise NotImplementedError +omit = + .* + +[isort] +default_section = THIRDPARTY +multi_line_output = 4 + +[egg_info] +tag_build = +tag_date = 0 + diff --git a/setup.py b/setup.py new file mode 100755 index 00000000..289f69a4 --- /dev/null +++ b/setup.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python + +""" + WeasyPrint + ========== + + WeasyPrint converts web documents to PDF. + + :copyright: Copyright 2011-2019 Simon Sapin and contributors, see AUTHORS. + :license: BSD, see LICENSE for details. 
+ +""" + +import sys + +from setuptools import setup + +if sys.version_info.major < 3: + raise RuntimeError( + 'WeasyPrint does not support Python 2.x anymore. ' + 'Please use Python 3 or install an older version of WeasyPrint.') + +setup() diff --git a/weasyprint/VERSION b/weasyprint/VERSION new file mode 100644 index 00000000..7003e7fe --- /dev/null +++ b/weasyprint/VERSION @@ -0,0 +1 @@ +51 \ No newline at end of file diff --git a/weasyprint/__init__.py b/weasyprint/__init__.py new file mode 100644 index 00000000..7f384b2f --- /dev/null +++ b/weasyprint/__init__.py @@ -0,0 +1,443 @@ +""" + WeasyPrint + ========== + + WeasyPrint converts web documents to PDF. + + The public API is what is accessible from this "root" packages + without importing sub-modules. + + :copyright: Copyright 2011-2019 Simon Sapin and contributors, see AUTHORS. + :license: BSD, see LICENSE for details. + +""" + +import contextlib +import os +import sys +from pathlib import Path + +import cssselect2 +import html5lib +import tinycss2 + +if sys.version_info.major < 3: + raise RuntimeError( + 'WeasyPrint does not support Python 2.x anymore. ' + 'Please use Python 3 or install an older version of WeasyPrint.') + +if hasattr(sys, 'frozen'): + if hasattr(sys, '_MEIPASS'): + # Frozen with PyInstaller + # See https://github.com/Kozea/WeasyPrint/pull/540 + ROOT = Path(sys._MEIPASS) + else: + # Frozen with something else (py2exe, etc.) 
+ # See https://github.com/Kozea/WeasyPrint/pull/269 + # Wrap in Path: ROOT is joined with the ``/`` operator below, which a + # plain str does not support. + ROOT = Path(os.path.dirname(sys.executable)) +else: + ROOT = Path(os.path.dirname(__file__)) + +VERSION = __version__ = (ROOT / 'VERSION').read_text().strip() + +# Used for 'User-Agent' in HTTP and 'Creator' in PDF +VERSION_STRING = 'WeasyPrint %s (http://weasyprint.org/)' % VERSION + +__all__ = ['HTML', 'CSS', 'Attachment', 'Document', 'Page', + 'default_url_fetcher', 'VERSION'] + + +# Import after setting the version, as the version is used in other modules +from .urls import ( # noqa isort:skip + fetch, default_url_fetcher, path2url, ensure_url, url_is_absolute) +from .logger import LOGGER, PROGRESS_LOGGER # noqa isort:skip +# Some imports are at the end of the file (after the CSS class) +# to work around circular imports. + + +class HTML(object): + """Represents an HTML document parsed by html5lib. + + You can just create an instance with a positional argument: + ``doc = HTML(something)`` + The class will try to guess if the input is a filename, an absolute URL, + or a :term:`file object`. + + Alternatively, use **one** named argument so that no guessing is involved: + + :type filename: str or pathlib.Path + :param filename: A filename, relative to the current directory, or + absolute. + :type url: str + :param url: An absolute, fully qualified URL. + :type file_obj: :term:`file object` + :param file_obj: Any object with a ``read`` method. + :type string: str + :param string: A string of HTML source. + + Specifying multiple inputs is an error: + ``HTML(filename="foo.html", url="localhost://bar.html")`` + will raise a :obj:`TypeError`. + + You can also pass optional named arguments: + + :type encoding: str + :param encoding: Force the source character encoding. + :type base_url: str + :param base_url: The base used to resolve relative URLs + (e.g. in ``<img src="../foo.png">``). If not provided, try to use + the input filename, URL, or ``name`` attribute of :term:`file objects + <file object>`.
+ :type url_fetcher: function + :param url_fetcher: A function or other callable + with the same signature as :func:`default_url_fetcher` called to + fetch external resources such as stylesheets and images. + (See :ref:`url-fetchers`.) + :type media_type: str + :param media_type: The media type to use for ``@media``. + Defaults to ``'print'``. **Note:** In some cases like + ``HTML(string=foo)`` relative URLs will be invalid if ``base_url`` + is not provided. + + """ + def __init__(self, guess=None, filename=None, url=None, file_obj=None, + string=None, encoding=None, base_url=None, + url_fetcher=default_url_fetcher, media_type='print'): + PROGRESS_LOGGER.info( + 'Step 1 - Fetching and parsing HTML - %s', + guess or filename or url or + getattr(file_obj, 'name', 'HTML string')) + result = _select_source( + guess, filename, url, file_obj, string, base_url, url_fetcher) + with result as (source_type, source, base_url, protocol_encoding): + if isinstance(source, str): + result = html5lib.parse(source, namespaceHTMLElements=False) + else: + result = html5lib.parse( + source, override_encoding=encoding, + transport_encoding=protocol_encoding, + namespaceHTMLElements=False) + assert result + self.base_url = find_base_url(result, base_url) + self.url_fetcher = url_fetcher + self.media_type = media_type + self.wrapper_element = cssselect2.ElementWrapper.from_html_root( + result, content_language=None) + self.etree_element = self.wrapper_element.etree_element + + def _ua_stylesheets(self): + return [HTML5_UA_STYLESHEET] + + def _ph_stylesheets(self): + return [HTML5_PH_STYLESHEET] + + def _get_metadata(self): + return get_html_metadata(self.wrapper_element, self.base_url) + + def render(self, stylesheets=None, enable_hinting=False, + presentational_hints=False, font_config=None): + """Lay out and paginate the document, but do not (yet) export it + to PDF or PNG. 
+ + This returns a :class:`~document.Document` object which provides + access to individual pages and various meta-data. + See :meth:`write_pdf` to get a PDF directly. + + .. versionadded:: 0.15 + + :type stylesheets: list + :param stylesheets: + An optional list of user stylesheets. List elements are + :class:`CSS` objects, filenames, URLs, or file + objects. (See :ref:`stylesheet-origins`.) + :type enable_hinting: bool + :param enable_hinting: + Whether text, borders and background should be *hinted* to fall + at device pixel boundaries. Should be enabled for pixel-based + output (like PNG) but not for vector-based output (like PDF). + :type presentational_hints: bool + :param presentational_hints: Whether HTML presentational hints are + followed. + :type font_config: :class:`~fonts.FontConfiguration` + :param font_config: A font configuration handling ``@font-face`` rules. + :returns: A :class:`~document.Document` object. + + """ + return Document._render( + self, stylesheets, enable_hinting, presentational_hints, + font_config) + + def write_pdf(self, target=None, stylesheets=None, zoom=1, + attachments=None, presentational_hints=False, + font_config=None): + """Render the document to a PDF file. + + This is a shortcut for calling :meth:`render`, then + :meth:`Document.write_pdf() <document.Document.write_pdf>`. + + :type target: str, pathlib.Path or file object + :param target: + A filename where the PDF file is generated, a file object, or + :obj:`None`. + :type stylesheets: list + :param stylesheets: + An optional list of user stylesheets. The list's elements + are :class:`CSS` objects, filenames, URLs, or file-like + objects. (See :ref:`stylesheet-origins`.) + :type zoom: float + :param zoom: + The zoom factor in PDF units per CSS units. **Warning**: + All CSS units are affected, including physical units like + ``cm`` and named sizes like ``A4``. For values other than + 1, the physical CSS units will thus be "wrong".
+ :type attachments: list + :param attachments: A list of additional file attachments for the + generated PDF document or :obj:`None`. The list's elements are + :class:`Attachment` objects, filenames, URLs or file-like objects. + :type presentational_hints: bool + :param presentational_hints: Whether HTML presentational hints are + followed. + :type font_config: :class:`~fonts.FontConfiguration` + :param font_config: A font configuration handling ``@font-face`` rules. + :returns: + The PDF as :obj:`bytes` if ``target`` is not provided or + :obj:`None`, otherwise :obj:`None` (the PDF is written to + ``target``). + + """ + return self.render( + stylesheets, enable_hinting=False, + presentational_hints=presentational_hints, + font_config=font_config).write_pdf( + target, zoom, attachments) + + def write_image_surface(self, stylesheets=None, resolution=96, + presentational_hints=False, font_config=None): + """Render pages vertically on a cairo image surface. + + .. versionadded:: 0.17 + + There is no decoration around pages other than those specified in CSS + with ``@page`` rules. The final image is as wide as the widest page. + Each page is below the previous one, centered horizontally. + + This is a shortcut for calling :meth:`render`, then + :meth:`Document.write_image_surface() + <document.Document.write_image_surface>`. + + :type stylesheets: list + :param stylesheets: + An optional list of user stylesheets. The list's elements + are :class:`CSS` objects, filenames, URLs, or file-like + objects. (See :ref:`stylesheet-origins`.) + :type resolution: float + :param resolution: + The output resolution in PNG pixels per CSS inch. At 96 dpi + (the default), PNG pixels match the CSS ``px`` unit. + :type presentational_hints: bool + :param presentational_hints: Whether HTML presentational hints are + followed. + :type font_config: :class:`~fonts.FontConfiguration` + :param font_config: A font configuration handling ``@font-face`` rules. + :returns: A cairo :class:`ImageSurface <cairocffi.ImageSurface>`.
+ + """ + surface, _width, _height = ( + self.render(stylesheets, enable_hinting=True, + presentational_hints=presentational_hints, + font_config=font_config) + .write_image_surface(resolution)) + return surface + + def write_png(self, target=None, stylesheets=None, resolution=96, + presentational_hints=False, font_config=None): + """Paint the pages vertically to a single PNG image. + + There is no decoration around pages other than those specified in CSS + with ``@page`` rules. The final image is as wide as the widest page. + Each page is below the previous one, centered horizontally. + + This is a shortcut for calling :meth:`render`, then + :meth:`Document.write_png() <document.Document.write_png>`. + + :type target: str, pathlib.Path or file object + :param target: + A filename where the PNG file is generated, a file object, or + :obj:`None`. + :type stylesheets: list + :param stylesheets: + An optional list of user stylesheets. The list's elements + are :class:`CSS` objects, filenames, URLs, or file-like + objects. (See :ref:`stylesheet-origins`.) + :type resolution: float + :param resolution: + The output resolution in PNG pixels per CSS inch. At 96 dpi + (the default), PNG pixels match the CSS ``px`` unit. + :type presentational_hints: bool + :param presentational_hints: Whether HTML presentational hints are + followed. + :type font_config: :class:`~fonts.FontConfiguration` + :param font_config: A font configuration handling ``@font-face`` rules. + :returns: + The image as :obj:`bytes` if ``target`` is not provided or + :obj:`None`, otherwise :obj:`None` (the image is written to + ``target``.) + + """ + png_bytes, _width, _height = ( + self.render(stylesheets, enable_hinting=True, + presentational_hints=presentational_hints, + font_config=font_config) + .write_png(target, resolution)) + return png_bytes + + +class CSS(object): + """Represents a CSS stylesheet parsed by tinycss2. + + An instance is created in the same way as :class:`HTML`, with the same + arguments.
+ + An additional argument called ``font_config`` must be provided to handle + ``@font-face`` rules. The same ``fonts.FontConfiguration`` object must be + used for different ``CSS`` objects applied to the same document. + + ``CSS`` objects have no public attributes or methods. They are only meant + to be used in the :meth:`~HTML.write_pdf`, :meth:`~HTML.write_png` and + :meth:`~HTML.render` methods of :class:`HTML` objects. + + """ + def __init__(self, guess=None, filename=None, url=None, file_obj=None, + string=None, encoding=None, base_url=None, + url_fetcher=default_url_fetcher, _check_mime_type=False, + media_type='print', font_config=None, matcher=None, + page_rules=None): + PROGRESS_LOGGER.info( + 'Step 2 - Fetching and parsing CSS - %s', + filename or url or getattr(file_obj, 'name', 'CSS string')) + result = _select_source( + guess, filename, url, file_obj, string, + base_url=base_url, url_fetcher=url_fetcher, + check_css_mime_type=_check_mime_type) + with result as (source_type, source, base_url, protocol_encoding): + if source_type == 'string' and not isinstance(source, bytes): + # unicode, no encoding + stylesheet = tinycss2.parse_stylesheet(source) + else: + if source_type == 'file_obj': + source = source.read() + stylesheet, encoding = tinycss2.parse_stylesheet_bytes( + source, environment_encoding=encoding, + protocol_encoding=protocol_encoding) + self.base_url = base_url + self.matcher = matcher or cssselect2.Matcher() + self.page_rules = [] if page_rules is None else page_rules + self.fonts = [] + preprocess_stylesheet( + media_type, base_url, stylesheet, url_fetcher, self.matcher, + self.page_rules, self.fonts, font_config) + + +class Attachment(object): + """Represents a file attachment for a PDF document. + + .. versionadded:: 0.22 + + An instance is created in the same way as :class:`HTML`, except that the + HTML specific arguments (``encoding`` and ``media_type``) are not + supported.
An optional description can be provided with the ``description`` + argument. + + :param description: A description of the attachment to be included in the + PDF document. May be :obj:`None`. + + """ + def __init__(self, guess=None, filename=None, url=None, file_obj=None, + string=None, base_url=None, url_fetcher=default_url_fetcher, + description=None): + self.source = _select_source( + guess, filename, url, file_obj, string, + base_url=base_url, url_fetcher=url_fetcher) + self.description = description + + +@contextlib.contextmanager +def _select_source(guess=None, filename=None, url=None, file_obj=None, + string=None, base_url=None, url_fetcher=default_url_fetcher, + check_css_mime_type=False): + """Check that exactly one input is not None and yield the selected source. + + Context manager yielding a ``(source_type, source, base_url, + protocol_encoding)`` tuple, where ``source_type`` is ``'file_obj'`` or + ``'string'``. A given ``base_url`` is normalized with ``ensure_url``. + + :raises TypeError: if zero or several of ``guess``, ``filename``, ``url``, + ``file_obj`` and ``string`` are not :obj:`None`. + + """ + if base_url is not None: + base_url = ensure_url(base_url) + + # Collect the *names* of the given sources for the error message: the + # values may be file objects, paths or bytes, which ``str.join`` rejects. + selected_names = [ + label for label, param in ( + ('guess', guess), ('filename', filename), ('url', url), + ('file_obj', file_obj), ('string', string)) + if param is not None] + if len(selected_names) != 1: + raise TypeError('Expected exactly one source, got ' + ( + ', '.join(selected_names) or 'nothing' + )) + elif guess is not None: + if hasattr(guess, 'read'): + type_ = 'file_obj' + elif isinstance(guess, Path): + type_ = 'filename' + elif url_is_absolute(guess): + type_ = 'url' + else: + type_ = 'filename' + result = _select_source( + base_url=base_url, url_fetcher=url_fetcher, + check_css_mime_type=check_css_mime_type, + **{type_: guess}) + with result as result: + yield result + elif filename is not None: + if isinstance(filename, Path): + filename = str(filename) + if base_url is None: + base_url = path2url(filename) + with open(filename, 'rb') as file_obj: + yield 'file_obj', file_obj, base_url, None + elif url is not None: + with fetch(url_fetcher, url) as result: + if check_css_mime_type and result['mime_type'] != 'text/css': + LOGGER.error( + 'Unsupported stylesheet type %s for %s', + result['mime_type'], result['redirected_url']) + yield 'string', '', base_url, 
None + else: + proto_encoding = result.get('encoding') + if base_url is None: + base_url = result.get('redirected_url', url) + if 'string' in result: + yield 'string', result['string'], base_url, proto_encoding + else: + yield ( + 'file_obj', result['file_obj'], base_url, + proto_encoding) + elif file_obj is not None: + if base_url is None: + # filesystem file-like objects have a 'name' attribute. + name = getattr(file_obj, 'name', None) + # Some streams have a .name like '<stdin>', not a filename. + if name and not name.startswith('<'): + base_url = ensure_url(name) + yield 'file_obj', file_obj, base_url, None + elif string is not None: + # Last possible source: the count check above guarantees that + # exactly one of the five inputs is set. + yield 'string', string, base_url, None + +# Work around circular imports. +from .css import preprocess_stylesheet # noqa isort:skip +from .html import ( # noqa isort:skip + HTML5_UA_STYLESHEET, HTML5_PH_STYLESHEET, find_base_url, get_html_metadata) +from .document import Document, Page # noqa isort:skip diff --git a/weasyprint/__main__.py b/weasyprint/__main__.py new file mode 100644 index 00000000..a17dc57e --- /dev/null +++ b/weasyprint/__main__.py @@ -0,0 +1,216 @@ +""" + weasyprint.__main__ + ------------------- + + Command-line interface to WeasyPrint. + + :copyright: Copyright 2011-2019 Simon Sapin and contributors, see AUTHORS. + :license: BSD, see LICENSE for details. + +""" + +import argparse +import logging +import os +import sys + +import cairosvg + +from . 
class PrintInfo(argparse.Action):
    """Argparse action that prints system and library versions, then exits."""

    def __call__(*_, **__):
        # ``platform.uname()`` is available on every platform, whereas
        # ``os.uname()`` does not exist on Windows and made ``--info`` crash
        # there. Imported locally to keep this block self-contained.
        import platform

        uname = platform.uname()
        print('System:', uname.system)
        print('Machine:', uname.machine)
        print('Version:', uname.version)
        print('Release:', uname.release)
        print()
        print('WeasyPrint version:', VERSION)
        print('Python version:', sys.version.split()[0])
        print('Cairo version:', cairo.cairo_version())
        print('Pango version:', pango.pango_version())
        print('CairoSVG version:', cairosvg.__version__)
        sys.exit()


def main(argv=None, stdout=None, stdin=None):
    """The ``weasyprint`` program takes at least two arguments:

    .. code-block:: sh

        weasyprint [options] <input> <output>

    The input is a filename or URL to an HTML document, or ``-`` to read
    HTML from stdin. The output is a filename, or ``-`` to write to stdout.

    Options can be mixed anywhere before, between, or after the input and
    output.

    .. option:: -e <input_encoding>, --encoding <input_encoding>

        Force the input character encoding (e.g. ``-e utf8``).

    .. option:: -f <output_format>, --format <output_format>

        Choose the output file format among PDF and PNG (e.g. ``-f png``).
        Required if the output is not a ``.pdf`` or ``.png`` filename.

    .. option:: -s <filename_or_URL>, --stylesheet <filename_or_URL>

        Filename or URL of a user cascading stylesheet (see
        :ref:`stylesheet-origins`) to add to the document
        (e.g. ``-s print.css``). Multiple stylesheets are allowed.

    .. option:: -m <type>, --media-type <type>

        Set the media type to use for ``@media``. Defaults to ``print``.

    .. option:: -r <dpi>, --resolution <dpi>

        For PNG output only. Set the resolution in PNG pixel per CSS inch.
        Defaults to 96, which means that PNG pixels match CSS pixels.

    .. option:: -u <URL>, --base-url <URL>

        Set the base for relative URLs in the HTML input.
        Defaults to the input’s own URL, or the current directory for stdin.

    .. option:: -a <file>, --attachment <file>

        Adds an attachment to the document. The attachment is
        included in the PDF output. This option can be used multiple
        times.

    .. option:: -p, --presentational-hints

        Follow HTML presentational hints.

    .. option:: -v, --verbose

        Show warnings and information messages.

    .. option:: -d, --debug

        Show debugging messages.

    .. option:: -q, --quiet

        Hide logging messages.

    .. option:: -i, --info

        Print system information and exit. Other options and arguments are
        ignored.

    .. option:: --version

        Show the version number. Other options and arguments are ignored.

    .. option:: -h, --help

        Show the command-line usage. Other options and arguments are ignored.

    :param argv: Argument list to parse; ``None`` lets :mod:`argparse` use
        ``sys.argv[1:]``.
    :param stdout: Stream used when the output is ``-``; defaults to
        ``sys.stdout``. Its ``buffer`` attribute is used when present.
    :param stdin: Stream used when the input is ``-``; defaults to
        ``sys.stdin``. Its ``buffer`` attribute is used when present.

    """
    parser = argparse.ArgumentParser(
        prog='weasyprint', description='Renders web pages to PDF or PNG.')
    parser.add_argument('--version', action='version',
                        version='WeasyPrint version %s' % VERSION,
                        help="Print WeasyPrint's version number and exit.")
    parser.add_argument('-i', '--info', action=PrintInfo, nargs=0,
                        help='Print system information and exit.')
    parser.add_argument('-e', '--encoding',
                        help='Character encoding of the input')
    parser.add_argument('-f', '--format', choices=['pdf', 'png'],
                        help='Output format. Can be omitted if `output` '
                             'ends with a .pdf or .png extension.')
    parser.add_argument('-s', '--stylesheet', action='append',
                        help='URL or filename for a user CSS stylesheet. '
                             'May be given multiple times.')
    parser.add_argument('-m', '--media-type', default='print',
                        help='Media type to use for @media, defaults to print')
    parser.add_argument('-r', '--resolution', type=float,
                        help='PNG only: the resolution in pixel per CSS inch. '
                             'Defaults to 96, one PNG pixel per CSS pixel.')
    parser.add_argument('-u', '--base-url',
                        help='Base for relative URLs in the HTML input. '
                             "Defaults to the input's own filename or URL "
                             'or the current directory for stdin.')
    parser.add_argument('-a', '--attachment', action='append',
                        help='URL or filename of a file '
                             'to attach to the PDF document')
    parser.add_argument('-p', '--presentational-hints', action='store_true',
                        help='Follow HTML presentational hints.')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Show warnings and information messages.')
    parser.add_argument('-d', '--debug', action='store_true',
                        help='Show debugging messages.')
    parser.add_argument('-q', '--quiet', action='store_true',
                        help='Hide logging messages.')
    parser.add_argument(
        'input', help='URL or filename of the HTML input, or - for stdin')
    parser.add_argument(
        'output', help='Filename where output is written, or - for stdout')

    args = parser.parse_args(argv)

    # Work out the output format, from -f or from the output extension.
    if args.format is None:
        output_lower = args.output.lower()
        if output_lower.endswith('.pdf'):
            format_ = 'pdf'
        elif output_lower.endswith('.png'):
            format_ = 'png'
        else:
            # parser.error() prints the usage and exits the program.
            parser.error(
                'Either specify a format with -f or choose an '
                'output filename that ends in .pdf or .png')
    else:
        format_ = args.format.lower()

    if args.input == '-':
        if stdin is None:
            stdin = sys.stdin
        # stdin.buffer on Py3, stdin on Py2
        source = getattr(stdin, 'buffer', stdin)
        if args.base_url is None:
            args.base_url = '.'  # current directory
        elif args.base_url == '':
            args.base_url = None  # no base URL
    else:
        source = args.input

    if args.output == '-':
        if stdout is None:
            stdout = sys.stdout
        # stdout.buffer on Py3, stdout on Py2
        output = getattr(stdout, 'buffer', stdout)
    else:
        output = args.output

    kwargs = {
        'stylesheets': args.stylesheet,
        'presentational_hints': args.presentational_hints}
    if args.resolution:
        if format_ == 'png':
            kwargs['resolution'] = args.resolution
        else:
            parser.error('--resolution only applies for the PNG format.')

    if args.attachment:
        if format_ == 'pdf':
            kwargs['attachments'] = args.attachment
        else:
            parser.error('--attachment only applies for the PDF format.')

    # Default to logging to stderr.
    if args.debug:
        LOGGER.setLevel(logging.DEBUG)
    elif args.verbose:
        LOGGER.setLevel(logging.INFO)
    if not args.quiet:
        handler = logging.StreamHandler()
        handler.setFormatter(logging.Formatter('%(levelname)s: %(message)s'))
        LOGGER.addHandler(handler)

    html = HTML(source, base_url=args.base_url, encoding=args.encoding,
                media_type=args.media_type)
    # Dispatches to html.write_pdf() or html.write_png().
    getattr(html, 'write_' + format_)(output, **kwargs)


if __name__ == '__main__':  # pragma: no cover
    main()
class StyleFor:
    """Callable giving the computed style of an element or pseudo-element.

    The constructor runs the cascade for every element of the document and
    stores the computed styles; calling the instance looks one of them up.

    NOTE(review): the nesting below was reconstructed from a copy of this
    file whose indentation was lost -- confirm against the upstream file.
    """
    def __init__(self, html, sheets, presentational_hints, target_collector):
        """Cascade and compute the styles of all elements of ``html``.

        :param html: Document object; its ``etree_element``,
            ``wrapper_element`` and ``base_url`` attributes are used.
        :param sheets: ``(sheet, origin, sheet_specificity)`` triples. They
            are iterated once per element here and again in
            :meth:`add_page_declarations`, so this must be re-iterable.
        :param presentational_hints: Forwarded to ``find_style_attributes``.
        :param target_collector: Forwarded to ``computed_from_cascaded``.
        """
        # keys: (element, pseudo_element_type)
        #    element: an ElementTree Element or the '@page' string
        #    pseudo_element_type: a string such as 'first' (for @page) or
        #        'after', or None for normal elements
        # values: dicts of
        #     keys: property name as a string
        #     values: (values, weight)
        #         values: a PropertyValue-like object
        #         weight: values with a greater weight take precedence, see
        #             http://www.w3.org/TR/CSS21/cascade.html#cascading-order
        self._cascaded_styles = cascaded_styles = {}

        # keys: (element, pseudo_element_type), like cascaded_styles
        # values: style dict objects:
        #     keys: property name as a string
        #     values: a PropertyValue-like object
        self._computed_styles = {}

        self._sheets = sheets

        PROGRESS_LOGGER.info('Step 3 - Applying CSS')
        # Declarations coming from the elements themselves (as yielded by
        # find_style_attributes) are cascaded with the 'author' origin.
        for specificity, attributes in find_style_attributes(
                html.etree_element, presentational_hints, html.base_url):
            element, declarations, base_url = attributes
            style = cascaded_styles.setdefault((element, None), {})
            for name, values, importance in preprocess_declarations(
                    base_url, declarations):
                precedence = declaration_precedence('author', importance)
                weight = (precedence, specificity)
                old_weight = style.get(name, (None, None))[1]
                # '<=' lets a later declaration of equal weight win.
                if old_weight is None or old_weight <= weight:
                    style[name] = values, weight

        # First, add declarations and set computed styles for "real" elements
        # *in tree order*. Tree order is important so that parents have
        # computed styles before their children, for inheritance.

        # Iterate on all elements, even if there is no cascaded style for them.
        for element in html.wrapper_element.iter_subtree():
            for sheet, origin, sheet_specificity in sheets:
                # Add declarations for matched elements
                for selector in sheet.matcher.match(element):
                    specificity, order, pseudo_type, declarations = selector
                    # A truthy sheet-level specificity overrides the
                    # selector's own specificity.
                    specificity = sheet_specificity or specificity
                    style = cascaded_styles.setdefault(
                        (element.etree_element, pseudo_type), {})
                    for name, values, importance in declarations:
                        precedence = declaration_precedence(origin, importance)
                        weight = (precedence, specificity)
                        old_weight = style.get(name, (None, None))[1]
                        if old_weight is None or old_weight <= weight:
                            style[name] = values, weight
            parent = element.parent.etree_element if element.parent else None
            self.set_computed_styles(
                element.etree_element, root=html.etree_element, parent=parent,
                base_url=html.base_url, target_collector=target_collector)

        # Then computed styles for pseudo elements, in any order.
        # Pseudo-elements inherit from their associated element so they come
        # last. Do them in a second pass as there is no easy way to iterate
        # on the pseudo-elements for a given element with the current structure
        # of cascaded_styles. (Keys are (element, pseudo_type) tuples.)

        # Only iterate on pseudo-elements that have cascaded styles. (Others
        # might as well not exist.)
        for element, pseudo_type in cascaded_styles:
            if pseudo_type and not isinstance(element, PageType):
                self.set_computed_styles(
                    element, pseudo_type=pseudo_type,
                    # The pseudo-element inherits from the element.
                    root=html.etree_element, parent=element,
                    base_url=html.base_url, target_collector=target_collector)

        # Clear the cascaded styles, we don't need them anymore. Keep the
        # dictionary, it is used later for page margins.
        self._cascaded_styles.clear()

    def __call__(self, element, pseudo_type=None):
        """Return the computed style stored for ``(element, pseudo_type)``.

        Returns ``None`` when no style is stored for that key. For table
        related ``display`` values the stored style is adjusted in place
        before being returned (see the comments below).
        """
        style = self._computed_styles.get((element, pseudo_type))

        if style:
            if 'table' in style['display']:
                if (style['display'] in ('table', 'inline-table') and
                        style['border_collapse'] == 'collapse'):
                    # Padding does not apply
                    for side in ['top', 'bottom', 'left', 'right']:
                        style['padding_' + side] = computed_values.ZERO_PIXELS
                if (style['display'].startswith('table-') and
                        style['display'] != 'table-caption'):
                    # Margins do not apply
                    for side in ['top', 'bottom', 'left', 'right']:
                        style['margin_' + side] = computed_values.ZERO_PIXELS

        return style

    def set_computed_styles(self, element, parent, root=None, pseudo_type=None,
                            base_url=None, target_collector=None):
        """Set the computed values of styles to ``element``.

        Take the cascaded declarations stored for ``(element, pseudo_type)``
        and store the result of ``computed_from_cascaded`` for that key.

        :param element: Element (or page type) whose style is computed.
        :param parent: Element whose already computed style is inherited
            from. Must be ``None`` for the root element and not ``None``
            otherwise (both are asserted).
        :param root: Root element of the document.
        :param pseudo_type: Pseudo-element type, or ``None``.
        :param base_url: Forwarded to ``computed_from_cascaded``.
        :param target_collector: Forwarded to ``computed_from_cascaded``.
        :raises KeyError: If the style of ``parent`` or ``root`` has not been
            computed yet.

        """
        cascaded_styles = self.get_cascaded_styles()
        computed_styles = self.get_computed_styles()
        if element == root and pseudo_type is None:
            assert parent is None
            parent_style = None
            root_style = {
                # When specified on the font-size property of the root element,
                # the rem units refer to the property’s initial value.
                'font_size': INITIAL_VALUES['font_size'],
            }
        else:
            assert parent is not None
            parent_style = computed_styles[parent, None]
            root_style = computed_styles[root, None]

        cascaded = cascaded_styles.get((element, pseudo_type), {})
        computed_styles[element, pseudo_type] = computed_from_cascaded(
            element, cascaded, parent_style, pseudo_type, root_style, base_url,
            target_collector)

        # The style of marker is deleted when display is different from
        # list-item.
        if pseudo_type is None:
            for pseudo in (None, 'before', 'after'):
                pseudo_style = cascaded_styles.get((element, pseudo), {})
                if 'display' in pseudo_style:
                    if pseudo_style['display'][0] == 'list-item':
                        break
            else:
                # for/else: reached only when no 'list-item' display was
                # found on the element or its ::before / ::after.
                if (element, 'marker') in cascaded_styles:
                    del cascaded_styles[element, 'marker']

    def add_page_declarations(self, page_type):
        """Cascade the page-rule declarations that apply to ``page_type``.

        Results are stored in the cascaded styles under
        ``(page_type, pseudo_type)`` keys; nothing is returned.
        """
        for sheet, origin, sheet_specificity in self._sheets:
            for _rule, selector_list, declarations in sheet.page_rules:
                for selector in selector_list:
                    specificity, pseudo_type, selector_page_type = selector
                    if self._page_type_match(selector_page_type, page_type):
                        specificity = sheet_specificity or specificity
                        style = self._cascaded_styles.setdefault(
                            (page_type, pseudo_type), {})
                        for name, values, importance in declarations:
                            precedence = declaration_precedence(
                                origin, importance)
                            weight = (precedence, specificity)
                            old_weight = style.get(name, (None, None))[1]
                            if old_weight is None or old_weight <= weight:
                                style[name] = values, weight

    def get_cascaded_styles(self):
        """Return the dict of cascaded styles (the live object, not a copy)."""
        return self._cascaded_styles

    def get_computed_styles(self):
        """Return the dict of computed styles (the live object, not a copy)."""
        return self._computed_styles

    @staticmethod
    def _page_type_match(selector_page_type, page_type):
        """Tell whether a page selector applies to the given page type.

        A ``None`` field of ``selector_page_type`` matches anything; other
        fields must be equal to the corresponding ``page_type`` field.
        """
        if selector_page_type.side not in (None, page_type.side):
            return False
        if selector_page_type.blank not in (None, page_type.blank):
            return False
        if selector_page_type.first not in (None, page_type.first):
            return False
        if selector_page_type.name not in (None, page_type.name):
            return False
        if selector_page_type.index is not None:
            # presumably the (a, b) of an "an+b" expression -- TODO confirm
            a, b, group = selector_page_type.index
            # TODO: handle group
            # page_type.index is compared after adding 1.
            if a:
                if (page_type.index + 1 - b) % a:
                    return False
            else:
                if page_type.index + 1 != b:
                    return False
        return True


def get_child_text(element):
    """Return the text directly in the element, not descendants.

    Concatenates ``element.text`` and the ``tail`` of each direct child,
    skipping empty values.
    """
    content = [element.text] if element.text else []
    for child in element:
        if child.tail:
            content.append(child.tail)
    return ''.join(content)

Hello World!

+

Follow this link to view the reference image, which should be rendered below the text "Hello World!" on the test page in the same way that this paragraph is rendered below that text on this page.

+ + \ No newline at end of file diff --git a/weasyprint/tests/resources/acid2-test.html b/weasyprint/tests/resources/acid2-test.html new file mode 100644 index 00000000..d6f34037 --- /dev/null +++ b/weasyprint/tests/resources/acid2-test.html @@ -0,0 +1,148 @@ + + + + The Second Acid Test + + + + + +
+

Standards compliant?

+

Take The Acid2 Test and compare it to the reference rendering.

+
+

Hello World!

+ +
+

+

+
                              
+
ERROR
+
+
+ +
+
 
+
+
    +
  • +
  • +
  • +
  • + +
+
+
+ + diff --git a/weasyprint/tests/resources/blue.jpg b/weasyprint/tests/resources/blue.jpg new file mode 100644 index 00000000..1a6175ab Binary files /dev/null and b/weasyprint/tests/resources/blue.jpg differ diff --git a/weasyprint/tests/resources/doc1.html b/weasyprint/tests/resources/doc1.html new file mode 100644 index 00000000..a6c9d132 --- /dev/null +++ b/weasyprint/tests/resources/doc1.html @@ -0,0 +1,58 @@ + + + + + + + + + + + +

WeasyPrint test document (with Ünicōde)

+

Hello

+ +
+ + WeasyPrint + +
+ diff --git a/weasyprint/tests/resources/doc1_UTF-16BE.html b/weasyprint/tests/resources/doc1_UTF-16BE.html new file mode 100644 index 00000000..747ea051 Binary files /dev/null and b/weasyprint/tests/resources/doc1_UTF-16BE.html differ diff --git a/weasyprint/tests/resources/icon.png b/weasyprint/tests/resources/icon.png new file mode 100644 index 00000000..6ddb17e0 Binary files /dev/null and b/weasyprint/tests/resources/icon.png differ diff --git a/weasyprint/tests/resources/latin1-test.css b/weasyprint/tests/resources/latin1-test.css new file mode 100644 index 00000000..73b76ee2 --- /dev/null +++ b/weasyprint/tests/resources/latin1-test.css @@ -0,0 +1,4 @@ +h1::before { + content: "Ilv Unicode"; + background-image: url(pattern.png) +} diff --git a/weasyprint/tests/resources/logo_small.png b/weasyprint/tests/resources/logo_small.png new file mode 100644 index 00000000..99f46def Binary files /dev/null and b/weasyprint/tests/resources/logo_small.png differ diff --git a/weasyprint/tests/resources/mini_ua.css b/weasyprint/tests/resources/mini_ua.css new file mode 100644 index 00000000..186626cb --- /dev/null +++ b/weasyprint/tests/resources/mini_ua.css @@ -0,0 +1,4 @@ +/* Minimal user-agent stylesheet */ +p { margin: 1em 0px } /* 0px should be translated to 0*/ +a { text-decoration: underline } +h1 { font-weight: bolder } diff --git a/weasyprint/tests/resources/pattern.gif b/weasyprint/tests/resources/pattern.gif new file mode 100644 index 00000000..8f24312f Binary files /dev/null and b/weasyprint/tests/resources/pattern.gif differ diff --git a/weasyprint/tests/resources/pattern.palette.png b/weasyprint/tests/resources/pattern.palette.png new file mode 100644 index 00000000..2a6b3994 Binary files /dev/null and b/weasyprint/tests/resources/pattern.palette.png differ diff --git a/weasyprint/tests/resources/pattern.png b/weasyprint/tests/resources/pattern.png new file mode 100644 index 00000000..a6860ada Binary files /dev/null and 
b/weasyprint/tests/resources/pattern.png differ diff --git a/weasyprint/tests/resources/pattern.svg b/weasyprint/tests/resources/pattern.svg new file mode 100644 index 00000000..299d3992 --- /dev/null +++ b/weasyprint/tests/resources/pattern.svg @@ -0,0 +1,6 @@ + + + + + + diff --git a/weasyprint/tests/resources/really-a-png.svg b/weasyprint/tests/resources/really-a-png.svg new file mode 100644 index 00000000..a6860ada Binary files /dev/null and b/weasyprint/tests/resources/really-a-png.svg differ diff --git a/weasyprint/tests/resources/really-a-svg.png b/weasyprint/tests/resources/really-a-svg.png new file mode 100644 index 00000000..299d3992 --- /dev/null +++ b/weasyprint/tests/resources/really-a-svg.png @@ -0,0 +1,6 @@ + + + + + + diff --git a/weasyprint/tests/resources/sheet2.css b/weasyprint/tests/resources/sheet2.css new file mode 100644 index 00000000..9554239e --- /dev/null +++ b/weasyprint/tests/resources/sheet2.css @@ -0,0 +1,5 @@ +li { + margin-bottom: 3em; /* Should be masked*/ + margin: 2em 0; + margin-left: 4em; /* Should not be masked*/ +} diff --git a/weasyprint/tests/resources/sub_directory/sheet1.css b/weasyprint/tests/resources/sub_directory/sheet1.css new file mode 100644 index 00000000..02beb622 --- /dev/null +++ b/weasyprint/tests/resources/sub_directory/sheet1.css @@ -0,0 +1,16 @@ +@import url(../sheet2.css) all; +p { + background: currentColor; +} + +@media print { + ul { + /* 1ex == 0.8em for ahem. 
*/ + margin: 2em 2.5ex; + } +} +@media screen { + ul { + border-width: 1000px !important; + } +} diff --git a/weasyprint/tests/resources/user.css b/weasyprint/tests/resources/user.css new file mode 100644 index 00000000..0d2f9308 --- /dev/null +++ b/weasyprint/tests/resources/user.css @@ -0,0 +1,5 @@ +html { + /* Reversed contrast */ + color: white; + background-color: black; +} diff --git a/weasyprint/tests/resources/utf8-test.css b/weasyprint/tests/resources/utf8-test.css new file mode 100644 index 00000000..4a035077 --- /dev/null +++ b/weasyprint/tests/resources/utf8-test.css @@ -0,0 +1,4 @@ +h1::before { + content: "I løvë Unicode"; + background-image: url(pattern.png) +} diff --git a/weasyprint/tests/resources/weasyprint.otf b/weasyprint/tests/resources/weasyprint.otf new file mode 100644 index 00000000..8b784407 Binary files /dev/null and b/weasyprint/tests/resources/weasyprint.otf differ diff --git a/weasyprint/tests/test_acid2.py b/weasyprint/tests/test_acid2.py new file mode 100644 index 00000000..a523440a --- /dev/null +++ b/weasyprint/tests/test_acid2.py @@ -0,0 +1,39 @@ +""" + weasyprint.tests.test_draw.test_acid2 + ------------------------------------- + + Check the famous Acid2 test. + + :copyright: Copyright 2011-2019 Simon Sapin and contributors, see AUTHORS. + :license: BSD, see LICENSE for details. + +""" + +from .. 
@assert_no_logs
@requires('cairo', (1, 12, 0))
def test_acid2():
    """Render the Acid2 test page and compare it to the reference page."""
    def render(filename):
        path = resource_filename(filename)
        return HTML(path).render(enable_hinting=True)

    with capture_logs():
        # This is a copy of http://www.webstandards.org/files/acid2/test.html
        document = render('acid2-test.html')
        # Ignore the intro page: it is not in the reference
        _intro_page, test_page = document.pages
        test_only = document.copy([test_page])
        test_image, width, height = test_only.write_image_surface()

    # This is a copy of http://www.webstandards.org/files/acid2/reference.html
    reference = render('acid2-reference.html')
    ref_image, ref_width, ref_height = reference.write_image_surface()

    assert (width, height) == (ref_width, ref_height)
    test_pixels = image_to_pixels(test_image, width, height)
    ref_pixels = image_to_pixels(ref_image, width, height)
    assert_pixels_equal(
        'acid2', width, height, test_pixels, ref_pixels, tolerance=2)
import CSS, HTML, __main__, default_url_fetcher +from ..urls import path2url +from .test_draw import assert_pixels_equal, image_to_pixels, parse_pixels +from .testing_utils import ( + FakeHTML, assert_no_logs, capture_logs, http_server, resource_filename) + + +def _test_resource(class_, basename, check, **kwargs): + """Common code for testing the HTML and CSS classes.""" + absolute_filename = resource_filename(basename) + absolute_path = Path(absolute_filename) + url = path2url(absolute_filename) + check(class_(absolute_filename, **kwargs)) + check(class_(absolute_path, **kwargs)) + check(class_(guess=absolute_filename, **kwargs)) + check(class_(guess=absolute_path, **kwargs)) + check(class_(filename=absolute_filename, **kwargs)) + check(class_(filename=absolute_path, **kwargs)) + check(class_(url, **kwargs)) + check(class_(guess=url, **kwargs)) + check(class_(url=url, **kwargs)) + with open(absolute_filename, 'rb') as fd: + check(class_(fd, **kwargs)) + with open(absolute_filename, 'rb') as fd: + check(class_(guess=fd, **kwargs)) + with open(absolute_filename, 'rb') as fd: + check(class_(file_obj=fd, **kwargs)) + with open(absolute_filename, 'rb') as fd: + content = fd.read() + py.path.local(os.path.dirname(__file__)).chdir() + relative_filename = os.path.join('resources', basename) + relative_path = Path(relative_filename) + check(class_(relative_filename, **kwargs)) + check(class_(relative_path, **kwargs)) + check(class_(string=content, base_url=relative_filename, **kwargs)) + encoding = kwargs.get('encoding') or 'utf8' + check(class_(string=content.decode(encoding), # unicode + base_url=relative_filename, **kwargs)) + with pytest.raises(TypeError): + class_(filename='foo', url='bar') + + +def _check_doc1(html, has_base_url=True): + """Check that a parsed HTML document looks like resources/doc1.html""" + root = html.etree_element + assert root.tag == 'html' + assert [child.tag for child in root] == ['head', 'body'] + _head, body = root + assert [child.tag for 
child in body] == ['h1', 'p', 'ul', 'div'] + h1, p, ul, div = body + assert h1.text == 'WeasyPrint test document (with Ünicōde)' + if has_base_url: + url = urljoin(html.base_url, 'pattern.png') + assert url.startswith('file:') + assert url.endswith('weasyprint/tests/resources/pattern.png') + else: + assert html.base_url is None + + +def _run(args, stdin=b''): + stdin = io.BytesIO(stdin) + stdout = io.BytesIO() + try: + __main__.HTML = FakeHTML + __main__.main(args.split(), stdin=stdin, stdout=stdout) + finally: + __main__.HTML = HTML + return stdout.getvalue() + + +class _fake_file(object): + def __init__(self): + self.chunks = [] + + def write(self, data): + self.chunks.append(bytes(data[:])) + + def getvalue(self): + return b''.join(self.chunks) + + +def _png_size(result): + png_bytes, width, height = result + surface = cairo.ImageSurface.create_from_png(io.BytesIO(png_bytes)) + assert (surface.get_width(), surface.get_height()) == (width, height) + return width, height + + +def _round_meta(pages): + """Eliminate errors of floating point arithmetic for metadata. + (eg. 
49.99999999999994 instead of 50) + + """ + for page in pages: + anchors = page.anchors + for anchor_name, (pos_x, pos_y) in anchors.items(): + anchors[anchor_name] = round(pos_x, 6), round(pos_y, 6) + links = page.links + for i, link in enumerate(links): + link_type, target, (pos_x, pos_y, width, height) = link + link = ( + link_type, target, (round(pos_x, 6), round(pos_y, 6), + round(width, 6), round(height, 6))) + links[i] = link + bookmarks = page.bookmarks + for i, (level, label, (pos_x, pos_y), state) in enumerate(bookmarks): + bookmarks[i] = (level, label, + (round(pos_x, 6), round(pos_y, 6)), state) + + +@assert_no_logs +def test_html_parsing(): + """Test the constructor for the HTML class.""" + _test_resource(FakeHTML, 'doc1.html', _check_doc1) + _test_resource(FakeHTML, 'doc1_UTF-16BE.html', _check_doc1, + encoding='UTF-16BE') + + py.path.local(os.path.dirname(__file__)).chdir() + filename = os.path.join('resources', 'doc1.html') + with open(filename, encoding='utf-8') as fd: + string = fd.read() + _check_doc1(FakeHTML(string=string, base_url=filename)) + _check_doc1(FakeHTML(string=string), has_base_url=False) + string_with_meta = string.replace( + '' + combined = b'' + html + linked = b'' + html + + py.path.local(resource_filename('')).chdir() + # Reference + html_obj = FakeHTML(string=combined, base_url='dummy.html') + # pdf_bytes = html_obj.write_pdf() + png_bytes = html_obj.write_png() + x2_png_bytes = html_obj.write_png(resolution=192) + rotated_png_bytes = FakeHTML(string=combined, base_url='dummy.html', + media_type='screen').write_png() + empty_png_bytes = FakeHTML( + string=b'').write_png() + check_png_pattern(png_bytes) + check_png_pattern(rotated_png_bytes, rotated=True) + check_png_pattern(empty_png_bytes, blank=True) + + tmpdir.chdir() + with open(resource_filename('pattern.png'), 'rb') as pattern_fd: + pattern_bytes = pattern_fd.read() + tmpdir.join('pattern.png').write_binary(pattern_bytes) + tmpdir.join('no_css.html').write_binary(html) + 
tmpdir.join('combined.html').write_binary(combined) + tmpdir.join('combined-UTF-16BE.html').write_binary( + combined.decode('ascii').encode('UTF-16BE')) + tmpdir.join('linked.html').write_binary(linked) + tmpdir.join('style.css').write_binary(css) + + _run('combined.html out1.png') + _run('combined.html out2.pdf') + assert tmpdir.join('out1.png').read_binary() == png_bytes + # TODO: check PDF content? How? + # assert tmpdir.join('out2.pdf').read_binary() == pdf_bytes + + _run('combined-UTF-16BE.html out3.png --encoding UTF-16BE') + assert tmpdir.join('out3.png').read_binary() == png_bytes + + _run(tmpdir.join('combined.html').strpath + ' out4.png') + assert tmpdir.join('out4.png').read_binary() == png_bytes + + _run(path2url(tmpdir.join('combined.html').strpath) + ' out5.png') + assert tmpdir.join('out5.png').read_binary() == png_bytes + + _run('linked.html out6.png') # test relative URLs + assert tmpdir.join('out6.png').read_binary() == png_bytes + + _run('combined.html out7 -f png') + _run('combined.html out8 --format pdf') + assert tmpdir.join('out7').read_binary() == png_bytes + # assert tmpdir.join('out8').read_binary(), pdf_bytes + + _run('no_css.html out9.png') + _run('no_css.html out10.png -s style.css') + assert tmpdir.join('out9.png').read_binary() != png_bytes + # assert tmpdir.join('out10.png').read_binary() == png_bytes + + stdout = _run('--format png combined.html -') + assert stdout == png_bytes + + _run('- out11.png', stdin=combined) + check_png_pattern(tmpdir.join('out11.png').read_binary()) + assert tmpdir.join('out11.png').read_binary() == png_bytes + + stdout = _run('--format png - -', stdin=combined) + assert stdout == png_bytes + + _run('combined.html out13.png --media-type screen') + _run('combined.html out12.png -m screen') + _run('linked.html out14.png -m screen') + assert tmpdir.join('out12.png').read_binary() == rotated_png_bytes + assert tmpdir.join('out13.png').read_binary() == rotated_png_bytes + assert 
tmpdir.join('out14.png').read_binary() == rotated_png_bytes + + stdout = _run('-f pdf combined.html -') + assert stdout.count(b'attachment') == 0 + stdout = _run('-f pdf -a pattern.png combined.html -') + assert stdout.count(b'attachment') == 1 + stdout = _run('-f pdf -a style.css -a pattern.png combined.html -') + assert stdout.count(b'attachment') == 2 + + stdout = _run('-f png -r 192 linked.html -') + assert stdout == x2_png_bytes + stdout = _run('-f png --resolution 192 linked.html -') + assert _run('linked.html - -f png --resolution 192') == x2_png_bytes + assert stdout == x2_png_bytes + + os.mkdir('subdirectory') + py.path.local('subdirectory').chdir() + with capture_logs() as logs: + stdout = _run('--format png - -', stdin=combined) + assert len(logs) == 1 + assert logs[0].startswith('ERROR: Failed to load image') + assert stdout == empty_png_bytes + + stdout = _run('--format png --base-url .. - -', stdin=combined) + assert stdout == png_bytes + + +@assert_no_logs +def test_unicode_filenames(tmpdir): + """Test non-ASCII filenames both in Unicode or bytes form.""" + # Replicate pattern.png in CSS so that base_url does not matter. 
+ html = b''' + + + ''' + png_bytes = FakeHTML(string=html).write_png() + check_png_pattern(png_bytes) + unicode_filename = 'Unicödé' + if sys.platform.startswith('darwin'): + unicode_filename = unicodedata.normalize('NFD', unicode_filename) + + tmpdir.chdir() + tmpdir.join(unicode_filename).write(html) + bytes_file, = tmpdir.listdir() + assert bytes_file.basename == unicode_filename + + assert FakeHTML(unicode_filename).write_png() == png_bytes + assert FakeHTML(bytes_file.strpath).write_png() == png_bytes + + os.remove(unicode_filename) + assert tmpdir.listdir() == [] + + FakeHTML(string=html).write_png(unicode_filename) + assert bytes_file.read_binary() == png_bytes + + +@assert_no_logs +def test_low_level_api(): + html = FakeHTML(string='') + css = CSS(string=''' + @page { margin: 2px; size: 8px; background: #fff } + html { background: #00f; } + body { background: #f00; width: 1px; height: 1px } + ''') + pdf_bytes = html.write_pdf(stylesheets=[css]) + assert pdf_bytes.startswith(b'%PDF') + # TODO: check PDF content? How? 
+ # assert html.render([css]).write_pdf() == pdf_bytes + + png_bytes = html.write_png(stylesheets=[css]) + document = html.render([css], enable_hinting=True) + page, = document.pages + assert page.width == 8 + assert page.height == 8 + assert document.write_png() == (png_bytes, 8, 8) + assert document.copy([page]).write_png() == (png_bytes, 8, 8) + + surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, 8, 8) + page.paint(cairo.Context(surface)) + file_obj = io.BytesIO() + surface.write_to_png(file_obj) + check_png_pattern(file_obj.getvalue()) + + surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, 8, 8) + context = cairo.Context(surface) + # Rotate at the center + context.translate(4, 4) + context.rotate(-math.pi / 2) + context.translate(-4, -4) + page.paint(context) + file_obj = io.BytesIO() + surface.write_to_png(file_obj) + check_png_pattern(file_obj.getvalue(), rotated=True) + + document = html.render([css], enable_hinting=True) + page, = document.pages + assert (page.width, page.height) == (8, 8) + png_bytes, width, height = document.write_png(resolution=192) + assert (width, height) == (16, 16) + check_png_pattern(png_bytes, x2=True) + + document = html.render([css], enable_hinting=True) + page, = document.pages + assert (page.width, page.height) == (8, 8) + # A resolution that is not multiple of 96: + assert _png_size(document.write_png(resolution=145.2)) == (13, 13) + + document = FakeHTML(string=''' + +

+

+ ''').render() + page_1, page_2 = document.pages + assert (page_1.width, page_1.height) == (5, 10) + assert (page_2.width, page_2.height) == (6, 4) + + result = document.write_png() + # (Max of both widths, Sum of both heights) + assert _png_size(result) == (6, 14) + assert document.copy([page_1, page_2]).write_png() == result + assert _png_size(document.copy([page_1]).write_png()) == (5, 10) + assert _png_size(document.copy([page_2]).write_png()) == (6, 4) + + +@pytest.mark.parametrize('html, expected_by_page, expected_tree, round', ( + (''' + +

a

+

b

+

c

+

d

+

e

+ ''', [ + [(1, 'a', (0, 0), 'open'), (4, 'b', (0, 10), 'open')], + [(3, 'c', (3, 2), 'open'), (2, 'd', (0, 10), 'open'), + (1, 'e', (0, 20), 'open')], + ], [ + ('a', (0, 0, 0), [ + ('b', (0, 0, 10), [], 'open'), + ('c', (1, 3, 2), [], 'open'), + ('d', (1, 0, 10), [], 'open')], 'open'), + ('e', (1, 0, 20), [], 'open'), + ], False), + (''' + +

Title 1

+

Title 2

+

Title 3

+

Title 4

+

Title 5

+ +

Title 6

+

Title 7

+

Title 8

+

Title 9

+

Title 10

+

Title 11

+ ''', [ + [ + (1, 'Title 1', (0, 0), 'open'), + (1, 'Title 2', (0, 100), 'open'), + (2, 'Title 3', (20, 200), 'open'), + (2, 'Title 4', (0, 300), 'open'), + (3, 'Title 5', (0, 400), 'open') + ], [ + (2, 'Title 6', (0, 100), 'open'), + (1, 'Title 7', (0, 200), 'open'), + (2, 'Title 8', (0, 300), 'open'), + (3, 'Title 9', (0, 400), 'open'), + (1, 'Title 10', (0, 500), 'open'), + (2, 'Title 11', (0, 600), 'open') + ], + ], [ + ('Title 1', (0, 0, 0), [], 'open'), + ('Title 2', (0, 0, 100), [ + ('Title 3', (0, 20, 200), [], 'open'), + ('Title 4', (0, 0, 300), [ + ('Title 5', (0, 0, 400), [], 'open')], 'open'), + ('Title 6', (1, 0, 100), [], 'open')], 'open'), + ('Title 7', (1, 0, 200), [ + ('Title 8', (1, 0, 300), [ + ('Title 9', (1, 0, 400), [], 'open')], 'open')], 'open'), + ('Title 10', (1, 0, 500), [ + ('Title 11', (1, 0, 600), [], 'open')], 'open'), + ], False), + (''' + +

A

depth 1

+

B

depth 2

+

C

depth 1

+

D

depth 2

+

E

depth 3

+ ''', [[ + (2, 'A', (0, 0), 'open'), + (4, 'B', (0, 20), 'open'), + (2, 'C', (0, 40), 'open'), + (3, 'D', (0, 60), 'open'), + (4, 'E', (0, 80), 'open'), + ]], [ + ('A', (0, 0, 0), [ + ('B', (0, 0, 20), [], 'open')], 'open'), + ('C', (0, 0, 40), [ + ('D', (0, 0, 60), [ + ('E', (0, 0, 80), [], 'open')], 'open')], 'open'), + ], False), + (''' + +

A

h2 depth 1

+

B

h4 depth 2

+

C

h3 depth 2

+
D

h5 depth 3

+

E

h1 depth 1

+

F

h2 depth 2

+

G

h2 depth 2

+

H

h4 depth 3

+

I

h1 depth 1

+ ''', [[ + (2, 'A', (0, 0), 'open'), + (4, 'B', (0, 20), 'open'), + (3, 'C', (0, 40), 'open'), + (5, 'D', (0, 60), 'open'), + (1, 'E', (0, 70), 'open'), + (2, 'F', (0, 90), 'open'), + (2, 'G', (0, 110), 'open'), + (4, 'H', (0, 130), 'open'), + (1, 'I', (0, 150), 'open'), + ]], [ + ('A', (0, 0, 0), [ + ('B', (0, 0, 20), [], 'open'), + ('C', (0, 0, 40), [ + ('D', (0, 0, 60), [], 'open')], 'open')], 'open'), + ('E', (0, 0, 70), [ + ('F', (0, 0, 90), [], 'open'), + ('G', (0, 0, 110), [ + ('H', (0, 0, 130), [], 'open')], 'open')], 'open'), + ('I', (0, 0, 150), [], 'open'), + ], False), + ('

é', [ + [(1, 'é', (0, 0), 'open')] + ], [ + ('é', (0, 0, 0), [], 'open') + ], False), + (''' +

! + ''', [ + [(1, '!', (50, 0), 'open')] + ], [ + ('!', (0, 50, 0), [], 'open') + ], False), + (''' + + Chocolate + ''' % path2url(resource_filename('pattern.png')), + [[(1, 'Chocolate', (0, 0), 'open')]], + [('Chocolate', (0, 0, 0), [], 'open')], False), + (''' +

! + ''', [[(1, '!', (0, 50), 'open')]], [('!', (0, 0, 50), [], 'open')], True), + (''' + +

! + ''', [[(1, '!', (0, 50), 'open')]], [('!', (0, 0, 50), [], 'open')], True), +)) +@assert_no_logs +def test_assert_bookmarks(html, expected_by_page, expected_tree, round): + document = FakeHTML(string=html).render() + if round: + _round_meta(document.pages) + assert [p.bookmarks for p in document.pages] == expected_by_page + assert document.make_bookmark_tree() == expected_tree + + +@assert_no_logs +def test_links(): + def assert_links(html, expected_links_by_page, expected_anchors_by_page, + expected_resolved_links, + base_url=resource_filename(''), + warnings=(), round=False): + with capture_logs() as logs: + document = FakeHTML(string=html, base_url=base_url).render() + if round: + _round_meta(document.pages) + resolved_links = list(document.resolve_links()) + assert len(logs) == len(warnings) + for message, expected in zip(logs, warnings): + assert expected in message + assert [p.links for p in document.pages] == expected_links_by_page + assert [p.anchors for p in document.pages] == expected_anchors_by_page + assert resolved_links == expected_resolved_links + + assert_links(''' + +

+

+

Hello, World

+

+ +

+ ''', [ + [ + ('external', 'http://weasyprint.org', (0, 0, 30, 20)), + ('external', 'http://weasyprint.org', (0, 0, 30, 30)), + ('internal', 'lipsum', (10, 100, 32, 20)), + ('internal', 'lipsum', (10, 100, 32, 32)) + ], + [('internal', 'hello', (0, 0, 200, 30))], + ], [ + {'hello': (0, 200)}, + {'lipsum': (0, 0)} + ], [ + ( + [ + ('external', 'http://weasyprint.org', (0, 0, 30, 20)), + ('external', 'http://weasyprint.org', (0, 0, 30, 30)), + ('internal', 'lipsum', (10, 100, 32, 20)), + ('internal', 'lipsum', (10, 100, 32, 32)) + ], + [('hello', 0, 200)], + ), + ( + [ + ('internal', 'hello', (0, 0, 200, 30)) + ], + [('lipsum', 0, 0)]), + ]) + + assert_links( + ''' + + + ''', [[('external', 'http://weasyprint.org/foo/lipsum/%C3%A9_%E9', + (5, 10, 190, 0))]], + [{}], [([('external', 'http://weasyprint.org/foo/lipsum/%C3%A9_%E9', + (5, 10, 190, 0))], [])], + base_url='http://weasyprint.org/foo/bar/') + assert_links( + ''' + +
+ ''', [[('external', 'http://weasyprint.org/foo/lipsum/%C3%A9_%E9', + (5, 10, 190, 0))]], + [{}], [([('external', 'http://weasyprint.org/foo/lipsum/%C3%A9_%E9', + (5, 10, 190, 0))], [])], + base_url='http://weasyprint.org/foo/bar/') + + # Relative URI reference without a base URI: allowed for links + assert_links( + ''' + + + ''', [[('external', '../lipsum', (5, 10, 190, 0))]], [{}], + [([('external', '../lipsum', (5, 10, 190, 0))], [])], base_url=None) + + # Relative URI reference without a base URI: not supported for -weasy-link + assert_links( + ''' + +
+ ''', [[]], [{}], [([], [])], base_url=None, warnings=[ + 'WARNING: Ignored `-weasy-link: url("../lipsum")` at 1:1, ' + 'Relative URI reference without a base URI']) + + # Internal or absolute URI reference without a base URI: OK + assert_links( + ''' + + + + ''', [[('internal', 'lipsum', (5, 10, 190, 0)), + ('external', 'http://weasyprint.org/', (0, 10, 200, 0))]], + [{'lipsum': (5, 10)}], + [([('internal', 'lipsum', (5, 10, 190, 0)), + ('external', 'http://weasyprint.org/', (0, 10, 200, 0))], + [('lipsum', 5, 10)])], + base_url=None) + + assert_links( + ''' + +
+ ''', + [[('internal', 'lipsum', (5, 10, 190, 0))]], + [{'lipsum': (5, 10)}], + [([('internal', 'lipsum', (5, 10, 190, 0))], [('lipsum', 5, 10)])], + base_url=None) + + assert_links( + ''' + + + + + ''', + [[('internal', 'lipsum', (0, 0, 200, 15)), + ('internal', 'missing', (0, 15, 200, 15))]], + [{'lipsum': (0, 15)}], + [([('internal', 'lipsum', (0, 0, 200, 15))], [('lipsum', 0, 15)])], + base_url=None, + warnings=[ + 'ERROR: No anchor #missing for internal URI reference']) + + assert_links( + ''' + + + ''', + [[('internal', 'lipsum', (30, 10, 40, 200))]], + [{'lipsum': (70, 10)}], + [([('internal', 'lipsum', (30, 10, 40, 200))], [('lipsum', 70, 10)])], + round=True) + + +# Make relative URL references work with our custom URL scheme. +uses_relative.append('weasyprint-custom') + + +@assert_no_logs +def test_url_fetcher(): + filename = resource_filename('pattern.png') + with open(filename, 'rb') as pattern_fd: + pattern_png = pattern_fd.read() + + def fetcher(url): + if url == 'weasyprint-custom:foo/%C3%A9_%e9_pattern': + return dict(string=pattern_png, mime_type='image/png') + elif url == 'weasyprint-custom:foo/bar.css': + return dict(string='body { background: url(é_%e9_pattern)', + mime_type='text/css') + else: + return default_url_fetcher(url) + + base_url = resource_filename('dummy.html') + css = CSS(string=''' + @page { size: 8px; margin: 2px; background: #fff } + body { margin: 0; font-size: 0 } + ''', base_url=base_url) + + def test(html, blank=False): + html = FakeHTML(string=html, url_fetcher=fetcher, base_url=base_url) + check_png_pattern(html.write_png(stylesheets=[css]), blank=blank) + + test('') # Test a "normal" URL + test('' % Path(filename).as_uri()) + test('' % Path(filename).as_uri()) + test('') + test('') + test('
  • ') + test('') + test('') + + with capture_logs() as logs: + test('', blank=True) + assert len(logs) == 1 + assert logs[0].startswith( + 'ERROR: Failed to load image at "custom:foo/bar"') + + def fetcher_2(url): + assert url == 'weasyprint-custom:%C3%A9_%e9.css' + return dict(string='', mime_type='text/css') + FakeHTML(string='', url_fetcher=fetcher_2).render() + + +@assert_no_logs +def test_html_meta(): + def assert_meta(html, **meta): + meta.setdefault('title', None) + meta.setdefault('authors', []) + meta.setdefault('keywords', []) + meta.setdefault('generator', None) + meta.setdefault('description', None) + meta.setdefault('created', None) + meta.setdefault('modified', None) + meta.setdefault('attachments', []) + assert vars(FakeHTML(string=html).render().metadata) == meta + + assert_meta('') + assert_meta( + ''' + + + Test document +

    Another title

    + + + + + + + + + + + + ''', + authors=['I Me & Myself', 'Smith, John'], + title='Test document', + generator='Human after all', + keywords=['html', 'css', 'pdf', 'Python; cairo'], + description="Blah… ", + created='2011-04', + modified='2013') + assert_meta( + ''' + One + + Two + Three + + ''', + title='One', + authors=['', 'Me']) + + +@assert_no_logs +def test_http(): + def gzip_compress(data): + file_obj = io.BytesIO() + gzip_file = gzip.GzipFile(fileobj=file_obj, mode='wb') + gzip_file.write(data) + gzip_file.close() + return file_obj.getvalue() + + with http_server({ + '/gzip': lambda env: ( + (gzip_compress(b''), [('Content-Encoding', 'gzip')]) + if 'gzip' in env.get('HTTP_ACCEPT_ENCODING', '') else + (b'', []) + ), + '/deflate': lambda env: ( + (zlib.compress(b''), + [('Content-Encoding', 'deflate')]) + if 'deflate' in env.get('HTTP_ACCEPT_ENCODING', '') else + (b'', []) + ), + '/raw-deflate': lambda env: ( + # Remove zlib header and checksum + (zlib.compress(b'')[2:-4], + [('Content-Encoding', 'deflate')]) + if 'deflate' in env.get('HTTP_ACCEPT_ENCODING', '') else + (b'', []) + ), + }) as root_url: + assert HTML(root_url + '/gzip').etree_element.get('test') == 'ok' + assert HTML(root_url + '/deflate').etree_element.get('test') == 'ok' + assert HTML( + root_url + '/raw-deflate').etree_element.get('test') == 'ok' diff --git a/weasyprint/tests/test_boxes.py b/weasyprint/tests/test_boxes.py new file mode 100644 index 00000000..16127f8d --- /dev/null +++ b/weasyprint/tests/test_boxes.py @@ -0,0 +1,1762 @@ +""" + weasyprint.tests.test_boxes + --------------------------- + + Test that the "before layout" box tree is correctly constructed. + + :copyright: Copyright 2011-2019 Simon Sapin and contributors, see AUTHORS. + :license: BSD, see LICENSE for details. + +""" + +import functools + +import pytest + +from .. 
import images +from ..css import PageType, get_all_computed_styles +from ..css.targets import TargetCollector +from ..formatting_structure import boxes, build, counters +from ..layout.pages import set_page_type_computed_styles +from .testing_utils import BASE_URL, FakeHTML, assert_no_logs, capture_logs + +PROPER_CHILDREN = dict((key, tuple(map(tuple, value))) for key, value in { + # Children can be of *any* type in *one* of the lists. + boxes.BlockContainerBox: [[boxes.BlockLevelBox], [boxes.LineBox]], + boxes.LineBox: [[boxes.InlineLevelBox]], + boxes.InlineBox: [[boxes.InlineLevelBox]], + boxes.TableBox: [[boxes.TableCaptionBox, + boxes.TableColumnGroupBox, boxes.TableColumnBox, + boxes.TableRowGroupBox, boxes.TableRowBox]], + boxes.InlineTableBox: [[boxes.TableCaptionBox, + boxes.TableColumnGroupBox, boxes.TableColumnBox, + boxes.TableRowGroupBox, boxes.TableRowBox]], + boxes.TableColumnGroupBox: [[boxes.TableColumnBox]], + boxes.TableRowGroupBox: [[boxes.TableRowBox]], + boxes.TableRowBox: [[boxes.TableCellBox]], +}.items()) + + +def serialize(box_list): + """Transform a box list into a structure easier to compare for testing.""" + return [( + box.element_tag, + type(box).__name__[:-3], + # All concrete boxes are either text, replaced, column or parent. 
+ (box.text if isinstance(box, boxes.TextBox) + else '' if isinstance(box, boxes.ReplacedBox) + else serialize( + getattr(box, 'column_groups', ()) + tuple(box.children)))) + for box in box_list] + + +def _parse_base(html_content, base_url=BASE_URL): + document = FakeHTML(string=html_content, base_url=base_url) + style_for = get_all_computed_styles(document) + get_image_from_uri = functools.partial( + images.get_image_from_uri, {}, document.url_fetcher) + target_collector = TargetCollector() + return ( + document.etree_element, style_for, get_image_from_uri, base_url, + target_collector) + + +def parse(html_content): + """Parse some HTML, apply stylesheets and transform to boxes.""" + box, = build.element_to_box(*_parse_base(html_content)) + return box + + +def parse_all(html_content, base_url=BASE_URL): + """Like parse() but also run all corrections on boxes.""" + box = build.build_formatting_structure(*_parse_base( + html_content, base_url)) + _sanity_checks(box) + return box + + +def render_pages(html_content): + """Lay out a document and return a list of PageBox objects.""" + return [p._page_box for p in FakeHTML( + string=html_content, base_url=BASE_URL + ).render(enable_hinting=True).pages] + + +def assert_tree(box, expected): + """Check the box tree equality. + + The obtained result is prettified in the message in case of failure. + + box: a Box object, starting with and blocks. + expected: a list of serialized children as returned by to_lists(). + + """ + assert box.element_tag == 'html' + assert isinstance(box, boxes.BlockBox) + assert len(box.children) == 1 + + box = box.children[0] + assert isinstance(box, boxes.BlockBox) + assert box.element_tag == 'body' + + assert serialize(box.children) == expected + + +def _sanity_checks(box): + """Check that the rules regarding boxes are met. + + This is not required and only helps debugging. 
+ + - A block container can contain either only block-level boxes or + only line boxes; + - Line boxes and inline boxes can only contain inline-level boxes. + + """ + if not isinstance(box, boxes.ParentBox): + return + + acceptable_types_lists = None # raises when iterated + for class_ in type(box).mro(): + if class_ in PROPER_CHILDREN: + acceptable_types_lists = PROPER_CHILDREN[class_] + break + + assert any( + all(isinstance(child, acceptable_types) or + not child.is_in_normal_flow() + for child in box.children) + for acceptable_types in acceptable_types_lists + ), (box, box.children) + + for child in box.children: + _sanity_checks(child) + + +def _get_grid(html): + html = parse_all(html) + body, = html.children + table_wrapper, = body.children + table, = table_wrapper.children + return tuple( + [[(style, width, color) if width else None + for _score, (style, width, color) in column] + for column in grid] + for grid in table.collapsed_border_grid) + + +@assert_no_logs +def test_box_tree(): + assert_tree(parse('

    '), [('p', 'Block', [])]) + assert_tree(parse(''' + +

    Hello World L!

    '''), [ + ('p', 'Block', [ + ('p', 'Text', 'Hello '), + ('em', 'Inline', [ + ('em', 'Text', 'World '), + ('img', 'InlineReplaced', ''), + ('span', 'InlineBlock', [ + ('span', 'Text', 'L')])]), + ('p', 'Text', '!')])]) + + +@assert_no_logs +def test_html_entities(): + for quote in ['"', '"', '"', '"']: + assert_tree(parse('

    {0}abc{1}'.format(quote, quote)), [ + ('p', 'Block', [ + ('p', 'Text', '"abc"')])]) + + +@assert_no_logs +def test_inline_in_block_1(): + source = '

    Hello, World!\n

    Lipsum.

    ' + expected = [ + ('div', 'Block', [ + ('div', 'Block', [ + ('div', 'Line', [ + ('div', 'Text', 'Hello, '), + ('em', 'Inline', [ + ('em', 'Text', 'World')]), + ('div', 'Text', '!\n')])]), + ('p', 'Block', [ + ('p', 'Line', [ + ('p', 'Text', 'Lipsum.')])])])] + box = parse(source) + box = build.inline_in_block(box) + assert_tree(box, expected) + + +@assert_no_logs +def test_inline_in_block_2(): + source = '

    Lipsum.

    Hello, World!\n
    ' + expected = [ + ('div', 'Block', [ + ('p', 'Block', [ + ('p', 'Line', [ + ('p', 'Text', 'Lipsum.')])]), + ('div', 'Block', [ + ('div', 'Line', [ + ('div', 'Text', 'Hello, '), + ('em', 'Inline', [ + ('em', 'Text', 'World')]), + ('div', 'Text', '!\n')])])])] + box = parse(source) + box = build.inline_in_block(box) + assert_tree(box, expected) + + +@assert_no_logs +def test_inline_in_block_3(): + # Absolutes are left in the lines to get their static position later. + source = '''

    Hello World!

    ''' + expected = [ + ('p', 'Block', [ + ('p', 'Line', [ + ('p', 'Text', 'Hello '), + ('em', 'Block', [ + ('em', 'Line', [ + ('em', 'Text', 'World')])]), + ('p', 'Text', '!')])])] + box = parse(source) + box = build.inline_in_block(box) + assert_tree(box, expected) + box = build.block_in_inline(box) + assert_tree(box, expected) + + +@assert_no_logs +def test_inline_in_block_4(): + # Floats are pull to the top of their containing blocks + source = '

    Hello World!

    ' + box = parse(source) + box = build.inline_in_block(box) + box = build.block_in_inline(box) + assert_tree(box, [ + ('p', 'Block', [ + ('p', 'Line', [ + ('p', 'Text', 'Hello '), + ('em', 'Block', [ + ('em', 'Line', [ + ('em', 'Text', 'World')])]), + ('p', 'Text', '!')])])]) + + +@assert_no_logs +def test_block_in_inline(): + box = parse(''' + +

    Lorem ipsum dolor sit + amet,conse''') + box = build.inline_in_block(box) + assert_tree(box, [ + ('body', 'Line', [ + ('p', 'InlineBlock', [ + ('p', 'Line', [ + ('p', 'Text', 'Lorem '), + ('em', 'Inline', [ + ('em', 'Text', 'ipsum '), + ('strong', 'Inline', [ + ('strong', 'Text', 'dolor '), + ('span', 'Block', [ # This block is "pulled up" + ('span', 'Line', [ + ('span', 'Text', 'sit')])]), + # No whitespace processing here. + ('strong', 'Text', '\n '), + ('span', 'Block', [ # This block is "pulled up" + ('span', 'Line', [ + ('span', 'Text', 'amet,')])])]), + ('span', 'Block', [ # This block is "pulled up" + ('span', 'Line', [ + ('em', 'Inline', [ + ('em', 'Text', 'conse'), + ('i', 'Block', [])])])])])])])])]) + + box = build.block_in_inline(box) + assert_tree(box, [ + ('body', 'Line', [ + ('p', 'InlineBlock', [ + ('p', 'Block', [ + ('p', 'Line', [ + ('p', 'Text', 'Lorem '), + ('em', 'Inline', [ + ('em', 'Text', 'ipsum '), + ('strong', 'Inline', [ + ('strong', 'Text', 'dolor ')])])])]), + ('span', 'Block', [ + ('span', 'Line', [ + ('span', 'Text', 'sit')])]), + ('p', 'Block', [ + ('p', 'Line', [ + ('em', 'Inline', [ + ('strong', 'Inline', [ + # Whitespace processing not done yet. + ('strong', 'Text', '\n ')])])])]), + ('span', 'Block', [ + ('span', 'Line', [ + ('span', 'Text', 'amet,')])]), + + ('p', 'Block', [ + ('p', 'Line', [ + ('em', 'Inline', [ + ('strong', 'Inline', [])])])]), + ('span', 'Block', [ + ('span', 'Block', [ + ('span', 'Line', [ + ('em', 'Inline', [ + ('em', 'Text', 'conse')])])]), + ('i', 'Block', []), + ('span', 'Block', [ + ('span', 'Line', [ + ('em', 'Inline', [])])])]), + ('p', 'Block', [ + ('p', 'Line', [ + ('em', 'Inline', [])])])])])]) + + +@assert_no_logs +def test_styles(): + box = parse(''' + +

    Lorem ipsum dolor sit + amet,consectetur

    ''') + box = build.inline_in_block(box) + box = build.block_in_inline(box) + + descendants = list(box.descendants()) + assert len(descendants) == 31 + assert descendants[0] == box + + for child in descendants: + # All boxes inherit the color + assert child.style['color'] == (0, 0, 1, 1) # blue + # Only non-anonymous boxes have margins + assert child.style['margin_top'] in ((0, 'px'), (42, 'px')) + + +@assert_no_logs +def test_whitespace(): + # TODO: test more cases + # http://www.w3.org/TR/CSS21/text.html#white-space-model + assert_tree(parse_all(''' +

    Lorem \t\r\n ipsum\t dolor + sit + amet + consectetur.

    +
    \t  foo\n
    +
    \t  foo\n
    +
    \t  foo\n
    + '''), [ + ('p', 'Block', [ + ('p', 'Line', [ + ('p', 'Text', 'Lorem ipsum '), + ('strong', 'Inline', [ + ('strong', 'Text', 'dolor '), + ('img', 'InlineReplaced', ''), + ('strong', 'Text', ' sit '), + ('span', 'Block', []), + ('em', 'Inline', [ + ('em', 'Text', 'amet ')]), + ('strong', 'Text', 'consectetur')]), + ('p', 'Text', '.')])]), + ('pre', 'Block', [ + ('pre', 'Line', [ + # pre + ('pre', 'Text', '\t foo\n')])]), + ('pre', 'Block', [ + ('pre', 'Line', [ + # pre-wrap + ('pre', 'Text', '\t foo\n')])]), + ('pre', 'Block', [ + ('pre', 'Line', [ + # pre-line + ('pre', 'Text', ' foo\n')])])]) + + +@assert_no_logs +@pytest.mark.parametrize('page_type, top, right, bottom, left', ( + (PageType(side='left', first=True, index=0, blank=None, name=None), + 20, 3, 3, 10), + (PageType(side='right', first=True, index=0, blank=None, name=None), + 20, 10, 3, 3), + (PageType(side='left', first=None, index=1, blank=None, name=None), + 10, 3, 3, 10), + (PageType(side='right', first=None, index=1, blank=None, name=None), + 10, 10, 3, 3), + (PageType(side='right', first=None, index=1, blank=None, name='name'), + 5, 10, 3, 15), + (PageType(side='right', first=None, index=2, blank=None, name='name'), + 5, 10, 1, 15), + (PageType(side='right', first=None, index=8, blank=None, name='name'), + 5, 10, 2, 15), +)) +def test_page_style(page_type, top, right, bottom, left): + document = FakeHTML(string=''' + + ''') + style_for = get_all_computed_styles(document) + + # Force the generation of the style for this page type as it's generally + # only done during the rendering. + set_page_type_computed_styles(page_type, document, style_for) + + style = style_for(page_type) + assert style['margin_top'] == (top, 'px') + assert style['margin_right'] == (right, 'px') + assert style['margin_bottom'] == (bottom, 'px') + assert style['margin_left'] == (left, 'px') + + +@assert_no_logs +def test_images_1(): + with capture_logs() as logs: + result = parse_all(''' +

    No srcInexistent src

    + ''') + assert len(logs) == 1 + assert 'ERROR: Failed to load image' in logs[0] + assert 'inexistent.jpg' in logs[0] + assert_tree(result, [ + ('p', 'Block', [ + ('p', 'Line', [ + ('img', 'InlineReplaced', ''), + ('img', 'Inline', [ + ('img', 'Text', 'No src')]), + ('img', 'Inline', [ + ('img', 'Text', 'Inexistent src')])])])]) + + +@assert_no_logs +def test_images_2(): + with capture_logs() as logs: + result = parse_all('

    No base_url', + base_url=None) + assert len(logs) == 1 + assert 'ERROR: Relative URI reference without a base URI' in logs[0] + assert_tree(result, [ + ('p', 'Block', [ + ('p', 'Line', [ + ('img', 'Inline', [ + ('img', 'Text', 'No base_url')])])])]) + + +@assert_no_logs +def test_tables_1(): + # Rules in http://www.w3.org/TR/CSS21/tables.html#anonymous-boxes + + # Rule 1.3 + # Also table model: http://www.w3.org/TR/CSS21/tables.html#model + assert_tree(parse_all(''' + + + foo + bar + + + + + + + top caption + + baz + + + '''), [ + ('x-table', 'Block', [ + ('x-caption', 'TableCaption', [ + ('x-caption', 'Line', [ + ('x-caption', 'Text', 'top caption')])]), + ('x-table', 'Table', [ + ('x-table', 'TableColumnGroup', [ + ('x-col', 'TableColumn', [])]), + ('x-thead', 'TableRowGroup', [ + ('x-thead', 'TableRow', [ + ('x-th', 'TableCell', [])])]), + ('x-table', 'TableRowGroup', [ + ('x-tr', 'TableRow', [ + ('x-th', 'TableCell', [ + ('x-th', 'Line', [ + ('x-th', 'Text', 'foo')])]), + ('x-th', 'TableCell', [ + ('x-th', 'Line', [ + ('x-th', 'Text', 'bar')])])])]), + ('x-thead', 'TableRowGroup', []), + ('x-table', 'TableRowGroup', [ + ('x-tr', 'TableRow', [ + ('x-td', 'TableCell', [ + ('x-td', 'Line', [ + ('x-td', 'Text', 'baz')])])])]), + ('x-tfoot', 'TableRowGroup', [])]), + ('x-caption', 'TableCaption', [])])]) + + +@assert_no_logs +def test_tables_2(): + # Rules 1.4 and 3.1 + assert_tree(parse_all(''' + foo + bar + '''), [ + ('body', 'Block', [ + ('body', 'Table', [ + ('body', 'TableRowGroup', [ + ('body', 'TableRow', [ + ('span', 'TableCell', [ + ('span', 'Line', [ + ('span', 'Text', 'foo')])]), + ('span', 'TableCell', [ + ('span', 'Line', [ + ('span', 'Text', 'bar')])])])])])])]) + + +@assert_no_logs +def test_tables_3(): + # http://www.w3.org/TR/CSS21/tables.html#anonymous-boxes + # Rules 1.1 and 1.2 + # Rule XXX (not in the spec): column groups have at least one column child + assert_tree(parse_all(''' + + 1 + + 2 + 3 + + 4 + + + '''), [ + ('body', 'Block', [ + 
('body', 'Table', [ + ('span', 'TableColumnGroup', [ + ('em', 'TableColumn', [])]), + ('ins', 'TableColumnGroup', [ + ('ins', 'TableColumn', [])])])])]) + + +@assert_no_logs +def test_tables_4(): + # Rules 2.1 then 2.3 + assert_tree(parse_all('foo

    '), [ + ('x-table', 'Block', [ + ('x-table', 'Table', [ + ('x-table', 'TableRowGroup', [ + ('x-table', 'TableRow', [ + ('x-table', 'TableCell', [ + ('x-table', 'Block', [ + ('x-table', 'Line', [ + ('x-table', 'Text', 'foo ')])]), + ('div', 'Block', [])])])])])])]) + + +@assert_no_logs +def test_tables_5(): + # Rule 2.2 + assert_tree(parse_all('' + '
    '), [ + ('body', 'Block', [ + ('body', 'Table', [ + ('x-thead', 'TableRowGroup', [ + ('x-thead', 'TableRow', [ + ('x-thead', 'TableCell', [ + ('div', 'Block', [])]), + ('x-td', 'TableCell', [])])])])])]) + + +@assert_no_logs +def test_tables_6(): + # Rule 3.2 + assert_tree(parse_all(''), [ + ('body', 'Line', [ + ('span', 'Inline', [ + ('span', 'InlineBlock', [ + ('span', 'InlineTable', [ + ('span', 'TableRowGroup', [ + ('x-tr', 'TableRow', [])])])])])])]) + + +@assert_no_logs +def test_tables_7(): + # Rule 3.1 + # Also, rule 1.3 does not apply: whitespace before and after is preserved + assert_tree(parse_all(''' + + + + + '''), [ + ('body', 'Line', [ + ('span', 'Inline', [ + # Whitespace is preserved in table handling, then collapsed + # into a single space. + ('span', 'Text', ' '), + ('span', 'InlineBlock', [ + ('span', 'InlineTable', [ + ('span', 'TableRowGroup', [ + ('span', 'TableRow', [ + ('em', 'TableCell', []), + ('em', 'TableCell', [])])])])]), + ('span', 'Text', ' ')])])]) + + +@assert_no_logs +def test_tables_8(): + # Rule 3.2 + assert_tree(parse_all('\t'), [ + ('body', 'Block', [ + ('body', 'Table', [ + ('body', 'TableRowGroup', [ + ('x-tr', 'TableRow', []), + ('x-tr', 'TableRow', [])])])])]) + + +@assert_no_logs +def test_tables_9(): + assert_tree(parse_all('\n'), [ + ('body', 'Block', [ + ('body', 'Table', [ + ('body', 'TableColumnGroup', [ + ('x-col', 'TableColumn', [])]), + ('x-colgroup', 'TableColumnGroup', [ + ('x-colgroup', 'TableColumn', [])])])])]) + + +@assert_no_logs +def test_table_style(): + html = parse_all('
    ') + body, = html.children + wrapper, = body.children + table, = wrapper.children + assert isinstance(wrapper, boxes.BlockBox) + assert isinstance(table, boxes.TableBox) + assert wrapper.style['margin_top'] == (1, 'px') + assert wrapper.style['padding_top'] == (0, 'px') + assert table.style['margin_top'] == (0, 'px') + assert table.style['padding_top'] == (2, 'px') + + +@assert_no_logs +def test_column_style(): + html = parse_all(''' + + + +
    + ''') + body, = html.children + wrapper, = body.children + table, = wrapper.children + colgroup, = table.column_groups + widths = [col.style['width'] for col in colgroup.children] + assert widths == [(10, 'px'), (10, 'px'), (10, 'px'), 'auto', 'auto'] + assert [col.grid_x for col in colgroup.children] == [0, 1, 2, 3, 4] + # copies, not the same box object + assert colgroup.children[0] is not colgroup.children[1] + + +@assert_no_logs +def test_nested_grid_x(): + html = parse_all(''' + + + + + + + + +
    + ''') + body, = html.children + wrapper, = body.children + table, = wrapper.children + grid = [(colgroup.grid_x, [col.grid_x for col in colgroup.children]) + for colgroup in table.column_groups] + assert grid == [(0, [0, 1]), (2, [2, 3]), (4, [4, 5, 6]), (7, [7])] + + +@assert_no_logs +def test_colspan_rowspan_1(): + # +---+---+---+ + # | A | B | C | X + # +---+---+---+ + # | D | E | X + # +---+---+ +---+ + # | F ...| | | <-- overlap + # +---+---+---+ + + # | H | X X | G | + # +---+---+ + + + # | I | J | X | | + # +---+---+ +---+ + + # X: empty cells + html = parse_all(''' + + + + + + + + + + + +
    A B C +
    D E +
    F G +
    H +
    I J +
    + ''') + body, = html.children + wrapper, = body.children + table, = wrapper.children + group, = table.children + assert [[c.grid_x for c in row.children] for row in group.children] == [ + [0, 1, 2], + [0, 1], + [0, 3], + [0], + [0, 1], + ] + assert [[c.colspan for c in row.children] for row in group.children] == [ + [1, 1, 1], + [1, 2], + [2, 1], + [1], + [1, 1], + ] + assert [[c.rowspan for c in row.children] for row in group.children] == [ + [1, 1, 1], + [1, 2], + [1, 3], + [1], + [1, 1], + ] + + +@assert_no_logs +def test_colspan_rowspan_2(): + # A cell box cannot extend beyond the last row box of a table. + html = parse_all(''' + + + + + + + + +
    + ''') + body, = html.children + wrapper, = body.children + table, = wrapper.children + group, = table.children + assert [[c.grid_x for c in row.children] for row in group.children] == [ + [0, 1], + [1], + ] + assert [[c.colspan for c in row.children] for row in group.children] == [ + [1, 1], + [1], + ] + assert [[c.rowspan for c in row.children] for row in group.children] == [ + [2, 1], # Not 5 + [1], + ] + + +@assert_no_logs +def test_before_after_1(): + assert_tree(parse_all(''' + +

    +
    +
    + '''), [ + # No content in pseudo-element, no box generated + ('p', 'Block', []), + ('div', 'Block', []), + ('section', 'Block', [])]) + + +@assert_no_logs +def test_before_after_2(): + assert_tree(parse_all(''' + +

    c

    + '''), [ + ('p', 'Block', [ + ('p', 'Line', [ + ('p::before', 'Inline', [ + ('p::before', 'Text', 'ab')]), + ('p', 'Text', ' c '), + ('p::after', 'Inline', [ + ('p::after', 'Text', 'de')])])])]) + + +@assert_no_logs +def test_before_after_3(): + assert_tree(parse_all(''' + +

    some text

    + '''), [ + ('p', 'Block', [ + ('p', 'Line', [ + ('a', 'Inline', [ + ('a::before', 'Inline', [ + ('a::before', 'Text', '[some url] ')]), + ('a', 'Text', 'some text')])])])]) + + +@assert_no_logs +def test_before_after_4(): + assert_tree(parse_all(''' + +

    Lorem ipsum dolor sit amet

    + '''), [ + ('p', 'Block', [ + ('p', 'Line', [ + ('q', 'Inline', [ + ('q::before', 'Inline', [ + ('q::before', 'Text', '« ')]), + ('q', 'Text', 'Lorem ipsum '), + ('q', 'Inline', [ + ('q::before', 'Inline', [ + ('q::before', 'Text', '“ ')]), + ('q', 'Text', 'dolor'), + ('q::after', 'Inline', [ + ('q::after', 'Text', ' ”')])]), + ('q', 'Text', ' sit amet'), + ('q::after', 'Inline', [ + ('q::after', 'Text', ' »')])])])])]) + + +@assert_no_logs +def test_before_after_5(): + with capture_logs() as logs: + assert_tree(parse_all(''' + +

    c

    + '''), [ + ('p', 'Block', [ + ('p', 'Line', [ + ('p::before', 'Inline', [ + ('p::before', 'Text', 'a'), + ('p::before', 'InlineReplaced', ''), + ('p::before', 'Text', 'b')]), + ('p', 'Text', 'c')])])]) + assert len(logs) == 1 + assert 'nested-function(' in logs[0] + assert 'invalid value' in logs[0] + + +@assert_no_logs +def test_counters_1(): + assert_tree(parse_all(''' + +

    +

    +

    +

    +

    +

    +

    +

    +

    +

    +

    +

    '''), [ + ('p', 'Block', [ + ('p', 'Line', [ + ('p::before', 'Inline', [ + ('p::before', 'Text', counter)])])]) + for counter in '0 1 3 2 4 6 -11 -9 -7 44 46 48'.split()]) + + +@assert_no_logs +def test_counters_2(): + assert_tree(parse_all(''' +
      +
    1. +
    2. +
    3. +
      1. +
      2. +
      3. +
      4. +
    4. +
    5. +
    '''), [ + ('ol', 'Block', [ + ('li', 'Block', [ + ('li', 'Line', [ + ('li::marker', 'Inline', [ + ('li::marker', 'Text', '1. ')])])]), + ('li', 'Block', [ + ('li', 'Line', [ + ('li::marker', 'Inline', [ + ('li::marker', 'Text', '2. ')])])]), + ('li', 'Block', [ + ('li', 'Line', [ + ('li::marker', 'Inline', [ + ('li::marker', 'Text', '3. ')])])]), + ('li', 'Block', [ + ('li', 'Block', [ + ('li', 'Line', [ + ('li::marker', 'Inline', [ + ('li::marker', 'Text', '4. ')])])]), + ('ol', 'Block', [ + ('li', 'Block', [ + ('li', 'Line', [ + ('li::marker', 'Inline', [ + ('li::marker', 'Text', '1. ')])])]), + ('li', 'Block', [ + ('li', 'Line', [ + ('li::marker', 'Inline', [ + ('li::marker', 'Text', '1. ')])])]), + ('li', 'Block', [ + ('li', 'Line', [ + ('li::marker', 'Inline', [ + ('li::marker', 'Text', '2. ')])])])])]), + ('li', 'Block', [ + ('li', 'Line', [ + ('li::marker', 'Inline', [ + ('li::marker', 'Text', '5. ')])])])])]) + + +@assert_no_logs +def test_counters_3(): + assert_tree(parse_all(''' + +
    +

    +

    +

    +
    +

    '''), [ + ('div', 'Block', [ + ('p', 'Block', [ + ('p', 'Line', [ + ('p::marker', 'Inline', [ + ('p::marker', 'Text', '1. ')])])]), + ('p', 'Block', [ + ('p', 'Line', [ + ('p::marker', 'Inline', [ + ('p::marker', 'Text', '2. ')])])]), + ('p', 'Block', [ + ('p', 'Line', [ + ('p::marker', 'Inline', [ + ('p::marker', 'Text', '-55. ')])])])]), + ('p', 'Block', [ + ('p', 'Line', [ + ('p::marker', 'Inline', [ + ('p::marker', 'Text', '1. ')])])])]) + + +@assert_no_logs +def test_counters_4(): + assert_tree(parse_all(''' + + +

    +

    +

    +

    +
    +

    +
    + '''), [ + ('section', 'Block', [ + ('section', 'Block', [ + ('section', 'Line', [ + ('section::before', 'Inline', [])])]), + ('h1', 'Block', [ + ('h1', 'Line', [ + ('h1::before', 'Inline', [ + ('h1::before', 'Text', '1')])])]), + ('h1', 'Block', [ + ('h1', 'Line', [ + ('h1::before', 'Inline', [ + ('h1::before', 'Text', '2')])])]), + ('section', 'Block', [ + ('section', 'Block', [ + ('section', 'Line', [ + ('section::before', 'Inline', [])])]), + ('h1', 'Block', [ + ('h1', 'Line', [ + ('h1::before', 'Inline', [ + ('h1::before', 'Text', '2.1')])])]), + ('h1', 'Block', [ + ('h1', 'Line', [ + ('h1::before', 'Inline', [ + ('h1::before', 'Text', '2.2')])])])]), + ('h1', 'Block', [ + ('h1', 'Line', [ + ('h1::before', 'Inline', [ + ('h1::before', 'Text', '3')])])])])]) + + +@assert_no_logs +def test_counters_5(): + assert_tree(parse_all(''' + +
    + + Scope created now, deleted after the div + +
    +

    '''), [ + ('div', 'Block', [ + ('div', 'Line', [ + ('span', 'Inline', [ + ('span', 'Text', + 'Scope created now, deleted after the div ')])])]), + ('p', 'Block', [ + ('p', 'Line', [ + ('p::before', 'Inline', [ + ('p::before', 'Text', '0')])])])]) + + +@assert_no_logs +def test_counters_6(): + # counter-increment may interfere with display: list-item + assert_tree(parse_all(''' +

    '''), [ + ('p', 'Block', [ + ('p', 'Line', [ + ('p::marker', 'Inline', [ + ('p::marker', 'Text', '0. ')])])])]) + + +@assert_no_logs +def test_counters_7(): + # Test that counters are case-sensitive + # See https://github.com/Kozea/WeasyPrint/pull/827 + assert_tree(parse_all(''' + +

    +

    +

    '''), [ + ('p', 'Block', [ + ('p', 'Line', [ + ('p::before', 'Inline', [ + ('p::before', 'Text', counter)])])]) + for counter in '2.0 2.3 4.3'.split()]) + + +@assert_no_logs +def test_counters_8(): + assert_tree(parse_all(''' + +

    +

    '''), 2 * [ + ('p', 'Block', [ + ('p::before', 'Block', [ + ('p::marker', 'Block', [ + ('p::marker', 'Line', [ + ('p::marker', 'Text', '• ')])]), + ('p::before', 'Block', [ + ('p::before', 'Line', [ + ('p::before', 'Text', 'a')])])])])]) + + +@assert_no_logs +def test_counter_styles_1(): + assert_tree(parse_all(''' + +

    +

    +

    +

    +

    + '''), [ + ('p', 'Block', [ + ('p', 'Line', [ + ('p::before', 'Inline', [ + ('p::before', 'Text', counter)])])]) + for counter in '-- • ◦ ▪ -7'.split()]) + + +@assert_no_logs +def test_counter_styles_2(): + assert_tree(parse_all(''' + +

    +

    +

    +

    +

    +

    +

    +

    +

    +

    +

    +

    +

    +

    +

    +

    +

    +

    + '''), [ + ('p', 'Block', [ + ('p', 'Line', [ + ('p::before', 'Inline', [ + ('p::before', 'Text', counter)])])]) + for counter in '''-1986 -1985 -11 -10 -09 -08 -01 00 01 02 09 10 11 + 99 100 101 4135 4136'''.split()]) + + +@assert_no_logs +def test_counter_styles_3(): + # Same test as above, but short-circuit HTML and boxes + assert [counters.format(value, 'decimal-leading-zero') for value in [ + -1986, -1985, + -11, -10, -9, -8, + -1, 0, 1, 2, + 9, 10, 11, + 99, 100, 101, + 4135, 4136 + ]] == ''' + -1986 -1985 -11 -10 -09 -08 -01 00 01 02 09 10 11 + 99 100 101 4135 4136 + '''.split() + + +@assert_no_logs +def test_counter_styles_4(): + # Now that we’re confident that they do the same, use the shorter form. + + # http://test.csswg.org/suites/css2.1/20110323/html4/content-counter-007.htm + assert [counters.format(value, 'lower-roman') for value in [ + -1986, -1985, + -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 49, 50, + 389, 390, + 3489, 3490, 3491, + 4999, 5000, 5001 + ]] == ''' + -1986 -1985 -1 0 i ii iii iv v vi vii viii ix x xi xii + xlix l ccclxxxix cccxc mmmcdlxxxix mmmcdxc mmmcdxci + mmmmcmxcix 5000 5001 + '''.split() + + +@assert_no_logs +def test_counter_styles_5(): + # http://test.csswg.org/suites/css2.1/20110323/html4/content-counter-008.htm + assert [counters.format(value, 'upper-roman') for value in [ + -1986, -1985, + -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 49, 50, + 389, 390, + 3489, 3490, 3491, + 4999, 5000, 5001 + ]] == ''' + -1986 -1985 -1 0 I II III IV V VI VII VIII IX X XI XII + XLIX L CCCLXXXIX CCCXC MMMCDLXXXIX MMMCDXC MMMCDXCI + MMMMCMXCIX 5000 5001 + '''.split() + + +@assert_no_logs +def test_counter_styles_6(): + assert [counters.format(value, 'lower-alpha') for value in [ + -1986, -1985, + -1, 0, 1, 2, 3, 4, + 25, 26, 27, 28, 29, + 2002, 2003 + ]] == ''' + -1986 -1985 -1 0 a b c d y z aa ab ac bxz bya + '''.split() + + +@assert_no_logs +def test_counter_styles_7(): + assert [counters.format(value, 'upper-alpha') for value 
in [ + -1986, -1985, + -1, 0, 1, 2, 3, 4, + 25, 26, 27, 28, 29, + 2002, 2003 + ]] == ''' + -1986 -1985 -1 0 A B C D Y Z AA AB AC BXZ BYA + '''.split() + + +@assert_no_logs +def test_counter_styles_8(): + assert [counters.format(value, 'lower-latin') for value in [ + -1986, -1985, + -1, 0, 1, 2, 3, 4, + 25, 26, 27, 28, 29, + 2002, 2003 + ]] == ''' + -1986 -1985 -1 0 a b c d y z aa ab ac bxz bya + '''.split() + + +@assert_no_logs +def test_counter_styles_9(): + assert [counters.format(value, 'upper-latin') for value in [ + -1986, -1985, + -1, 0, 1, 2, 3, 4, + 25, 26, 27, 28, 29, + 2002, 2003 + ]] == ''' + -1986 -1985 -1 0 A B C D Y Z AA AB AC BXZ BYA + '''.split() + + +@assert_no_logs +def test_counter_styles_10(): + # http://test.csswg.org/suites/css2.1/20110323/html4/content-counter-009.htm + assert [counters.format(value, 'georgian') for value in [ + -1986, -1985, + -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 20, 30, 40, 50, 60, 70, 80, 90, 100, + 200, 300, 400, 500, 600, 700, 800, 900, 1000, + 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, + 19999, 20000, 20001 + ]] == ''' + -1986 -1985 -1 0 ა + ბ გ დ ე ვ ზ ჱ თ ი ია იბ + კ ლ მ ნ ჲ ო პ ჟ რ + ს ტ ჳ ფ ქ ღ ყ შ ჩ + ც ძ წ ჭ ხ ჴ ჯ ჰ ჵ + ჵჰშჟთ 20000 20001 + '''.split() + + +@assert_no_logs +def test_counter_styles_11(): + # http://test.csswg.org/suites/css2.1/20110323/html4/content-counter-010.htm + assert [counters.format(value, 'armenian') for value in [ + -1986, -1985, + -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 20, 30, 40, 50, 60, 70, 80, 90, 100, + 200, 300, 400, 500, 600, 700, 800, 900, 1000, + 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, + 9999, 10000, 10001 + ]] == ''' + -1986 -1985 -1 0 Ա + Բ Գ Դ Ե Զ Է Ը Թ Ժ ԺԱ ԺԲ + Ի Լ Խ Ծ Կ Հ Ձ Ղ Ճ + Մ Յ Ն Շ Ո Չ Պ Ջ Ռ + Ս Վ Տ Ր Ց Ւ Փ Ք + ՔՋՂԹ 10000 10001 + '''.split() + + +@assert_no_logs +def test_margin_boxes(): + page_1, page_2 = render_pages(''' + +

    lorem ipsum + ''') + assert page_1.children[0].element_tag == 'html' + assert page_2.children[0].element_tag == 'html' + + margin_boxes_1 = [box.at_keyword for box in page_1.children[1:]] + margin_boxes_2 = [box.at_keyword for box in page_2.children[1:]] + assert margin_boxes_1 == ['@top-center', '@bottom-left', + '@bottom-left-corner'] + assert margin_boxes_2 == ['@top-center'] + + html, top_center = page_2.children + line_box, = top_center.children + text_box, = line_box.children + assert text_box.text == 'Title' + + +@assert_no_logs +def test_margin_box_string_set_1(): + # Test that both pages get string in the `bottom-center` margin box + page_1, page_2 = render_pages(''' + +

    first assignment

    +
    + ''') + + html, bottom_center = page_2.children + line_box, = bottom_center.children + text_box, = line_box.children + assert text_box.text == 'first assignment' + + html, bottom_center = page_1.children + line_box, = bottom_center.children + text_box, = line_box.children + assert text_box.text == 'first assignment' + + +@assert_no_logs +def test_margin_box_string_set_2(): + def simple_string_set_test(content_val, extra_style=""): + page_1, = render_pages(''' + +

    first assignment

    + ''' % dict(content_val=content_val, extra_style=extra_style)) + + html, top_center = page_1.children + line_box, = top_center.children + text_box, = line_box.children + if content_val in ('before', 'after'): + assert text_box.text == 'pseudo' + else: + assert text_box.text == 'first assignment' + + # Test each accepted value of `content()` as an arguemnt to `string-set` + for value in ('', 'text', 'before', 'after'): + if value in ('before', 'after'): + extra_style = "p:%s{content: 'pseudo'}" % value + simple_string_set_test(value, extra_style) + else: + simple_string_set_test(value) + + +@assert_no_logs +def test_margin_box_string_set_3(): + # Test `first` (default value) ie. use the first assignment on the page + page_1, = render_pages(''' + +

    first assignment

    +

    Second assignment

    + ''') + + html, top_center = page_1.children + line_box, = top_center.children + text_box, = line_box.children + assert text_box.text == 'first assignment' + + +@assert_no_logs +def test_margin_box_string_set_4(): + # test `first-except` ie. exclude from page on which value is assigned + page_1, page_2 = render_pages(''' + +

    first_excepted

    +
    + ''') + html, top_center = page_1.children + assert len(top_center.children) == 0 + + html, top_center = page_2.children + line_box, = top_center.children + text_box, = line_box.children + assert text_box.text == 'first_excepted' + + +@assert_no_logs +def test_margin_box_string_set_5(): + # Test `last` ie. use the most-recent assignment + page_1, = render_pages(''' + +

    String set

    +

    Second assignment

    + ''') + + html, top_center = page_1.children[:2] + line_box, = top_center.children + + text_box, = line_box.children + assert text_box.text == 'Second assignment' + + +@assert_no_logs +def test_margin_box_string_set_6(): + # Test multiple complex string-set values + page_1, = render_pages(''' + +