1
1
mirror of https://github.com/Kozea/WeasyPrint.git synced 2024-10-05 08:27:22 +03:00

Merge branch 'master' into media-queries

This commit is contained in:
Guillaume Ayoub 2019-01-24 18:36:53 +01:00
commit 857d889816
78 changed files with 3974 additions and 1780 deletions

View File

@ -1,11 +0,0 @@
[run]
branch = True
include = weasyprint/*
[report]
exclude_lines =
pragma: no cover
def __repr__
raise NotImplementedError
omit =
.*

3
.gitmodules vendored
View File

@ -1,3 +0,0 @@
[submodule "docs/sphinx_rtd_theme"]
path = docs/sphinx_rtd_theme
url = https://github.com/snide/sphinx_rtd_theme.git

View File

@ -1,3 +0,0 @@
[settings]
known_third_party=pytest
multi_line_output=4

View File

@ -12,6 +12,8 @@ matrix:
python: 3.5
- os: linux
python: 3.6
- dist: xenial
python: 3.7
- os: osx
language: generic
env: PYTHON_VERSION=3
@ -28,15 +30,15 @@ before_install:
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew tap caskroom/fonts; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew pin numpy gdal postgis; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew upgrade python; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew cask install font-dejavu-sans; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install cairo pango gdk-pixbuf libffi; fi
install:
# First line needed because of https://github.com/pypa/setuptools/pull/1089
# Needed because of https://github.com/pypa/setuptools/pull/1089
# Can be removed as soon as setuptools-36.2.0 is not the default version anymore on Travis with Python 3.4
- pip$PYTHON_VERSION install --upgrade setuptools
- pip$PYTHON_VERSION install --upgrade -e.[test]
script:
- python$PYTHON_VERSION setup.py test

View File

@ -1,8 +0,0 @@
recursive-include weasyprint/tests/resources *
include weasyprint/css/*.css
include CHANGES
include LICENSE
include pytest.ini
include tox.ini
recursive-include docs *
prune docs/_build

View File

@ -1,5 +1,184 @@
WeasyPrint changelog
====================
======
News
======
Version 44
----------
Released on 2018-12-29.
Bug fixes:
* `#742 <https://github.com/Kozea/WeasyPrint/issues/742>`_:
Don't crash during PDF generation when locale uses commas as decimal separator
* `#746 <https://github.com/Kozea/WeasyPrint/issues/746>`_:
Close file when reading VERSION
* Improve speed and memory usage for long texts.
Documentation:
* `#733 <https://github.com/Kozea/WeasyPrint/pull/733>`_:
Small documentation fixes
* `#735 <https://github.com/Kozea/WeasyPrint/pull/735>`_:
Fix broken links in NEWS.rst
Version 43
----------
Released on 2018-11-09.
Bug fixes:
* `#726 <https://github.com/Kozea/WeasyPrint/issues/726>`_:
Make empty strings clear previous values of named strings
* `#729 <https://github.com/Kozea/WeasyPrint/issues/729>`_:
Include tools in packaging
This version also includes the changes from unstable rc1 and rc2 versions
listed below.
Version 43rc2
-------------
Released on 2018-11-02.
**This version is experimental, don't use it in production. If you find bugs,
please report them!**
Bug fixes:
* `#706 <https://github.com/Kozea/WeasyPrint/issues/706>`_:
Fix text-indent at the beginning of a page
* `#687 <https://github.com/Kozea/WeasyPrint/issues/687>`_:
Allow query strings in file:// URIs
* `#720 <https://github.com/Kozea/WeasyPrint/issues/720>`_:
Optimize minimum size calculation of long inline elements
* `#717 <https://github.com/Kozea/WeasyPrint/issues/717>`_:
Display <details> tags as blocks
* `#691 <https://github.com/Kozea/WeasyPrint/issues/691>`_:
Don't recalculate max content widths when distributing extra space for tables
* `#722 <https://github.com/Kozea/WeasyPrint/issues/722>`_:
Fix bookmarks and strings set on images
* `#723 <https://github.com/Kozea/WeasyPrint/issues/723>`_:
Warn users when string() is not used in page margin
Version 43rc1
-------------
Released on 2018-10-15.
**This version is experimental, don't use it in production. If you find bugs,
please report them!**
Dependencies:
* Python 3.4+ is now needed, Python 2.x is not supported anymore
* Cairo 1.15.4+ is now needed, but 1.10+ should work with missing features
(such as links, outlines and metadata)
* Pdfrw is not needed anymore
New features:
* `Beautiful website <https://weasyprint.org>`_
* `#579 <https://github.com/Kozea/WeasyPrint/issues/579>`_:
Initial support of flexbox
* `#592 <https://github.com/Kozea/WeasyPrint/pull/592>`_:
Support @font-face on Windows
* `#306 <https://github.com/Kozea/WeasyPrint/issues/306>`_:
Add a timeout parameter to the URL fetcher functions
* `#594 <https://github.com/Kozea/WeasyPrint/pull/594>`_:
Split tests using modern pytest features
* `#599 <https://github.com/Kozea/WeasyPrint/pull/599>`_:
Make tests pass on Windows
* `#604 <https://github.com/Kozea/WeasyPrint/pull/604>`_:
Handle target counters and target texts
* `#631 <https://github.com/Kozea/WeasyPrint/pull/631>`_:
Enable counter-increment and counter-reset in page context
* `#622 <https://github.com/Kozea/WeasyPrint/issues/622>`_:
Allow pathlib.Path objects for HTML, CSS and Attachment classes
* `#674 <https://github.com/Kozea/WeasyPrint/issues/674>`_:
Add extensive installation instructions for Windows
Bug fixes:
* `#558 <https://github.com/Kozea/WeasyPrint/issues/558>`_:
Fix attachments
* `#565 <https://github.com/Kozea/WeasyPrint/issues/565>`_,
`#596 <https://github.com/Kozea/WeasyPrint/issues/596>`_,
`#539 <https://github.com/Kozea/WeasyPrint/issues/539>`_:
Fix many PDF rendering, printing and compatibility problems
* `#614 <https://github.com/Kozea/WeasyPrint/issues/614>`_:
Avoid crashes and endless loops caused by a Pango bug
* `#662 <https://github.com/Kozea/WeasyPrint/pull/662>`_:
Fix warnings and errors when generating documentation
* `#666 <https://github.com/Kozea/WeasyPrint/issues/666>`_,
`#685 <https://github.com/Kozea/WeasyPrint/issues/685>`_:
Fix many table layout rendering problems
* `#680 <https://github.com/Kozea/WeasyPrint/pull/680>`_:
Don't crash when there's no font available
* `#662 <https://github.com/Kozea/WeasyPrint/pull/662>`_:
Fix support of some align values in tables
Version 0.42.3
--------------
Released on 2018-03-27.
Bug fixes:
* `#583 <https://github.com/Kozea/WeasyPrint/issues/583>`_:
Fix floating-point number error to fix floating box layout
* `#586 <https://github.com/Kozea/WeasyPrint/issues/586>`_:
Don't optimize resume_at when splitting lines with trailing spaces
* `#582 <https://github.com/Kozea/WeasyPrint/issues/582>`_:
Fix table layout with no overflow
* `#580 <https://github.com/Kozea/WeasyPrint/issues/580>`_:
Fix inline box breaking function
* `#576 <https://github.com/Kozea/WeasyPrint/issues/576>`_:
Split replaced_min_content_width and replaced_max_content_width
* `#574 <https://github.com/Kozea/WeasyPrint/issues/574>`_:
Respect text direction and don't translate rtl columns twice
* `#569 <https://github.com/Kozea/WeasyPrint/issues/569>`_:
Get only first line's width of inline children to get linebox width
Version 0.42.2
--------------
Released on 2018-02-04.
Bug fixes:
* `#560 <https://github.com/Kozea/WeasyPrint/issues/560>`_:
Fix a couple of crashes and endless loops when breaking lines.
Version 0.42.1
--------------
Released on 2018-02-01.
Bug fixes:
* `#566 <https://github.com/Kozea/WeasyPrint/issues/566>`_:
Don't crash when using @font-config.
* `#567 <https://github.com/Kozea/WeasyPrint/issues/567>`_:
Fix text-indent with text-align: justify.
* `#465 <https://github.com/Kozea/WeasyPrint/issues/465>`_:
Fix string(\*, start).
* `#562 <https://github.com/Kozea/WeasyPrint/issues/562>`_:
Handle named pages with pseudo-class.
* `#507 <https://github.com/Kozea/WeasyPrint/issues/507>`_:
Fix running headers.
* `#557 <https://github.com/Kozea/WeasyPrint/issues/557>`_:
Avoid infinite loops in inline_line_width.
* `#555 <https://github.com/Kozea/WeasyPrint/issues/555>`_:
Fix margins, borders and padding in column layouts.
Version 0.42
------------
@ -678,7 +857,7 @@ Released on 2013-02-27.
- Correctly trim whitespace at the end of lines.
- Fix some cases with floats within inline content.
.. _text hyphenation: http://weasyprint.org/docs/features/#hyphenation
.. _text hyphenation: https://weasyprint.readthedocs.io/en/latest/features.html#css-text-module-level-3-4
Version 0.16
@ -686,8 +865,8 @@ Version 0.16
Released on 2012-12-13.
* Add the :obj:`zoom` parameter to :meth:`HTML.write_pdf` and
:meth:`Document.write_pdf() <weasyprint.document.Document.write_pdf>`
* Add the ``zoom`` parameter to ``HTML.write_pdf`` and
``Document.write_pdf() <weasyprint.document.Document.write_pdf>``
* Fix compatibility with old (and buggy) pycairo versions.
WeasyPrint is now tested on 1.8.8 in addition to the latest.
* Fix layout bugs related to line trailing spaces.
@ -700,7 +879,7 @@ Released on 2012-10-09.
* Add a low-level API that enables painting pages individually on any
cairo surface.
* **Backward-incompatible change**: remove the :meth:`HTML.get_png_pages`
* **Backward-incompatible change**: remove the ``HTML.get_png_pages``
method. The new low-level API covers this functionality and more.
* Add support for the ``font-stretch`` property.
* Add support for ``@page:blank`` to select blank pages.
@ -871,8 +1050,9 @@ Version 0.6
Released on 2012-02-29.
* *Backward incompatible*: completely change the Python API.
See the documentation: http://weasyprint.org/using/#as-a-python-library
* *Backward incompatible*: completely change the Python API. See the
documentation:
https://weasyprint.readthedocs.io/en/latest/tutorial.html#as-a-python-library
* *Backward incompatible*: Proper margin collapsing.
This changes how blocks are rendered: adjoining margins "collapse"
(their maximum is used) instead of accumulating.

View File

@ -1,19 +1,6 @@
==========
WeasyPrint
##########
.. image:: https://img.shields.io/pypi/l/WeasyPrint.svg?style=flat-square
:target: https://pypi.python.org/pypi/WeasyPrint/
:alt: License
.. image:: http://img.shields.io/pypi/v/WeasyPrint.svg?style=flat-square
:target: https://pypi.python.org/pypi/WeasyPrint/
:alt: Latest Version
.. image:: http://img.shields.io/travis/Kozea/WeasyPrint.svg?style=flat-square
:target: http://travis-ci.org/Kozea/WeasyPrint
:alt: Build status
|
==========
WeasyPrint is a visual rendering engine for HTML and CSS that can export
to PDF. It aims to support web standards for printing.
@ -25,6 +12,8 @@ pagination, and meant to be easy to hack on.
* Free software: BSD licensed
* Python 3.4+
* Website: http://weasyprint.org/
* Latest documentation: http://weasyprint.org/docs/
* Website: https://weasyprint.org/
* Documentation: https://weasyprint.readthedocs.io/
* Source code and issue tracker: https://github.com/Kozea/WeasyPrint
* Tests: https://travis-ci.org/Kozea/WeasyPrint
* Support: https://www.patreon.com/kozea

View File

@ -1,2 +1,2 @@
.. currentmodule:: weasyprint
.. include:: ../CHANGES
.. include:: ../NEWS.rst

View File

@ -1,8 +1,6 @@
# WeasyPrint documentation build configuration file.
import codecs
import os
import re
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
@ -19,22 +17,21 @@ source_suffix = '.rst'
master_doc = 'index'
# General information about the project.
project = u'WeasyPrint'
copyright = u'2011-2017, Simon Sapin and contributors, see AUTHORs'
project = 'WeasyPrint'
copyright = '2011-2018, Simon Sapin and contributors, see AUTHORs'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The full version, including alpha/beta/rc tags.
release = re.search("VERSION = '([^']+)'", codecs.open(
release = open(
os.path.join(
os.path.dirname(__file__), os.pardir, 'weasyprint', '__init__.py'),
encoding="utf-8",
).read().strip()).group(1)
os.path.dirname(__file__), os.pardir,
'weasyprint', 'VERSION')).read().strip()
# The short X.Y version.
version = '.'.join(release.split('.')[:2])
version = release
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
@ -58,7 +55,7 @@ html_logo = '_static/logo.png'
# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
html_favicon = 'icon.ico'
html_favicon = '_static/icon.ico'
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
@ -71,16 +68,16 @@ htmlhelp_basename = 'WeasyPrintdoc'
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
('index', 'weasyprint', u'WeasyPrint Documentation',
[u'Simon Sapin and contributors, see AUTHORs'], 1)
('index', 'weasyprint', 'WeasyPrint Documentation',
['Simon Sapin and contributors, see AUTHORs'], 1)
]
# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [(
'index', 'WeasyPrint', u'WeasyPrint Documentation',
u'Simon Sapin and contributors, see AUTHORs',
'index', 'WeasyPrint', 'WeasyPrint Documentation',
'Simon Sapin and contributors, see AUTHORs',
'WeasyPrint', 'One line description of project.',
'Miscellaneous'),
]
@ -88,6 +85,6 @@ texinfo_documents = [(
# Example configuration for intersphinx: refer to the Python standard library.
intersphinx_mapping = {
'python': ('http://docs.python.org/', None),
'pycairo': ('http://www.cairographics.org/documentation/pycairo/2/', None),
'cairocffi': ('http://cairocffi.readthedocs.io/', None),
'pycairo': ('https://pycairo.readthedocs.io/en/latest/', None),
'cairocffi': ('http://cairocffi.readthedocs.io/en/latest/', None),
}

View File

@ -81,10 +81,6 @@ Fonts
WeasyPrint can use any font that Pango can find installed on the system. Fonts
are automatically embedded in PDF files.
On Windows and macOS, Pango uses the native font-managing libraries. You can
use the tools provided by your OS to know which fonts are available. WeasyPrint
should support any font format that's supported by the operating system.
On Linux, Pango uses fontconfig to access fonts. You can list the available
fonts thanks to the ``fc-list`` command, and know which font is matched by a
given pattern thanks to ``fc-match``. Copying a font file into the
@ -93,6 +89,15 @@ install a new font. WeasyPrint should support `any font format handled by
FreeType <https://en.wikipedia.org/wiki/FreeType#File_formats>`_ (any format
widely used except WOFF2).
On Windows and macOS, **Pango >= 1.38** is required to use fontconfig and
FreeType like it does on Linux.
Both ``fc-list`` and ``fc-match`` will probably be present, too.
Installing new fonts on your system as usual should make them available to Pango.
Otherwise (Pango < 1.38) on Windows and macOS, the native font-managing libraries are used.
You must then use the tools provided by your OS to know which fonts are available.
WeasyPrint should support any font format that's supported by the operating system.
CSS
---
@ -173,7 +178,7 @@ New properties defined in Level 3 are supported:
- the ``overflow-wrap`` property replacing ``word-wrap``;
- the ``full-width`` value of the ``text-transform`` property; and
- the ``tab-space`` property.
- the ``tab-size`` property.
Experimental_ properties controlling hyphenation_ are supported by WeasyPrint:
@ -251,8 +256,7 @@ but that should not be a problem for common use.
The shorthand ``font`` and ``font-variant`` properties are supported.
WeasyPrint supports the ``@font-face`` rule on Linux and macOS, but does
**not** support it on Windows.
WeasyPrint supports the ``@font-face`` rule, provided that Pango >= 1.38 is installed.
WeasyPrint does **not** support the ``@font-feature-values`` rule and the
values of ``font-variant-alternates`` other than ``normal`` and
@ -281,14 +285,12 @@ All the features of this draft are available, including:
- the ``@page`` rule and the ``:left``, ``:right``, ``:first`` and ``:blank``
selectors;
- the page margin boxes;
- the page-based counters (with known bugs `#91`_, `#93`_, `#289`_);
- the page-based counters (with known limitations `#93`_);
- the page ``size``, ``bleed`` and ``marks`` properties;
- the named pages.
.. _CSS Paged Media Module Level 3: http://dev.w3.org/csswg/css3-page/
.. _#91: https://github.com/Kozea/WeasyPrint/issues/91
.. _#93: https://github.com/Kozea/WeasyPrint/issues/93
.. _#289: https://github.com/Kozea/WeasyPrint/issues/289
CSS Generated Content for Paged Media Module
@ -298,7 +300,7 @@ The `CSS Generated Content for Paged Media Module`_ (GCPM) is a working draft
defining "new properties and values, so that authors may bring new techniques
(running headers and footers, footnotes, leaders, bookmarks) to paged media".
Two features from this module have been implemented in WeasyPrint.
Three features from this module have been implemented in WeasyPrint.
.. _bookmarks:
@ -320,14 +322,32 @@ first or last element of a type present on a page, and display these strings in
page borders. This feature is really useful to add the title of the current
chapter at the top of the pages of a book for example.
The named strings can embed static strings, counters, tag contents and tag
attributes.
The named strings can embed static strings, counters, cross-references, tag contents
and tag attributes.
.. code-block:: css
@top-center { content: string(chapter); }
h2 { string-set: chapter "Current chapter: " content() }
The third feature is internal `Cross-references`_,
which makes it possible to retrieve counter or content values from
targets (anchors or ids) in the current document:
.. code-block:: css
a::after {
content: ", on page " target-counter(attr(href), page);
}
a::after {
content: ", see " target-text(attr(href));
}
In particular, ``target-counter()`` and ``target-text()`` are useful when
it comes to tables of contents,
see `an example <https://github.com/Kozea/WeasyPrint/pull/652#issuecomment-403276559>`_.
The other features of GCPM are **not** implemented:
- running elements (``running()`` and ``element()``);
@ -336,13 +356,12 @@ The other features of GCPM are **not** implemented:
``footnote-policy``);
- page selectors and page groups (``:nth()`` pseudo-class);
- leaders (``content: leader()``);
- cross-references (``target-counter()``, ``target-counters()`` and
``target-text()``);
- bookmark states (``bookmark-state``).
.. _CSS Generated Content for Paged Media Module: http://www.w3.org/TR/css-gcpm-3/
.. _PDF bookmarks: http://www.w3.org/TR/css-gcpm-3/#bookmarks
.. _Named strings: http://www.w3.org/TR/css-gcpm-3/#named-strings
.. _Cross-references: https://www.w3.org/TR/css-gcpm-3/#cross-references
.. _experimental: http://www.w3.org/TR/css-2010/#experimental
.. _user agent stylesheet: https://github.com/Kozea/WeasyPrint/blob/master/weasyprint/css/html5_ua.css
@ -350,14 +369,14 @@ The other features of GCPM are **not** implemented:
CSS Color Module Level 3
~~~~~~~~~~~~~~~~~~~~~~~~
The `CSS Color Module Level 3`_ is a recommandation defining "CSS properties
The `CSS Color Module Level 3`_ is a recommendation defining "CSS properties
which allow authors to specify the foreground color and opacity of an
element". Its main goal is to specify how colors are defined, including color
keywords and the ``#rgb``, ``#rrggbb``, ``rgb()``, ``rgba()``, ``hsl()``,
``hsla()`` syntaxes. Opacity and alpha compositing are also defined in this
document.
This recommandation is fully implemented in WeasyPrint, except the deprecated
This recommendation is fully implemented in WeasyPrint, except the deprecated
System Colors.
.. _CSS Color Module Level 3: http://www.w3.org/TR/css3-color/
@ -386,7 +405,7 @@ transformations (``matrix3d``, ``rotate(3d|X|Y|Z)``, ``translate(3d|Z)``,
CSS Backgrounds and Borders Module Level 3
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The `CSS Backgrounds and Borders Module Level 3`_ is a candidate recommandation
The `CSS Backgrounds and Borders Module Level 3`_ is a candidate recommendation
defining properties dealing "with the decoration of the border area and with
the background of the content, padding and border areas".
@ -425,7 +444,7 @@ CSS Image Values and Replaced Content Module Level 3 / 4
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The `Image Values and Replaced Content Module Level 3`_ is a candidate
recommandation introducing "additional ways of representing 2D images, for
recommendation introducing "additional ways of representing 2D images, for
example as a list of URIs denoting fallbacks, or as a gradient", defining
"several properties for manipulating raster images and for sizing or
positioning replaced elements" and "generic sizing algorithm for replaced
@ -456,7 +475,7 @@ CSS Basic User Interface Module Level 3
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The `CSS Basic User Interface Module Level 3`_ also known as CSS3 UI is a
candidate recommandation describing "CSS properties which enable authors to
candidate recommendation describing "CSS properties which enable authors to
style user interface related properties and values."
Only one new property defined in this document is implemented in WeasyPrint:

View File

@ -10,14 +10,14 @@ install the `development version`_ of WeasyPrint:
git clone git://github.com/Kozea/WeasyPrint.git
cd WeasyPrint
virtualenv --system-site-packages env
python3 -m venv env
. env/bin/activate
pip install Sphinx -e .[test]
pip install -e .[doc,test]
weasyprint --help
This will install WeasyPrint in “editable” mode (which means that you don't
need to re-install it every time you make a change in the source code) as well
as `pytest <http://pytest.org/>`_ and `Sphinx <http://sphinx.pocoo.org/>`_.
as `pytest <http://pytest.org/>`_ and `Sphinx <http://www.sphinx-doc.org/>`_.
Lastly, in order to pass unit tests, your system must have as default font any
font with a condensed variant (i.e. DejaVu) - typically installable via your
@ -49,23 +49,21 @@ Dive into the source
The rest of this document is a high-level overview of WeasyPrint's source
code. For more details, see the various docstrings or even the code itself.
When in doubt, feel free to `ask <http://weasyprint.org/community>`_!
When in doubt, feel free to `ask <http://weasyprint.org/about/>`_!
Much like `in web browsers
<http://www.html5rocks.com/en/tutorials/internals/howbrowserswork/#The_main_flow>`_,
the rendering of a document in WeasyPrint goes like this:
1. The HTML document is fetched and parsed into a tree of elements (like DOM)
1. The HTML document is fetched and parsed into a tree of elements (like DOM).
2. CSS stylesheets (either found in the HTML or supplied by the user) are
fetched and parsed
3. The stylesheets are applied to the DOM tree
4. The DOM tree with styles is transformed into a *formatting structure* made of rectangular boxes.
fetched and parsed.
3. The stylesheets are applied to the DOM-like tree.
4. The DOM-like tree with styles is transformed into a *formatting structure*
made of rectangular boxes.
5. These boxes are *laid-out* with fixed dimensions and position onto pages.
6. For each page, the boxes:
- are re-ordered to observe stacking rules, and
- are drawn on a PDF page.
6. For each page, the boxes are re-ordered to observe stacking rules, and are
drawn on a PDF page.
7. Cairo's PDF is modified to add metadata such as bookmarks and hyperlinks.
@ -82,6 +80,7 @@ CSS
As with HTML, CSS stylesheets are parsed in the :class:`weasyprint.CSS` class
with an external library, tinycss2_.
In addition to the actual parsing, the :mod:`weasyprint.css` and
:mod:`weasyprint.css.validation` modules do some pre-processing:
@ -90,10 +89,9 @@ After the In addition to the actual parsing, the :mod:`weasyprint.css` and
from raw tinycss2 tokens into a higher-level form.
* Shorthand properties are expanded. For example, ``margin`` becomes
``margin-top``, ``margin-right``, ``margin-bottom`` and ``margin-left``.
* Hyphens in property names are replaced by underscores (``margin-top``
becomes ``margin_top``) so that they can be used as Python attribute names
later on. This transformation is safe since none for the know (not ignored)
properties have an underscore character.
* Hyphens in property names are replaced by underscores (``margin-top`` becomes
``margin_top``). This transformation is safe since none of the known (not
ignored) properties have an underscore character.
* Selectors are pre-compiled with cssselect2_.
.. _tinycss2: https://pypi.python.org/pypi/tinycss2
@ -124,11 +122,10 @@ an absolute length or a percentage.
The final result of the :func:`~weasyprint.css.get_all_computed_styles`
function is a big dict where keys are ``(element, pseudo_element_type)``
tuples, and keys are style dict objects. Elements are
ElementTree elements, while the type of pseudo-element is a string
for eg. ``::first-line`` selectors, or :obj:`None` for “normal”
elements. Style dict objects are dicts with attribute read-only access
mapping property names to the computed values. (The return value is not the
tuples, and values are style dict objects. Elements are ElementTree elements,
while the type of pseudo-element is a string for eg. ``::first-line``
selectors, or :obj:`None` for “normal” elements. Style dict objects are dicts
mapping property names to the computed values. (The return value is not the
dict itself, but a convenience :func:`style_for` function for accessing it.)
@ -152,13 +149,14 @@ see the module and class docstrings.
The :mod:`weasyprint.formatting_structure.build` module takes an ElementTree
tree with associated computed styles, and builds a formatting structure. It
generates the right boxes for each element and ensures they conform to the
models rules. (Eg. an inline box can not contain a block.) Each box has a
model's rules (e.g. an inline box can not contain a block). Each box has a
:attr:`.style` attribute containing the style dict of computed values.
The main logic is based on the ``display`` property, but it can be overridden
for some elements by adding a handler in the ``weasyprint.html`` module.
This is how ``<img>`` and ``<td colspan=3>`` are currently implemented,
for example.
This module is rather short as most of HTML is defined in CSS rather than
in Python, in the `user agent stylesheet`_.
@ -205,7 +203,7 @@ parts of the code.
The final result of the layout is a list of :class:`PageBox` objects.
.. [#] These are the coordinates *if* no `CSS transform`_ applies.
Transforms change the actual location of boxes, but they are applies
Transforms change the actual location of boxes, but they are applied
later during drawing and do not affect layout.
.. _used values: http://www.w3.org/TR/CSS21/cascade.html#used-value
.. _CSS transform: http://www.w3.org/TR/css3-transforms/
@ -232,6 +230,6 @@ The code lives in the :mod:`weasyprint.draw` module.
Metadata
........
Finally (step 8), the :mod:`weasyprint.pdf` module parses the PDF file
produced by cairo and makes appends to it to add meta-data:
internal and external hyperlinks, as well as outlines / bookmarks.
Finally (step 7), the :mod:`weasyprint.pdf` module parses (if needed) the PDF
file produced by cairo and adds metadata that cannot be added by cairo:
attachments, embedded files, trim box and bleed box.

View File

@ -3,22 +3,23 @@ Installing
WeasyPrint |version| depends on:
* CPython_ ≥ 3.4
* CPython_ ≥ 3.4.0
* cairo_ ≥ 1.15.4 [#]_
* Pango_ ≥ 1.38.0 [#]_
* setuptools_ ≥ 30.3.0 [#]_
* CFFI_ ≥ 0.6
* html5lib_ ≥ 0.999999999
* cairocffi_ ≥ 0.5
* cairocffi_ ≥ 0.9.0
* tinycss2_ ≥ 0.5
* cssselect2_ ≥ 0.1
* CairoSVG_ ≥ 1.0.20
* Pyphen_ ≥ 0.8
* pdfrw_ ≥ 0.4
* GDK-PixBuf_ ≥ 2.25.0 [#]_
.. _CPython: http://www.python.org/
.. _cairo: http://cairographics.org/
.. _Pango: http://www.pango.org/
.. _setuptools: https://pypi.org/project/setuptools/
.. _CFFI: https://cffi.readthedocs.io/
.. _html5lib: https://html5lib.readthedocs.io/
.. _cairocffi: https://cairocffi.readthedocs.io/
@ -26,7 +27,6 @@ WeasyPrint |version| depends on:
.. _cssselect2: https://cssselect2.readthedocs.io/
.. _CairoSVG: http://cairosvg.org/
.. _Pyphen: http://pyphen.org/
.. _pdfrw: https://github.com/pmaupin/pdfrw/
.. _GDK-PixBuf: https://live.gnome.org/GdkPixbuf
@ -34,16 +34,15 @@ Python, cairo, Pango and GDK-PixBuf need to be installed separately. See
platform-specific instructions for :ref:`Linux <linux>`, :ref:`macOS <macos>`
and :ref:`Windows <windows>` below.
Install WeasyPrint with pip_.
This will automatically install most of dependencies.
You probably need either virtualenv_ (recommended) or using ``sudo``.
Install WeasyPrint with pip_. This will automatically install most of the
dependencies. You probably need either a virtual environment (venv,
recommended) or to use ``sudo``.
.. _virtualenv: http://www.virtualenv.org/
.. _pip: http://pip-installer.org/
.. code-block:: sh
virtualenv ./venv
python3 -m venv ./venv
. ./venv/bin/activate
pip install WeasyPrint
@ -73,7 +72,7 @@ and open your browser at http://127.0.0.1:5000/.
If everything goes well, you're ready to :doc:`start using </tutorial>`
WeasyPrint! Otherwise, please copy the full error message and
`report the problem <http://weasyprint.org/community/>`_.
`report the problem <https://github.com/Kozea/WeasyPrint/issues/>`_.
.. [#] cairo ≥ 1.15.4 is best but older versions may work too. The test suite
passes on cairo 1.14, and passes with some tests marked as “expected
@ -81,11 +80,15 @@ WeasyPrint! Otherwise, please copy the full error message and
you get incomplete SVG renderings, please read `#339
<https://github.com/Kozea/WeasyPrint/issues/339>`_. If you get invalid
PDF files, please read `#565
<https://github.com/Kozea/WeasyPrint/issues/565>`_.
<https://github.com/Kozea/WeasyPrint/issues/565>`_. Some PDF metadata
including PDF information, hyperlinks and bookmarks require 1.15.4.
.. [#] pango ≥ 1.29.3 is required, but 1.38.0 is needed to handle `@font-face`
CSS rules.
.. [#] setuptools ≥ 30.3.0 is required to install WeasyPrint, but 39.2.0 is
required to build the package.
.. [#] Without it, PNG and SVG are the only supported image formats.
JPEG, GIF and others are not available.
@ -104,7 +107,7 @@ CFFI needs *libffi* with development files. On Debian, the package is called
``libffi-dev``.
If your favorite system is not listed here but you know the package names,
`tell us <http://weasyprint.org/community/>`_ so we can add it here.
`tell us <http://weasyprint.org/about/>`_ so we can add it here.
Debian / Ubuntu
~~~~~~~~~~~~~~~
@ -149,6 +152,16 @@ install it with pip after installing the following packages:
emerge pip setuptools wheel cairo pango gdk-pixbuf cffi
Alpine
~~~~~~
For Alpine Linux 3.6 or newer:
.. code-block:: sh
apk --update --upgrade add gcc musl-dev jpeg-dev zlib-dev libffi-dev cairo-dev pango-dev gdk-pixbuf-dev
.. _macos:
macOS
@ -164,6 +177,14 @@ official installation method relies on Homebrew:
Don't forget to use the `pip3` command to install WeasyPrint, as `pip` may be
using the version of Python installed with macOS.
If you get the `Fontconfig error: Cannot load default config file` message,
then try reinstalling fontconfig with the `universal` option:
.. code-block:: sh
brew uninstall fontconfig
brew install fontconfig --universal
You can also try with Macports, but please notice that this solution is not
tested and thus not recommended (**also known as "you're on your own and may
end up crying blood with sad dolphins for eternity"**):
@ -180,35 +201,264 @@ Windows
Dear Windows user, please follow these steps carefully.
Really carefully. Don't cheat.
Really carefully. Don't cheat.
**If you decide to install Python or GTK 32 bit on Windows 64 bit, you're on
your own, don't even try to report an issue, kittens will die because of you.**
Besides a proper Python installation and a few Python packages, WeasyPrint
needs the Pango, Cairo and GDK-PixBuf libraries. They are required for the
graphical stuff: Text and image rendering. These libraries aren't Python
packages. They are part of `GTK+ <https://en.wikipedia.org/wiki/GTK+>`_
(formerly known as GIMP Toolkit), and must be installed separately.
- Install `Python 3.6.x <https://www.python.org/downloads/release/python>`_
**with "Add Python 3.6 to PATH" checked**:
The following installation instructions for the GTK+ libraries don't work on
Windows XP. That means: Windows Vista or later is required.
- "Windows x86 executable installer" on Windows 32 bit,
- "Windows x86-64 executable installer" on Windows 64 bit,
Of course you can decide to install ancient WeasyPrint versions with an
erstwhile Python, combine it with outdated GTK+ libraries on any Windows
version you like, but if you decide to do that **you're on your own, don't even
try to report an issue, kittens will die because of you.**
- install GTK **with "Set up PATH environment variable to include GTK+"
checked**:
Step 1 - Install Python
~~~~~~~~~~~~~~~~~~~~~~~
- on Windows 32 bit: `gtk2-runtime-x.x.x-x-x-x-ash.exe
<http://gtk-win.sourceforge.net/home/index.php/Main/Downloads>`_,
- on Windows 64 bit: `gtk3-runtime-x.x.x-x-x-x-ts-win64.exe
<https://github.com/tschoonj/GTK-for-Windows-Runtime-Environment-Installer>`_,
Install the `latest Python 3.x <https://www.python.org/downloads/windows/>`_
- reboot,
- install `Visual C++ Build Tools
<https://landinghub.visualstudio.com/visual-cpp-build-tools>`_ as explained
in `Python's wiki <https://wiki.python.org/moin/WindowsCompilers>`_,
- install WeasyPrint with ``python -m pip install weasyprint``,
- test with ``python -m weasyprint http://weasyprint.org weasyprint.pdf``.
- On Windows 32 bit download the "Windows **x86** executable installer"
- On Windows 64 bit download the "Windows **x86-64** executable installer"
Follow the `instructions <https://docs.python.org/3/using/windows.html>`_.
You may customize your installation as you like, but we suggest that you
"Add Python 3.x to PATH" for convenience and let the installer "install pip".
Step 2 - Update pip and setuptools packages
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Python is bundled with modules that may have been updated since the release.
Please open a *Command Prompt* and execute the following command:
.. code-block:: console
python -m pip install --upgrade pip setuptools
Step 3 - Install WeasyPrint
~~~~~~~~~~~~~~~~~~~~~~~~~~~
In the console window execute the following command to install the WeasyPrint
package:
.. code-block:: console
python -m pip install WeasyPrint
Step 4 - Install the GTK+ libraries
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
There's one thing you **must** observe:
- If your Python is 32 bit you must use the 32 bit versions of those libraries.
- If your Python is 64 bit you must use the 64 bit versions of those libraries.
If you mismatch the bitness, the warning about kittens dying applies.
In case you forgot which Python you installed, ask Python (in the console
window):
.. code-block:: console
python --version --version
Having installed Python 64 bit you can either use the :ref:`GTK+ 64 Bit
Installer <gtk64installer>` or install the 64-bit :ref:`GTK+ via MSYS2
<msys2_gtk>`.
On Windows 32 bit or if you decided to install Python 32 bit on your Windows 64
bit machine you'll have to install the 32-bit :ref:`GTK+ via MSYS2
<msys2_gtk>`.
.. note::
Installing those libraries doesn't mean something extraordinary. It only
means that the files must be on your computer and WeasyPrint must be able
to find them, which is achieved by putting the path-to-the-libs into your
Windows ``PATH``.
.. _msys2_gtk:
Install GTK+ with the aid of MSYS2
""""""""""""""""""""""""""""""""""
Sadly the `GTK+ Runtime for 32 bit Windows
<https://gtk-win.sourceforge.io/home/index.php/Main/Home>`_ was discontinued in
April 2017. Since then developers are advised to either bundle GTK+ with their
software (which is beyond the capacities of the WeasyPrint maintainers) or
install it through the `MSYS2 project <https://msys2.github.io/>`_.
With the help of MSYS2, both the 32 bit as well as the 64 bit GTK+ can be
installed. If you installed the 64 bit Python and don't want to bother with
MSYS2, then go ahead and use the :ref:`GTK+ 64 Bit Installer <gtk64installer>`.
MSYS2 is a development environment. We (somehow) mis-use it to only supply the
up-to-date GTK+ runtime library files in a subfolder we can inject into our
``PATH``. But maybe you get interested in the full powers of MSYS2. It's the
perfect tool for experimenting with `MinGW
<https://en.wikipedia.org/wiki/MinGW>`_ and cross-platform development -- look
at its `wiki <https://github.com/msys2/msys2/wiki>`_.
Ok, let's install GTK3+.
* Download and run the `MSYS2 installer <http://www.msys2.org/>`_
- On 32 bit Windows: "msys2-**i686**-xxxxxxxx.exe"
- On 64 bit Windows: "msys2-**x86_64**-xxxxxxxx.exe"
You alternatively may download a zipped archive, unpack it and run
``msys2_shell.cmd`` as described in the `MSYS2 wiki
<https://github.com/msys2/msys2/wiki/MSYS2-installation>`_.
* Update the MSYS2 shell with
.. code-block:: console
pacman -Syuu
Close the shell by clicking the close button in the upper right corner of the window.
* Restart the MSYS2 shell. Repeat the command
.. code-block:: console
pacman -Su
until it says that there are no more packages to update.
* Install the GTK+ package and its dependencies.
To install the 32 bit (**i686**) GTK run the following command:
.. code-block:: console
pacman -S mingw-w64-i686-gtk3
The command for the 64 bit (**x86_64**) version is:
.. code-block:: console
pacman -S mingw-w64-x86_64-gtk3
The **x86_64** package cannot be installed in the 32 bit MSYS2!
* Close the shell:
.. code-block:: console
exit
* Now that all the GTK files needed by WeasyPrint are in the ``.\mingw32``
respectively in the ``.\mingw64`` subfolder of your MSYS2 installation directory,
we can (and must) make them accessible by injecting the appropriate folder into the
``PATH``.
Let's assume you installed MSYS2 in ``C:\msys2``. Then the folder to inject is:
* ``C:\msys2\mingw32\bin`` for the 32 bit GTK+
* ``C:\msys2\mingw64\bin`` for the 64 bit GTK+
You can either persist it through *Advanced System Settings* -- if you don't
know how to do that, read `How to set the path and environment variables in
Windows <https://www.computerhope.com/issues/ch000549.htm>`_ -- or
temporarily inject the folder before you run WeasyPrint.
.. _gtk64installer:
GTK+ 64 Bit Installer
""""""""""""""""""""""
If your Python is 64 bit you can use an installer extracted from MSYS2
and provided by Tom Schoonjans.
* Download and run the latest `gtk3-runtime-x.x.x-x-x-x-ts-win64.exe
<https://github.com/tschoonj/GTK-for-Windows-Runtime-Environment-Installer>`_
* If you prefer to manage your ``PATH`` environment variable yourself you
should uncheck "Set up PATH environment variable to include GTK+" and supply
it later -- either persist it through *Advanced System Settings* or
temporarily inject it before you run WeasyPrint.
.. note::
Checking the option doesn't insert the GTK-path at the beginning of your
system ``PATH``, but rather **appends** it. If there is already another
(outdated) GTK on your ``PATH`` this will lead to unpleasant problems.
In any case: When executing WeasyPrint the GTK libraries must be on its ``PATH``.
Step 5 - Run WeasyPrint
~~~~~~~~~~~~~~~~~~~~~~~
Now that everything is in place you can test WeasyPrint.
Open a fresh *Command Prompt* and execute
.. code-block:: console
python -m weasyprint http://weasyprint.org weasyprint.pdf
If you get an error like ``OSError: dlopen() failed to load a library: cairo /
cairo-2`` it's because Cairo (or the library given in your error) is not
available in one of the folders listed in your ``PATH`` environment
variable. Reinstalling GTK (and carefully reading the warnings above) will
probably solve your problem. You can also find extra help in `this bug report
<https://github.com/Kozea/WeasyPrint/issues/589>`_.
cairo-2`` it's probably because Cairo (or another GTK+ library mentioned in the
error message) is not properly available in the folders listed in your ``PATH``
environment variable.
Since you didn't cheat and followed the instructions the up-to-date and
complete set of GTK libraries **must** be present and the error is an error.
Let's find out. Enter the following command:
.. code-block:: console
WHERE libcairo-2.dll
This should respond with
*path\\to\\recently\\installed\\gtk\\binaries\\libcairo-2.dll*, for example:
.. code-block:: console
C:\msys2\mingw64\bin\libcairo-2.dll
If your system answers with *nothing found* or returns a filename not related
to your recently-installed-gtk or lists more than one location and the first
file in the list isn't actually in a subfolder of your recently-installed-gtk,
then we have caught the culprit.
Depending on the GTK installation route you took, the proper folder name is
something along the lines of:
* ``C:\msys2\mingw32\bin``
* ``C:\msys2\mingw64\bin``
* ``C:\Program Files\GTK3-Runtime Win64\bin``
Determine the correct folder and execute the following commands, replacing
``<path-to-recently-installed-gtk>`` accordingly:
.. code-block:: console
SET PROPER_GTK_FOLDER=<path-to-recently-installed-gtk>
SET PATH=%PROPER_GTK_FOLDER%;%PATH%
This puts the appropriate GTK at the beginning of your ``PATH`` and
its files are the first found when WeasyPrint requires them.
Call WeasyPrint again:
.. code-block:: console
python -m weasyprint http://weasyprint.org weasyprint.pdf
If the error is gone you should either fix your ``PATH`` permanently (via
*Advanced System Settings*) or execute the above ``SET PATH`` command by
default (once!) before you start using WeasyPrint.
If the error still occurs and if you really didn't cheat then you are allowed
to open a `new issue <https://github.com/Kozea/WeasyPrint/issues/new>`_. You
can also find extra help in this `bug report
<https://github.com/Kozea/WeasyPrint/issues/589>`_. If you cheated, then, you
know: Kittens already died.

View File

@ -28,6 +28,28 @@ If you have many documents to convert you may prefer using the Python API
in long-lived processes to avoid paying the start-up costs every time.
Adjusting Document Dimensions
.............................
Currently, WeasyPrint does not provide support for adjusting page size
or document margins via command-line flags. This is best accomplished
with the CSS ``@page`` at-rule. Consider the following example:
.. code-block:: css
@page {
size: Letter; /* Change from the default size of A4 */
margin: 2.5cm; /* Set margin on each page */
}
There is much more which can be achieved with the ``@page`` at-rule,
such as page numbers, headers, etc. Read more about the page_ at-rule,
and find an example here_.
.. _page: https://developer.mozilla.org/en-US/docs/Web/CSS/@page
.. _here: https://weasyprint.org
As a Python library
-------------------
.. currentmodule:: weasyprint
@ -81,7 +103,7 @@ that, although the argument must be named:
.. code-block:: python
from weasyprint import HTML
from weasyprint import HTML, CSS
# HTML('<h1>foo') would be filename
HTML(string='''
@ -250,23 +272,10 @@ by configuring the ``weasyprint`` logger object:
logger = logging.getLogger('weasyprint')
logger.addHandler(logging.FileHandler('/path/to/weasyprint.log'))
The ``INFO`` level is used to report the rendering progress. It is useful to
get feedback when WeasyPrint is launched in a terminal (using the ``--verbose``
option), or to give this feedback to end users when used as a library. To catch
these logs, you can for example use a filter:
.. code-block:: python
import logging
class LoggerFilter(logging.Filter):
    """Print WeasyPrint progress (``INFO``) messages instead of logging them."""

    def filter(self, record):
        """Intercept ``INFO`` records and let every other record through.

        :param record: The :class:`logging.LogRecord` being filtered.
        :returns: ``False`` for ``INFO`` records (after printing their
            message), so they are not handled any further; ``True`` for all
            other records.

        """
        # The numeric level of a LogRecord is stored in ``levelno``; there
        # is no ``level`` attribute.
        if record.levelno == logging.INFO:
            print(record.getMessage())
            return False
        # Explicitly keep warnings, errors, etc.: an implicit ``None`` would
        # be falsy and silently drop them.
        return True
logger = logging.getLogger('weasyprint')
logger.addFilter(LoggerFilter())
The ``weasyprint.progress`` logger is used to report the rendering progress. It
is useful to get feedback when WeasyPrint is launched in a terminal (using the
``--verbose`` or ``--debug`` option), or to give this feedback to end users
when used as a library.
See the documentation of the :mod:`logging` module for details.
@ -321,7 +330,7 @@ WeasyPrint as a PNG image. Start it with:
Security
--------
When used with untrusted HTMl or untrusted CSS, WeasyPrint can meet security
When used with untrusted HTML or untrusted CSS, WeasyPrint can meet security
problems. You will need extra configuration in your Python application to avoid
high memory use, endless renderings or local files leaks.

View File

@ -1,3 +1,75 @@
[metadata]
name = WeasyPrint
url = https://weasyprint.org/
version = file: weasyprint/VERSION
license = BSD
description = The Gorgeous Document Factory
long_description = file: README.rst
long_description_content_type = text/x-rst
author = Simon Sapin
author_email = community@kozea.fr
platforms =
Linux
macOS
Windows
keywords =
html
css
pdf
converter
classifiers =
Development Status :: 5 - Production/Stable
Intended Audience :: Developers
License :: OSI Approved :: BSD License
Programming Language :: Python :: 3
Programming Language :: Python :: 3.4
Programming Language :: Python :: 3.5
Programming Language :: Python :: 3.6
Programming Language :: Python :: 3.7
Topic :: Internet :: WWW/HTTP
Topic :: Text Processing :: Markup :: HTML
Topic :: Multimedia :: Graphics :: Graphics Conversion
Topic :: Printing
[options]
packages = find:
zip_safe = false
setup_requires = pytest-runner
install_requires =
cffi>=0.6
html5lib>=0.999999999
cairocffi>=0.9.0
tinycss2>=0.5
cssselect2>=0.1
CairoSVG>=1.0.20
Pyphen>=0.8
tests_require =
pytest-runner
pytest-cov
pytest-flake8
pytest-isort
[options.entry_points]
console-scripts = weasyprint = weasyprint.__main__:main
[options.package_data]
weasyprint = VERSION
weasyprint.tests = resources/*.*, resources/*/*
weasyprint.css = *.css
[options.extras_require]
doc =
sphinx
sphinx_rtd_theme
test =
pytest-runner
pytest-cov
pytest-flake8
pytest-isort
[bdist_wheel]
python-tag = py3
[build_sphinx]
source-dir = docs
build-dir = docs/_build
@ -6,5 +78,21 @@ build-dir = docs/_build
test = pytest
[tool:pytest]
addopts = --cov --flake8 --isort
addopts = --flake8 --isort
norecursedirs = build dist .cache .eggs .git
[coverage:run]
branch = True
include = weasyprint/*
[coverage:report]
exclude_lines =
pragma: no cover
def __repr__
raise NotImplementedError
omit =
.*
[isort]
default_section = THIRDPARTY
multi_line_output = 4

View File

@ -6,83 +6,18 @@
WeasyPrint converts web documents to PDF.
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import re
import sys
from os import path
from setuptools import find_packages, setup
from setuptools import setup
if sys.version_info.major < 3:
raise RuntimeError(
'WeasyPrint does not support Python 2.x anymore. '
'Please use Python 3 or install an older version of WeasyPrint.')
VERSION = re.search(b"VERSION = '([^']+)'", open(
path.join(path.dirname(__file__), 'weasyprint', '__init__.py'), 'rb',
).read().strip()).group(1).decode('ascii')
LONG_DESCRIPTION = open(path.join(path.dirname(__file__), 'README.rst')).read()
REQUIREMENTS = [
# XXX: Keep this in sync with docs/install.rst
'html5lib>=0.999999999',
'tinycss2>=0.5',
'cssselect2>=0.1',
'cffi>=0.6',
'cairocffi>=0.5',
'Pyphen>=0.8',
'pdfrw>=0.4',
'CairoSVG>=1.0.20',
# C dependencies: Gdk-Pixbuf (optional), Pango, cairo.
]
needs_pytest = {'pytest', 'test', 'ptr'}.intersection(sys.argv)
pytest_runner = ['pytest-runner'] if needs_pytest else []
setup(
name='WeasyPrint',
version=VERSION,
url='http://weasyprint.org/',
license='BSD',
description='WeasyPrint converts web documents to PDF.',
long_description=LONG_DESCRIPTION,
author='Simon Sapin',
author_email='simon.sapin@kozea.fr',
classifiers=[
'Development Status :: 5 - Production/Stable',
'Intended Audience :: Developers',
'License :: OSI Approved :: BSD License',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Topic :: Internet :: WWW/HTTP',
'Topic :: Text Processing :: Markup :: HTML',
'Topic :: Multimedia :: Graphics :: Graphics Conversion',
'Topic :: Printing',
],
packages=find_packages(),
package_data={
'weasyprint.tests': ['resources/*.*', 'resources/*/*'],
'weasyprint.css': ['*.css']},
zip_safe=False,
install_requires=REQUIREMENTS,
setup_requires=pytest_runner,
test_suite='weasyprint.tests',
tests_require=[
'pytest-runner', 'pytest-cov', 'pytest-flake8', 'pytest-isort'],
extras_require={
'test': [
'pytest-runner', 'pytest-cov', 'pytest-flake8', 'pytest-isort']},
entry_points={
'console_scripts': [
'weasyprint = weasyprint.__main__:main',
],
},
)
setup()

View File

@ -1,5 +0,0 @@
#!/usr/bin/env python
from weasyprint.__main__ import main
if __name__ == '__main__':
main()

1
weasyprint/VERSION Normal file
View File

@ -0,0 +1 @@
44

View File

@ -7,12 +7,14 @@
The public API is what is accessible from this "root" package
without importing sub-modules.
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import os
import sys
from pathlib import Path
import contextlib
import html5lib
@ -25,8 +27,20 @@ if sys.version_info.major < 3:
'WeasyPrint does not support Python 2.x anymore. '
'Please use Python 3 or install an older version of WeasyPrint.')
VERSION = '0.42'
__version__ = VERSION
# Locate the directory holding the VERSION data file.
if not hasattr(sys, 'frozen'):
    # Regular installation: VERSION sits next to this module.
    ROOT = os.path.dirname(__file__)
elif hasattr(sys, '_MEIPASS'):
    # Frozen with PyInstaller
    # See https://github.com/Kozea/WeasyPrint/pull/540
    ROOT = sys._MEIPASS
else:
    # Frozen with something else (py2exe, etc.)
    # See https://github.com/Kozea/WeasyPrint/pull/269
    ROOT = os.path.dirname(sys.executable)

# The version number is read from the VERSION file found in ROOT.
with open(os.path.join(ROOT, 'VERSION')) as fp:
    VERSION = __version__ = fp.read().strip()
# Used for 'User-Agent' in HTTP and 'Creator' in PDF
VERSION_STRING = 'WeasyPrint %s (http://weasyprint.org/)' % VERSION
@ -38,7 +52,7 @@ __all__ = ['HTML', 'CSS', 'Attachment', 'Document', 'Page',
# Import after setting the version, as the version is used in other modules
from .urls import (fetch, default_url_fetcher, path2url, ensure_url,
url_is_absolute) # noqa
from .logger import LOGGER # noqa
from .logger import LOGGER, PROGRESS_LOGGER # noqa
# Some imports are at the end of the file (after the CSS class)
# to work around circular imports.
@ -57,7 +71,7 @@ class HTML(object):
absolute.
:param url: An absolute, fully qualified URL.
:param file_obj: A file-like: any object with a :meth:`~file.read` method.
:param string: A string of HTML source. (This argument must be named.)
:param string: A string of HTML source.
Specifying multiple inputs is an error:
``HTML(filename="foo.html", url="localhost://bar.html")``
@ -82,7 +96,7 @@ class HTML(object):
def __init__(self, guess=None, filename=None, url=None, file_obj=None,
string=None, encoding=None, base_url=None,
url_fetcher=default_url_fetcher, media_type='print'):
LOGGER.info(
PROGRESS_LOGGER.info(
'Step 1 - Fetching and parsing HTML - %s',
guess or filename or url or
getattr(file_obj, 'name', 'HTML string'))
@ -237,8 +251,8 @@ class HTML(object):
class CSS(object):
"""Represents a CSS stylesheet parsed by tinycss2.
An instance is created in the same way as :class:`HTML`, except that
the ``tree`` argument is not available. All other arguments are the same.
An instance is created in the same way as :class:`HTML`, with the same
arguments.
An additional argument called ``font_config`` must be provided to handle
``@font-face`` rules. The same ``fonts.FontConfiguration`` object must be
@ -254,7 +268,7 @@ class CSS(object):
url_fetcher=default_url_fetcher, _check_mime_type=False,
media_type='print', font_config=None, matcher=None,
page_rules=None):
LOGGER.info(
PROGRESS_LOGGER.info(
'Step 2 - Fetching and parsing CSS - %s',
filename or url or getattr(file_obj, 'name', 'CSS string'))
result = _select_source(
@ -323,6 +337,8 @@ def _select_source(guess=None, filename=None, url=None, file_obj=None,
elif guess is not None:
if hasattr(guess, 'read'):
type_ = 'file_obj'
elif isinstance(guess, Path):
type_ = 'filename'
elif url_is_absolute(guess):
type_ = 'url'
else:
@ -334,6 +350,8 @@ def _select_source(guess=None, filename=None, url=None, file_obj=None,
with result as result:
yield result
elif filename is not None:
if isinstance(filename, Path):
filename = str(filename)
if base_url is None:
base_url = path2url(filename)
with open(filename, 'rb') as file_obj:

View File

@ -4,7 +4,7 @@
Command-line interface to WeasyPrint.
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
@ -44,20 +44,20 @@ def main(argv=None, stdout=None, stdin=None):
:ref:`stylesheet-origins`) to add to the document
(e.g. ``-s print.css``). Multiple stylesheets are allowed.
.. option:: -m <type>, --media-type <type>
Set the media type to use for ``@media``. Defaults to ``print``.
.. option:: -r <dpi>, --resolution <dpi>
For PNG output only. Set the resolution in PNG pixel per CSS inch.
Defaults to 96, which means that PNG pixels match CSS pixels.
.. option:: --base-url <URL>
.. option:: -u <URL>, --base-url <URL>
Set the base for relative URLs in the HTML input.
Defaults to the input's own URL, or the current directory for stdin.
.. option:: -m <type>, --media-type <type>
Set the media type to use for ``@media``. Defaults to ``print``.
.. option:: -a <file>, --attachment <file>
Adds an attachment to the document. The attachment is
@ -68,6 +68,14 @@ def main(argv=None, stdout=None, stdin=None):
Follow HTML presentational hints.
.. option:: -v, --verbose
Show warnings and information messages.
.. option:: -d, --debug
Show debugging messages.
.. option:: --version
Show the version number. Other options and arguments are ignored.
@ -95,7 +103,7 @@ def main(argv=None, stdout=None, stdin=None):
parser.add_argument('-r', '--resolution', type=float,
help='PNG only: the resolution in pixel per CSS inch. '
'Defaults to 96, one PNG pixel per CSS pixel.')
parser.add_argument('--base-url',
parser.add_argument('-u', '--base-url',
help='Base for relative URLs in the HTML input. '
"Defaults to the input's own filename or URL "
'or the current directory for stdin.')
@ -105,7 +113,9 @@ def main(argv=None, stdout=None, stdin=None):
parser.add_argument('-p', '--presentational-hints', action='store_true',
help='Follow HTML presentational hints.')
parser.add_argument('-v', '--verbose', action='store_true',
help='Show various debugging information.')
help='Show warnings and information messages.')
parser.add_argument('-d', '--debug', action='store_true',
help='Show debugging messages.')
parser.add_argument(
'input', help='URL or filename of the HTML input, or - for stdin')
parser.add_argument(
@ -162,8 +172,10 @@ def main(argv=None, stdout=None, stdin=None):
parser.error('--attachment only applies for the PDF format.')
# Default to logging to stderr.
if args.verbose:
if args.debug:
LOGGER.setLevel(logging.DEBUG)
elif args.verbose:
LOGGER.setLevel(logging.INFO)
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter('%(levelname)s: %(message)s'))
LOGGER.addHandler(handler)

View File

@ -12,12 +12,13 @@
:func:`get_all_computed_styles` function does everything, but it is itself
based on other functions in this module.
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
from collections import namedtuple
from logging import DEBUG, WARNING
import cssselect2
import tinycss2
@ -27,7 +28,7 @@ from .properties import INITIAL_NOT_COMPUTED
from .utils import remove_whitespace
from .validation import preprocess_declarations
from .validation.descriptors import preprocess_descriptors
from ..logger import LOGGER
from ..logger import LOGGER, PROGRESS_LOGGER
from ..urls import get_url_attribute, url_join, URLFetchingError
from .. import CSS
@ -61,7 +62,7 @@ class StyleFor:
# values: a PropertyValue-like object
self._computed_styles = computed_styles = {}
LOGGER.info('Step 3 - Applying CSS')
PROGRESS_LOGGER.info('Step 3 - Applying CSS')
for specificity, attributes in find_style_attributes(
html.etree_element, presentational_hints, html.base_url):
element, declarations, base_url = attributes
@ -369,8 +370,11 @@ def find_style_attributes(tree, presentational_hints=False, base_url=None):
element, style_attribute)
elif element.tag in ('tr', 'td', 'th', 'thead', 'tbody', 'tfoot'):
align = element.get('align', '').lower()
if align in ('left', 'right', 'justify'):
# TODO: we should align descendants too
# TODO: we should align descendants too
if align == 'middle':
yield specificity, check_style_attribute(
element, 'text-align:center')
elif align in ('center', 'left', 'right', 'justify'):
yield specificity, check_style_attribute(
element, 'text-align:%s' % align)
if element.get('background'):
@ -400,7 +404,10 @@ def find_style_attributes(tree, presentational_hints=False, base_url=None):
elif element.tag == 'caption':
align = element.get('align', '').lower()
# TODO: we should align descendants too
if align in ('left', 'right', 'justify'):
if align == 'middle':
yield specificity, check_style_attribute(
element, 'text-align:center')
elif align in ('center', 'left', 'right', 'justify'):
yield specificity, check_style_attribute(
element, 'text-align:%s' % align)
elif element.tag == 'col':
@ -732,18 +739,27 @@ def preprocess_stylesheet(device_media_type, base_url, stylesheet_rules,
declarations = list(preprocess_declarations(
base_url, tinycss2.parse_declaration_list(rule.content)))
if declarations:
logger_level = WARNING
try:
selectors = cssselect2.compile_selector_list(rule.prelude)
for selector in selectors:
matcher.add_selector(selector, declarations)
if selector.pseudo_element not in PSEUDO_ELEMENTS:
raise cssselect2.SelectorError(
'Unknown pseudo-element: %s'
% selector.pseudo_element)
if selector.pseudo_element.startswith('-'):
logger_level = DEBUG
raise cssselect2.SelectorError(
'ignored prefixed pseudo-element: %s'
% selector.pseudo_element)
else:
raise cssselect2.SelectorError(
'unknown pseudo-element: %s'
% selector.pseudo_element)
ignore_imports = True
except cssselect2.SelectorError as exc:
LOGGER.warning("Invalid or unsupported selector '%s', %s",
tinycss2.serialize(rule.prelude), exc)
LOGGER.log(
logger_level,
"Invalid or unsupported selector '%s', %s",
tinycss2.serialize(rule.prelude), exc)
continue
else:
ignore_imports = True

View File

@ -304,7 +304,7 @@ def length(computer, name, value, font_size=None, pixels_only=False):
context=None, font_size=font_size,
style=computer.computed)
layout.set_text('0')
line, = layout.iter_lines()
line, _ = layout.get_first_line()
logical_width, _ = text.get_size(line, computer.computed)
result = value.value * logical_width
elif unit == 'em':
@ -680,9 +680,8 @@ def strut_layout(style, context=None):
if key in context.strut_layouts:
return context.strut_layouts[key]
layout = text.Layout(
context=context, font_size=style['font_size'], style=style)
line, = layout.iter_lines()
layout = text.Layout(context, style['font_size'], style)
line, _ = layout.get_first_line()
_, _, _, _, text_height, baseline = text.first_line_metrics(
line, '', layout, resume_at=None, space_collapse=False, style=style)
if style['line_height'] == 'normal':
@ -705,7 +704,7 @@ def ex_ratio(style):
# TODO: use context to use @font-face fonts
layout = text.Layout(context=None, font_size=font_size, style=style)
layout.set_text('x')
line, = layout.iter_lines()
line, _ = layout.get_first_line()
_, ink_height_above_baseline = text.get_ink_position(line)
# Zero means some kind of failure, fallback is 0.5.
# We round to try keeping exact values that were altered by Pango.

View File

@ -237,6 +237,7 @@ dd { display: block; margin-left: 40px; unicode-bidi: isolate; }
dd[dir=ltr][dir][dir] { margin-left: 0; margin-right: 40px; }
dd[dir=rtl][dir][dir] { margin-left: 40px; margin-right: 0; }
details { display: block; unicode-bidi: isolate; }
del { text-decoration: line-through; }
dfn { font-style: italic; }

View File

@ -4,7 +4,7 @@
Various data about known properties.
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""

View File

@ -8,7 +8,7 @@
counter_values and stuff needed to build pending targets later,
when the layout of all targeted anchors has been done.
:copyright: Copyright 2018 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""

View File

@ -20,12 +20,11 @@ from ... import LOGGER
# Not applicable to the print media
NOT_PRINT_MEDIA = {
# Aural media:
# Aural media
'azimuth',
'cue',
'cue-after',
'cue-before',
'cursor',
'elevation',
'pause',
'pause-after',
@ -42,6 +41,23 @@ NOT_PRINT_MEDIA = {
'stress',
'voice-family',
'volume',
# Interactive
'cursor',
# Animations and transitions
'animation',
'animation-delay',
'animation-direction',
'animation-duration',
'animation-fill-mode',
'animation-iteration-count',
'animation-name',
'animation-play-state',
'animation-timing-function',
'transition',
'transition-delay',
'transition-duration',
'transition-property',
'transition-timing-function',
}
@ -73,7 +89,7 @@ def preprocess_declarations(base_url, declarations):
if name in NOT_PRINT_MEDIA:
validation_error(
'warning', 'the property does not apply for the print media')
'debug', 'the property does not apply for the print media')
continue
if name.startswith(PREFIX):
@ -99,6 +115,10 @@ def preprocess_declarations(base_url, declarations):
unprefixed_name)
continue
if name.startswith('-'):
validation_error('debug', 'prefixed selectors are ignored')
continue
expander_ = EXPANDERS.get(name, validate_non_shorthand)
tokens = remove_whitespace(declaration.value)
try:

View File

@ -492,7 +492,7 @@ def expand_flex(base_url, name, tokens):
yield 'flex-shrink', 0
yield 'flex-basis', 'auto'
else:
grow, shrink, basis = 0, 1, Dimension(0, 'px')
grow, shrink, basis = 1, 1, Dimension(0, 'px')
grow_found, shrink_found, basis_found = False, False, False
for token in tokens:
# "A unitless zero that is not already preceded by two flex factors

View File

@ -1064,7 +1064,8 @@ def flex_wrap(keyword):
def justify_content(keyword):
"""``justify-content`` property validation."""
return keyword in (
'flex-start', 'flex-end', 'center', 'space-between', 'space-around')
'flex-start', 'flex-end', 'center', 'space-between', 'space-around',
'space-evenly', 'stretch')
@property()
@ -1089,7 +1090,7 @@ def align_content(keyword):
"""``align-content`` property validation."""
return keyword in (
'flex-start', 'flex-end', 'center', 'space-between', 'space-around',
'stretch')
'space-evenly', 'stretch')
@property(unstable=True)

View File

@ -2,7 +2,7 @@
weasyprint.document
-------------------
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
@ -22,18 +22,20 @@ from .draw import draw_page, stacked
from .fonts import FontConfiguration
from .formatting_structure import boxes
from .formatting_structure.build import build_formatting_structure
from .html import W3C_DATE_RE
from .images import get_image_from_uri as original_get_image_from_uri
from .layout import layout_document
from .layout.backgrounds import percentage
from .logger import LOGGER
from .logger import LOGGER, PROGRESS_LOGGER
from .pdf import write_pdf_metadata
if cairo.cairo_version() < 11504:
warnings.warn(
'There are known rendering problems with cairo < 1.15.4. '
'WeasyPrint may work with older versions, but please read the note '
'about the needed cairo version on the "Install" page of the '
'documentation before reporting bugs.')
'There are known rendering problems and missing features with '
'cairo < 1.15.4. WeasyPrint may work with older versions, but please '
'read the note about the needed cairo version on the "Install" page '
'of the documentation before reporting bugs. '
'http://weasyprint.readthedocs.io/en/latest/install.html')
def _get_matrix(box):
@ -142,6 +144,31 @@ def _gather_links_and_bookmarks(box, bookmarks, links, anchors, matrix):
_gather_links_and_bookmarks(child, bookmarks, links, anchors, matrix)
def _w3c_date_to_iso(string, attr_name):
    """Transform a W3C date into an ISO-8601 date string.

    :param string:
        The date as matched by ``W3C_DATE_RE``, or ``None``.
    :param attr_name:
        Name of the metadata attribute the date comes from; only used in
        the warning logged for invalid dates.
    :returns:
        The ISO-8601 string, or ``None`` when ``string`` is ``None`` or does
        not match ``W3C_DATE_RE``.

    """
    if string is None:
        return None
    match = W3C_DATE_RE.match(string)
    if match is None:
        LOGGER.warning('Invalid %s date: %r', attr_name, string)
        return None
    groups = match.groupdict()
    # Missing date parts default to the first month/day, missing time parts
    # default to midnight.
    iso_date = '%04i-%02i-%02iT%02i:%02i:%02i' % (
        int(groups['year']),
        int(groups['month'] or 1),
        int(groups['day'] or 1),
        int(groups['hour'] or 0),
        int(groups['minute'] or 0),
        int(groups['second'] or 0))
    if groups['hour']:
        assert groups['minute']
        if groups['tz_hour']:
            assert groups['tz_hour'].startswith(('+', '-'))
            assert groups['tz_minute']
            # The regex guarantees a signed two-digit hour and a two-digit
            # minute, so the matched text can be reused as is. Converting
            # through int() would lose the sign of offsets such as "-00:30".
            iso_date += '%s:%s' % (groups['tz_hour'], groups['tz_minute'])
        else:
            # The "Z" (UTC) designator matched: the tz_* groups are None.
            iso_date += '+00:00'
    return iso_date
class Page(object):
"""Represents a single rendered page.
@ -322,7 +349,7 @@ class Document(object):
page_rules, target_collector)
get_image_from_uri = functools.partial(
original_get_image_from_uri, {}, html.url_fetcher)
LOGGER.info('Step 4 - Creating formatting structure')
PROGRESS_LOGGER.info('Step 4 - Creating formatting structure')
root_box = build_formatting_structure(
html.etree_element, style_for, get_image_from_uri,
html.base_url, target_collector)
@ -390,7 +417,8 @@ class Document(object):
"""Resolve internal hyperlinks.
Links to a missing anchor are removed with a warning.
If multiple anchors have the same name, the first is used.
If multiple anchors have the same name, the first one is used.
:returns:
A generator yielding lists (one per page) like :attr:`Page.links`,
@ -400,26 +428,29 @@ class Document(object):
and ``x, y`` are in CSS pixels from the top-left of the page.
"""
anchors = {}
anchors = set()
paged_anchors = []
for i, page in enumerate(self.pages):
paged_anchors.append([])
for anchor_name, (point_x, point_y) in page.anchors.items():
anchors.setdefault(anchor_name, (i, point_x, point_y))
if anchor_name not in anchors:
paged_anchors[-1].append((anchor_name, point_x, point_y))
anchors.add(anchor_name)
for page in self.pages:
page_links = []
for link in page.links:
link_type, anchor_name, rectangle = link
if link_type == 'internal':
target = anchors.get(anchor_name)
if target is None:
if anchor_name not in anchors:
LOGGER.error(
'No anchor #%s for internal URI reference',
anchor_name)
else:
page_links.append((link_type, target, rectangle))
page_links.append((link_type, anchor_name, rectangle))
else:
# External link
page_links.append(link)
yield page_links
yield page_links, paged_anchors.pop(0)
def make_bookmark_tree(self):
"""Make a tree of all bookmarks in the document.
@ -466,6 +497,41 @@ class Document(object):
last_by_depth.append(children)
return root
def add_hyperlinks(self, links, anchors, context, scale):
    """Include hyperlinks and named destinations in the current page.

    :param links:
        Iterable of ``(link_type, link_target, rectangle)`` tuples, where
        ``link_type`` is ``'external'``, ``'internal'`` or ``'attachment'``.
    :param anchors:
        Iterable of ``(anchor_name, x, y)`` tuples for this page.
    :param context:
        The cairo context the link and destination tags are added to.
    :param scale:
        Factor applied to every coordinate before it is rounded.

    """
    if cairo.cairo_version() < 11504:
        return

    # We round floats to avoid locale problems, see
    # https://github.com/Kozea/WeasyPrint/issues/742

    # TODO: Instead of using rects, we could use the drawing rectangles
    # defined by cairo when drawing targets. This would give a feeling
    # similar to what browsers do with links that span multiple lines.
    link_formats = {
        'external': "rect=[{} {} {} {}] uri='{}'",
        'internal': "rect=[{} {} {} {}] dest='{}'",
    }
    for link_type, link_target, rectangle in links:
        link_format = link_formats.get(link_type)
        if link_format is None:
            # Attachments are handled in write_pdf_metadata. Any other
            # type is skipped too, instead of reusing the attributes of
            # the previous link (or failing when there is none).
            continue
        coordinates = [int(round(i * scale)) for i in rectangle]
        attributes = link_format.format(
            *(coordinates + [link_target.replace("'", '%27')]))
        context.tag_begin(cairo.TAG_LINK, attributes)
        context.tag_end(cairo.TAG_LINK)

    for anchor_name, x, y in anchors:
        attributes = "name='{}' x={} y={}".format(
            anchor_name.replace("'", '%27'), int(round(x * scale)),
            int(round(y * scale)))
        context.tag_begin(cairo.TAG_DEST, attributes)
        context.tag_end(cairo.TAG_DEST)
def write_pdf(self, target=None, zoom=1, attachments=None):
"""Paint the pages in a PDF file, with meta-data.
@ -496,8 +562,13 @@ class Document(object):
# (1, 1) is overridden by .set_size() below.
surface = cairo.PDFSurface(file_obj, 1, 1)
context = cairo.Context(surface)
LOGGER.info('Step 6 - Drawing')
for page in self.pages:
PROGRESS_LOGGER.info('Step 6 - Drawing')
paged_links_and_anchors = list(self.resolve_links())
for page, links_and_anchors in zip(
self.pages, paged_links_and_anchors):
links, anchors = links_and_anchors
surface.set_size(
math.floor(scale * (
page.width + page.bleed['left'] + page.bleed['right'])),
@ -507,12 +578,73 @@ class Document(object):
context.translate(
page.bleed['left'] * scale, page.bleed['top'] * scale)
page.paint(context, scale=scale)
self.add_hyperlinks(links, anchors, context, scale)
surface.show_page()
PROGRESS_LOGGER.info('Step 7 - Adding PDF metadata')
# TODO: overwrite producer when possible in cairo
if cairo.cairo_version() >= 11504:
# Set document information
for attr, key in (
('title', cairo.PDF_METADATA_TITLE),
('description', cairo.PDF_METADATA_SUBJECT),
('generator', cairo.PDF_METADATA_CREATOR)):
value = getattr(self.metadata, attr)
if value is not None:
surface.set_metadata(key, value)
for attr, key in (
('authors', cairo.PDF_METADATA_AUTHOR),
('keywords', cairo.PDF_METADATA_KEYWORDS)):
value = getattr(self.metadata, attr)
if value is not None:
surface.set_metadata(key, ', '.join(value))
for attr, key in (
('created', cairo.PDF_METADATA_CREATE_DATE),
('modified', cairo.PDF_METADATA_MOD_DATE)):
value = getattr(self.metadata, attr)
if value is not None:
surface.set_metadata(key, _w3c_date_to_iso(value, attr))
# Set bookmarks
bookmarks = self.make_bookmark_tree()
levels = [cairo.PDF_OUTLINE_ROOT] * len(bookmarks)
while bookmarks:
title, destination, children = bookmarks.pop(0)
page, x, y = destination
# We round floats to avoid locale problems, see
# https://github.com/Kozea/WeasyPrint/issues/742
link_attribs = 'page={} pos=[{} {}]'.format(
page + 1, int(round(x * scale)),
int(round(y * scale)))
outline = surface.add_outline(
levels.pop(), title, link_attribs, 0)
levels.extend([outline] * len(children))
bookmarks = children + bookmarks
surface.finish()
LOGGER.info('Step 7 - Adding PDF metadata')
write_pdf_metadata(self, file_obj, scale, self.metadata, attachments,
self.url_fetcher)
# Add extra PDF metadata: attachments, embedded files
attachment_links = [
[link for link in page_links if link[0] == 'attachment']
for page_links, page_anchors in paged_links_and_anchors]
# Write extra PDF metadata only when there is at least one of:
# - attachments in metadata
# - attachments as function parameters
# - attachments as PDF links
# - bleed boxes
condition = (
self.metadata.attachments or
attachments or
any(attachment_links) or
any(any(page.bleed.values()) for page in self.pages))
if condition:
write_pdf_metadata(
file_obj, scale, self.url_fetcher,
self.metadata.attachments + (attachments or []),
attachment_links, self.pages)
if target is None:
return file_obj.getvalue()
@ -541,7 +673,7 @@ class Document(object):
cairo.FORMAT_ARGB32, max_width, sum_heights)
context = cairo.Context(surface)
pos_y = 0
LOGGER.info('Step 6 - Drawing')
PROGRESS_LOGGER.info('Step 6 - Drawing')
for page, width, height in zip(self.pages, widths, heights):
pos_x = (max_width - width) / 2
page.paint(context, pos_x, pos_y, scale=dppx, clip=True)

View File

@ -4,7 +4,7 @@
Take an "after layout" box tree and draw it onto a cairo context.
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
@ -1015,7 +1015,10 @@ def draw_text(context, textbox, enable_hinting):
context.move_to(textbox.position_x, textbox.position_y + textbox.baseline)
context.set_source_rgba(*textbox.style['color'])
show_first_line(context, textbox.pango_layout, enable_hinting)
textbox.pango_layout.reactivate(textbox.style)
show_first_line(context, textbox)
values = textbox.style['text_decoration']
thickness = textbox.style['font_size'] / 18 # Like other browsers do
@ -1042,6 +1045,8 @@ def draw_text(context, textbox, enable_hinting):
textbox.baseline - metrics.strikethrough_position,
thickness, enable_hinting)
textbox.pango_layout.deactivate()
def draw_text_decoration(context, textbox, offset_y, thickness,
enable_hinting):

View File

@ -4,12 +4,13 @@
Interface with external libraries managing fonts installed on the system.
:copyright: Copyright 2011-2016 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import os
import pathlib
import sys
import tempfile
import warnings
@ -17,12 +18,14 @@ import warnings
from .logger import LOGGER
from .text import (
cairo, dlopen, ffi, get_font_features, gobject, pango, pangocairo)
from .urls import fetch
from .urls import FILESYSTEM_ENCODING, fetch
# Cairo crashes with font-size: 0 when using Win32 API
# See https://github.com/Kozea/WeasyPrint/pull/599
# Probably it will crash on macOS, too, when native font rendering is used,
# Set to True on startup when fontconfig is inoperable.
# Used by text/Layout() to mask font-size: 0 with a font_size of 1.
# TODO: Should we set it to true on Windows and macOS if Pango < 13800?
ZERO_FONTSIZE_CRASHES_CAIRO = False
@ -161,46 +164,80 @@ else:
def _check_font_configuration(font_config, warn=False):
"""Check whether the given font_config has fonts.
The default fontconfig configuration file may be missing, particularly
on Windows, giving "Fontconfig error: Cannot load default config file".
The default fontconfig configuration file may be missing (particularly
on Windows or macOS, where installation of fontconfig isn't as
standardized as on Linux), resulting in "Fontconfig error: Cannot load
default config file".
No default config == No fonts.
Fontconfig tries to retrieve the system fonts as fallback, which may or
may not work, especially on macOS, where fonts can be installed at
various locations.
On Windows (at least since fontconfig 2.13) the fallback seems to work.
No default config && system fonts fallback fails == No fonts.
Config file exists, but doesn't provide fonts == No fonts.
No fonts == expect ugly output.
If you happen to have an html without a valid @font-face all
letters turn into rectangles.
If you happen to have no fonts and an html without a valid @font-face
all letters turn into rectangles.
If you happen to have an html with at least one valid @font-face
all text is styled with that font.
On Windows and macOS we can cause Pango to use native font rendering
instead of rendering fonts with FreeType. But then we must do without
@font-face. Expect other missing features and ugly output.
"""
# Nobody ever complained about such a situation on Unix-like OSes.
if not sys.platform.startswith('win'):
# On Linux we can do nothing but give warnings.
has_native_mode = (
sys.platform.startswith('win') or
sys.platform.startswith('darwin'))
if not has_native_mode and not warn:
return True
# Having fonts means: fontconfig's config file returns fonts or
# fontconfig managed to retrieve system fallback-fonts. On Windows the
# fallback strategy seems to work since fontconfig >= 2.13
fonts = fontconfig.FcConfigGetFonts(
font_config, fontconfig.FcSetSystem)
# Of course, with nfont == 1 the user won't be happy either...
if fonts.nfont > 0:
return True
if warn:
config_files = fontconfig.FcConfigGetConfigFiles(font_config)
config_file = fontconfig.FcStrListNext(config_files)
if config_file == ffi.NULL:
warnings.warn(
'@font-face not supported: '
'Cannot load default config file')
# What's the reason for zero fonts?
config_files = fontconfig.FcConfigGetConfigFiles(font_config)
config_file = fontconfig.FcStrListNext(config_files)
if config_file == ffi.NULL:
# no config file, no system fonts found
# on Windows and macOS it might help to fall back to native font
# rendering
if has_native_mode:
if warn:
warnings.warn(
'@font-face not supported: '
'FontConfig cannot load default config file')
return False
else:
warnings.warn('@font-face not supported: no fonts configured')
if warn:
warnings.warn(
'FontConfig cannot load default config file.'
'Expect ugly output.')
return True
else:
# useless config file or indeed no fonts
if warn:
warnings.warn(
'FontConfig: No fonts configured. '
'Expect ugly output.')
return True
# TODO: on Windows we could try to add the system fonts like that:
# fontdir = os.path.join(os.environ['WINDIR'], 'Fonts')
# fontconfig.FcConfigAppFontAddDir(
# font_config,
# # not shure which encoding fontconfig expects
# # not sure which encoding fontconfig expects
# fontdir.encode('mbcs'))
# Fall back to default @font-face-less behaviour.
return False
class FontConfiguration(FontConfiguration):
def __init__(self):
"""Create a FT2 font configuration.
@ -226,6 +263,7 @@ else:
fontconfig.FcConfigDestroy(self._fontconfig_config)
else:
self.font_map = None
# On Windows the font tempfiles cannot be deleted,
# putting them in a subfolder made my life easier.
self._tempdir = None
@ -267,6 +305,13 @@ else:
result = ffi.new('FcResult *')
matching_pattern = fontconfig.FcFontMatch(
config, pattern, result)
# prevent RuntimeError, see issue #677
if matching_pattern == ffi.NULL:
LOGGER.debug(
'Failed to get matching local font for "%s"',
font_name.decode('utf-8'))
continue
# TODO: do many fonts have multiple family values?
fontconfig.FcPatternGetString(
matching_pattern, b'fullname', 0, family)
@ -281,19 +326,11 @@ else:
config, pattern, result)
fontconfig.FcPatternGetString(
matching_pattern, b'file', 0, filename)
# Can't use urlopen('file://...') on Windows.
# Fails with
# URLError: <urlopen error file on local host>
if sys.platform.startswith('win'):
fetch_as_url = False
url = ffi.string(filename[0]).decode(
sys.getfilesystemencoding())
else:
url = (
'file://' +
ffi.string(filename[0]).decode('utf-8'))
path = ffi.string(filename[0]).decode(
FILESYSTEM_ENCODING)
url = pathlib.Path(path).as_uri()
else:
LOGGER.warning(
LOGGER.debug(
'Failed to load local font "%s"',
font_name.decode('utf-8'))
continue
@ -308,7 +345,7 @@ else:
with open(url, 'rb') as fd:
font = fd.read()
except Exception as exc:
LOGGER.error(
LOGGER.debug(
'Failed to load font at "%s" (%s)', url, exc)
continue
font_features = {
@ -380,7 +417,7 @@ else:
# Though it seems to work without...
return filename
else:
LOGGER.error('Failed to load font at "%s"', url)
LOGGER.debug('Failed to load font at "%s"', url)
LOGGER.warning(
'Font-face "%s" cannot be loaded',
rule_descriptors['font_family'])

View File

@ -6,7 +6,7 @@
close to the element tree is it built from, or "after layout", with
line breaks and page breaks.
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""

View File

@ -51,7 +51,7 @@
See respective docstrings for details.
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""

View File

@ -8,7 +8,7 @@
This includes creating anonymous boxes and processing whitespace
as necessary.
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
@ -22,6 +22,7 @@ import tinycss2.color3
from . import boxes, counters
from .. import html
from ..css import properties
from ..logger import LOGGER
# Maps values of the ``display`` CSS property to box types.
BOX_TYPE_FROM_DISPLAY = {
@ -321,9 +322,15 @@ def compute_content_list(content_list, parent_box, counter_values, css_token,
texts.append(separator.join(
counters.format(counter_value, counter_style)
for counter_value in counter_values.get(counter_name, [0])))
elif type_ == 'string()' and in_page_context:
# string() is only valid in @page context
texts.append(context.get_string_set_for(page, *value))
elif type_ == 'string()':
if in_page_context:
texts.append(context.get_string_set_for(page, *value))
else:
# string() is currently only valid in @page context
# See https://github.com/Kozea/WeasyPrint/issues/723
LOGGER.warn(
'"string(%s)" is only allowed in page margins' %
(' '.join(value)))
elif type_ == 'target-counter()':
anchor_token, counter_name, counter_style = value
lookup_target = target_collector.lookup_target(
@ -406,7 +413,8 @@ def compute_content_list(content_list, parent_box, counter_values, css_token,
target_collector.collect_missing_counters(
parent_box, css_token, parse_again, missing_counters,
missing_target_counters)
return boxlist
return boxlist if (texts or boxlist) else None
def content_to_boxes(style, parent_box, quote_depth, counter_values,
@ -431,17 +439,26 @@ def content_to_boxes(style, parent_box, quote_depth, counter_values,
local_children.extend(content_to_boxes(
style, parent_box, orig_quote_depth, local_counters,
get_image_from_uri, target_collector))
parent_box.children = local_children
# TODO: redo the formatting structure of the parent instead of hacking
# the already formatted structure. Find why inline_in_blocks has
# sometimes already been called, and sometimes not.
if (len(parent_box.children) == 1 and
isinstance(parent_box.children[0], boxes.LineBox)):
parent_box.children[0].children = local_children
else:
parent_box.children = local_children
if style['content'] == 'inhibit':
return []
orig_quote_depth = quote_depth[:]
css_token = 'content'
return compute_content_list(
box_list = compute_content_list(
style['content'], parent_box, counter_values, css_token, parse_again,
target_collector, get_image_from_uri, quote_depth, style['quotes'],
context, page)
return box_list or []
def compute_string_set(element, box, string_name, content_list,
@ -466,7 +483,7 @@ def compute_string_set(element, box, string_name, content_list,
box_list = compute_content_list(
content_list, box, counter_values, css_token, parse_again,
target_collector, element=element)
if box_list:
if box_list is not None:
string = ''.join(
box.text for box in box_list if isinstance(box, boxes.TextBox))
# Avoid duplicates, care for parse_again and missing counters, don't
@ -498,8 +515,12 @@ def compute_bookmark_label(element, box, content_list, counter_values,
box_list = compute_content_list(
content_list, box, counter_values, css_token, parse_again,
target_collector, element=element)
box.bookmark_label = ''.join(
box.text for box in box_list if isinstance(box, boxes.TextBox))
if box_list is None:
box.bookmark_label = ''
else:
box.bookmark_label = ''.join(
box.text for box in box_list if isinstance(box, boxes.TextBox))
def set_content_lists(element, box, style, counter_values, target_collector):
@ -608,7 +629,7 @@ def add_box_marker(box, counter_values, get_image_from_uri):
box.outside_list_marker = marker_box
def is_whitespace(box, _has_non_whitespace=re.compile('\S').search):
def is_whitespace(box, _has_non_whitespace=re.compile('\\S').search):
"""Return True if ``box`` is a TextBox with only whitespace."""
return isinstance(box, boxes.TextBox) and not _has_non_whitespace(box.text)

View File

@ -7,7 +7,7 @@
These are defined in the same terms as CSS 3 Lists:
http://dev.w3.org/csswg/css3-lists/#predefined-counters
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""

View File

@ -9,7 +9,7 @@
have intrinsic dimensions. But the only replaced elements currently
supported in WeasyPrint are images with intrinsic dimensions.
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
@ -17,10 +17,9 @@
import logging
import os.path
import re
import sys
from urllib.parse import urljoin
from . import CSS
from . import CSS, ROOT
from .css import get_child_text
from .formatting_structure import boxes
from .logger import LOGGER
@ -30,19 +29,8 @@ from .urls import get_url_attribute
level = LOGGER.level
LOGGER.setLevel(logging.ERROR)
if hasattr(sys, 'frozen'):
if hasattr(sys, '_MEIPASS'):
# Frozen with PyInstaller
# See https://github.com/Kozea/WeasyPrint/pull/540
root = sys._MEIPASS
else:
# Frozen with something else (py2exe, etc.)
# See https://github.com/Kozea/WeasyPrint/pull/269
root = os.path.dirname(sys.executable)
else:
root = os.path.dirname(__file__)
HTML5_UA_STYLESHEET = CSS(filename=os.path.join(root, 'css', 'html5_ua.css'))
HTML5_PH_STYLESHEET = CSS(filename=os.path.join(root, 'css', 'html5_ph.css'))
HTML5_UA_STYLESHEET = CSS(filename=os.path.join(ROOT, 'css', 'html5_ua.css'))
HTML5_PH_STYLESHEET = CSS(filename=os.path.join(ROOT, 'css', 'html5_ph.css'))
LOGGER.setLevel(level)
@ -124,7 +112,12 @@ def make_replaced_box(element, box, image):
else:
# TODO: support images with 'display: table-cell'?
type_ = boxes.InlineReplacedBox
return type_(element.tag, box.style, image)
new_box = type_(element.tag, box.style, image)
# TODO: check other attributes that need to be copied
# TODO: find another solution
new_box.string_set = box.string_set
new_box.bookmark_label = box.bookmark_label
return new_box
@handler('img')
@ -347,22 +340,22 @@ def strip_whitespace(string):
W3C_DATE_RE = re.compile('''
^
[ \t\n\f\r]*
(?P<year>\d\d\d\d)
(?P<year>\\d\\d\\d\\d)
(?:
-(?P<month>0\d|1[012])
-(?P<month>0\\d|1[012])
(?:
-(?P<day>[012]\d|3[01])
-(?P<day>[012]\\d|3[01])
(?:
T(?P<hour>[01]\d|2[0-3])
:(?P<minute>[0-5]\d)
T(?P<hour>[01]\\d|2[0-3])
:(?P<minute>[0-5]\\d)
(?:
:(?P<second>[0-5]\d)
(?:\.\d+)? # Second fraction, ignored
:(?P<second>[0-5]\\d)
(?:\\.\\d+)? # Second fraction, ignored
)?
(?:
Z | # UTC
(?P<tz_hour>[+-](?:[01]\d|2[0-3]))
:(?P<tz_minute>[0-5]\d)
(?P<tz_hour>[+-](?:[01]\\d|2[0-3]))
:(?P<tz_minute>[0-5]\\d)
)
)?
)?

View File

@ -4,7 +4,7 @@
Fetch and decode images in various formats.
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""

View File

@ -13,7 +13,7 @@
See http://www.w3.org/TR/CSS21/cascade.html#used-value
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
@ -24,7 +24,7 @@ from .absolute import absolute_box_layout
from .pages import make_all_pages, make_margin_boxes
from .backgrounds import layout_backgrounds
from ..formatting_structure import boxes
from ..logger import LOGGER
from ..logger import PROGRESS_LOGGER
def initialize_page_maker(context, root_box):
@ -118,7 +118,8 @@ def layout_document(enable_hinting, style_for, get_image_from_uri, root_box,
for loop in range(max_loops):
if loop > 0:
LOGGER.info('Step 5 - Creating layout - Repagination #%i' % loop)
PROGRESS_LOGGER.info(
'Step 5 - Creating layout - Repagination #%i' % loop)
initial_total_pages = actual_total_pages
pages = list(make_all_pages(context, root_box, html, pages, style_for))

View File

@ -2,7 +2,7 @@
weasyprint.absolute
-------------------
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""

View File

@ -2,7 +2,7 @@
weasyprint.backgrounds
----------------------
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""

View File

@ -4,15 +4,14 @@
Page breaking and layout for block-level and block-container boxes.
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
from math import floor
from ..formatting_structure import boxes
from .absolute import AbsolutePlaceholder, absolute_layout
from .columns import columns_layout
from .flex import flex_layout
from .float import avoid_collisions, float_layout, get_clearance
from .inlines import (
@ -34,26 +33,38 @@ def block_level_layout(context, box, max_position_y, skip_stack,
content box of the current page area.
"""
if not isinstance(box, boxes.TableBox):
resolve_percentages(box, containing_block)
if box.margin_top == 'auto':
box.margin_top = 0
if box.margin_bottom == 'auto':
box.margin_bottom = 0
collapsed_margin = collapse_margin(
adjoining_margins + [box.margin_top])
box.clearance = get_clearance(context, box, collapsed_margin)
if box.clearance is not None:
top_border_edge = box.position_y + collapsed_margin + box.clearance
box.position_y = top_border_edge - box.margin_top
adjoining_margins = []
return block_level_layout_switch(
context, box, max_position_y, skip_stack, containing_block,
device_size, page_is_empty, absolute_boxes, fixed_boxes,
adjoining_margins)
def block_level_layout_switch(context, box, max_position_y, skip_stack,
containing_block, device_size, page_is_empty,
absolute_boxes, fixed_boxes,
adjoining_margins):
"""Call the layout function corresponding to the ``box`` type."""
if isinstance(box, boxes.TableBox):
return table_layout(
context, box, max_position_y, skip_stack, containing_block,
device_size, page_is_empty, absolute_boxes, fixed_boxes)
resolve_percentages(box, containing_block)
if box.margin_top == 'auto':
box.margin_top = 0
if box.margin_bottom == 'auto':
box.margin_bottom = 0
collapsed_margin = collapse_margin(adjoining_margins + [box.margin_top])
box.clearance = get_clearance(context, box, collapsed_margin)
if box.clearance is not None:
top_border_edge = box.position_y + collapsed_margin + box.clearance
box.position_y = top_border_edge - box.margin_top
adjoining_margins = []
if isinstance(box, boxes.BlockBox):
elif isinstance(box, boxes.BlockBox):
return block_box_layout(
context, box, max_position_y, skip_stack, containing_block,
device_size, page_is_empty, absolute_boxes, fixed_boxes,
@ -123,210 +134,6 @@ def block_box_layout(context, box, max_position_y, skip_stack,
return new_box, resume_at, next_page, adjoining_margins, collapsing_through
def columns_layout(context, box, max_position_y, skip_stack, containing_block,
device_size, page_is_empty, absolute_boxes, fixed_boxes,
adjoining_margins):
"""Lay out a multi-column ``box``."""
# Implementation of the multi-column pseudo-algorithm:
# https://www.w3.org/TR/css3-multicol/#pseudo-algorithm
count = None
width = None
style = box.style
if box.style['position'] == 'relative':
# New containing block, use a new absolute list
absolute_boxes = []
box = box.copy_with_children(box.children)
height = box.style['height']
if height != 'auto' and height.unit != '%':
assert height.unit == 'px'
known_height = True
max_position_y = min(
max_position_y, box.content_box_y() + height.value)
else:
known_height = False
# TODO: the available width can be unknown if the containing block needs
# the size of this block to know its own size.
block_level_width(box, containing_block)
available_width = box.width
if count is None:
if style['column_width'] == 'auto' and style['column_count'] != 'auto':
count = style['column_count']
width = max(
0, available_width - (count - 1) * style['column_gap']) / count
elif (style['column_width'] != 'auto' and
style['column_count'] == 'auto'):
count = max(1, int(floor(
(available_width + style['column_gap']) /
(style['column_width'] + style['column_gap']))))
width = (
(available_width + style['column_gap']) / count -
style['column_gap'])
else:
count = min(style['column_count'], int(floor(
(available_width + style['column_gap']) /
(style['column_width'] + style['column_gap']))))
width = (
(available_width + style['column_gap']) / count -
style['column_gap'])
def create_column_box():
column_box = box.anonymous_from(box, children=[
child.copy() for child in box.children])
resolve_percentages(column_box, containing_block)
column_box.is_column = True
column_box.width = width
column_box.position_x = box.content_box_x()
column_box.position_y = box.content_box_y()
return column_box
def column_descendants(box):
# TODO: this filtering condition is probably wrong
if isinstance(box, (boxes.TableBox, boxes.LineBox, boxes.ReplacedBox)):
yield box
if hasattr(box, 'descendants') and box.is_in_normal_flow():
for child in box.children:
if child.is_in_normal_flow():
yield child
for grand_child in column_descendants(child):
yield grand_child
# Balance.
#
# The current algorithm starts from the ideal height (the total height
# divided by the number of columns). We then iterate until the last column
# is not the highest one. At the end of each loop, we add the minimal
# height needed to make one direct child at the top of one column go to the
# end of the previous column.
#
# We must probably rely on a real rendering for each loop, but with a
# stupid algorithm like this it can last minutes.
#
# TODO: Rewrite this!
# - We assume that the children are normal lines or blocks.
# - We ignore the forced and avoided column breaks.
# Find the total height of the content
original_max_position_y = max_position_y
column_box = create_column_box()
new_child, _, _, _, _ = block_box_layout(
context, column_box, float('inf'), skip_stack, containing_block,
device_size, page_is_empty, [], [], [])
height = new_child.margin_height()
if style['column_fill'] == 'balance':
height /= count
box_column_descendants = list(column_descendants(new_child))
# Increase the column height step by step.
while True:
# For each step, we try to find the empty height needed to make the top
# element of column i+1 fit at the end of column i. We put this needed
# space in lost_spaces.
lost_spaces = []
column_number = 0
column_first_child = True
column_top = new_child.content_box_y()
for child in box_column_descendants:
child_height = child.margin_height()
child_bottom = child.position_y + child_height - column_top
if child_bottom > height:
# The child goes lower than the column height.
if column_number < count - 1:
# We're not in the last column.
if column_first_child:
# It's the first child of the column and we're already
# below the bottom of the column. The column's height
# has to be at least the size of the child. Let's put
# the height difference into lost_spaces and continue
# the while loop.
lost_spaces = [child_bottom - height]
break
# Put the child at the top of the next column and put the
# extra empty space that would have allowed this child to
# fit into lost_spaces.
lost_spaces.append(child_bottom - height)
column_number += 1
column_first_child = True
column_top = child.position_y
else:
# We're in the last column, there's no place left to put
# that child. We need to go for another round of the while
# loop.
break
column_first_child = False
else:
# We've seen all the children and they all fit in their
# columns. Balanced height has been found, quit the while loop.
break
height += min(lost_spaces)
# TODO: check box.style['max']-height
max_position_y = min(max_position_y, box.content_box_y() + height)
# Replace the current box children with columns
children = []
if box.children:
i = 0
while True:
if i == count - 1:
max_position_y = original_max_position_y
column_box = create_column_box()
if style['direction'] == 'rtl':
column_box.position_x += (
box.width - (i + 1) * width - i * style['column_gap'])
else:
column_box.position_x += i * (width + style['column_gap'])
new_child, column_skip_stack, column_next_page, _, _ = (
block_box_layout(
context, column_box, max_position_y, skip_stack,
containing_block, device_size, page_is_empty,
absolute_boxes, fixed_boxes, None))
if new_child is None:
break
next_page = column_next_page
skip_stack = column_skip_stack
children.append(new_child)
if skip_stack is None:
break
i += 1
if i == count and not known_height:
# [If] a declaration that constrains the column height (e.g.,
# using height or max-height). In this case, additional column
# boxes are created in the inline direction.
break
else:
next_page = {'break': 'any', 'page': None}
skip_stack = None
if box.children and not children:
# The box has children but none can be drawn, let's skip the whole box
return None, (0, None), {'break': 'any', 'page': None}, [0], False
# Set the height of box and the columns
box.children = children
if box.children:
heights = [child.margin_height() for child in box.children]
if box.height != 'auto':
heights.append(box.height)
if box.min_height != 'auto':
heights.append(box.min_height)
box.height = max(heights)
for child in box.children:
child.height = box.margin_height()
else:
box.height = 0
if box.style['position'] == 'relative':
# New containing block, resolve the layout of the absolute descendants
for absolute_box in absolute_boxes:
absolute_layout(context, absolute_box, box, fixed_boxes)
return box, skip_stack, next_page, [0], False
@handle_min_max_width
def block_replaced_width(box, containing_block, device_size):
# http://www.w3.org/TR/CSS21/visudet.html#block-replaced-width
@ -470,7 +277,7 @@ def block_container_layout(context, box, max_position_y, skip_stack,
this_box_adjoining_margins = adjoining_margins
collapsing_with_children = not (
box.border_top_width or box.padding_top or
box.border_top_width or box.padding_top or box.is_flex_item or
establishes_formatting_context(box) or box.is_for_root_element)
if collapsing_with_children:
# XXX not counting margins in adjoining_margins, if any
@ -673,13 +480,17 @@ def block_container_layout(context, box, max_position_y, skip_stack,
position_y += collapsed_margin
adjoining_margins = []
page_is_empty_with_no_children = page_is_empty and not any(
child for child in new_children
if not isinstance(child, AbsolutePlaceholder))
if not getattr(child, 'first_letter_style', None):
child.first_letter_style = first_letter_style
(new_child, resume_at, next_page, next_adjoining_margins,
collapsing_through) = block_level_layout(
context, child, max_position_y, skip_stack,
new_containing_block, device_size,
page_is_empty and not new_children,
page_is_empty_with_no_children,
absolute_boxes, fixed_boxes,
adjoining_margins)
skip_stack = None
@ -709,7 +520,7 @@ def block_container_layout(context, box, max_position_y, skip_stack,
new_child.border_box_y() + new_child.border_height())
if (new_position_y > max_position_y and
(new_children or not page_is_empty)):
not page_is_empty_with_no_children):
# The child overflows the page area, put it on the
# next page. (But don't delay whole blocks if e.g.
# only the bottom border overflows.)

View File

@ -0,0 +1,223 @@
"""
weasyprint.layout.columns
-------------------------
Layout for columns.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
from math import floor
from ..formatting_structure import boxes
from .absolute import absolute_layout
from .percentages import resolve_percentages
def columns_layout(context, box, max_position_y, skip_stack, containing_block,
                   device_size, page_is_empty, absolute_boxes, fixed_boxes,
                   adjoining_margins):
    """Lay out a multi-column ``box``.

    The number and width of the columns are computed following the CSS
    multi-column pseudo-algorithm, a balanced column height is estimated from
    an unconstrained layout of the content, and the children of ``box`` are
    then replaced by one anonymous column box per column.

    :param context: layout context, forwarded to the nested layout calls.
    :param box: the multi-column box; it is copied, not modified in place.
    :param max_position_y: lowest vertical position the content may reach.
    :param skip_stack: where to resume in ``box``, or ``None`` to start at
        its beginning.
    :param containing_block: block used to resolve the width and percentages.
    :param device_size: forwarded to ``block_box_layout``.
    :param page_is_empty: forwarded to ``block_box_layout``.
    :param absolute_boxes: list receiving absolutely positioned descendants;
        replaced by a fresh list when ``box`` is relatively positioned.
    :param fixed_boxes: list receiving fixed descendants.
    :param adjoining_margins: accepted for signature compatibility with the
        other layout functions; not used here.
    :returns: a 5-tuple ``(new_box, resume_at, next_page, [0], False)``, the
        same shape that is unpacked from ``block_box_layout`` below.
        ``new_box`` is ``None`` when the box has children but none of them
        could be laid out.

    """
    # Avoid circular imports
    from .blocks import block_box_layout, block_level_width

    # Implementation of the multi-column pseudo-algorithm:
    # https://www.w3.org/TR/css3-multicol/#pseudo-algorithm
    style = box.style

    if box.style['position'] == 'relative':
        # New containing block, use a new absolute list
        absolute_boxes = []

    box = box.copy_with_children(box.children)

    height = box.style['height']
    if height != 'auto' and height.unit != '%':
        assert height.unit == 'px'
        known_height = True
        max_position_y = min(
            max_position_y, box.content_box_y() + height.value)
    else:
        known_height = False

    # TODO: the available width can be unknown if the containing block needs
    # the size of this block to know its own size.
    block_level_width(box, containing_block)
    available_width = box.width
    if style['column_width'] == 'auto' and style['column_count'] != 'auto':
        count = style['column_count']
        width = max(
            0, available_width - (count - 1) * style['column_gap']) / count
    elif (style['column_width'] != 'auto' and
            style['column_count'] == 'auto'):
        count = max(1, int(floor(
            (available_width + style['column_gap']) /
            (style['column_width'] + style['column_gap']))))
        width = (
            (available_width + style['column_gap']) / count -
            style['column_gap'])
    else:
        # Clamp to at least one column, as the pseudo-algorithm does: when
        # the available width is narrower than a single column the floor is
        # 0, and the division below would otherwise fail.
        count = min(style['column_count'], max(1, int(floor(
            (available_width + style['column_gap']) /
            (style['column_width'] + style['column_gap'])))))
        width = (
            (available_width + style['column_gap']) / count -
            style['column_gap'])

    def create_column_box():
        """Return a fresh anonymous column box holding copies of the
        children, placed at the top-left of the content box of ``box``."""
        column_box = box.anonymous_from(box, children=[
            child.copy() for child in box.children])
        resolve_percentages(column_box, containing_block)
        column_box.is_column = True
        column_box.width = width
        column_box.position_x = box.content_box_x()
        column_box.position_y = box.content_box_y()
        return column_box

    def column_descendants(box):
        """Yield the in-flow descendants of ``box`` used for balancing."""
        # TODO: this filtering condition is probably wrong
        if isinstance(box, (boxes.TableBox, boxes.LineBox, boxes.ReplacedBox)):
            yield box
        if hasattr(box, 'descendants') and box.is_in_normal_flow():
            for child in box.children:
                if child.is_in_normal_flow():
                    yield child
                    for grand_child in column_descendants(child):
                        yield grand_child

    # Balance.
    #
    # The current algorithm starts from the ideal height (the total height
    # divided by the number of columns). We then iterate until the last column
    # is not the highest one. At the end of each loop, we add the minimal
    # height needed to make one direct child at the top of one column go to the
    # end of the previous column.
    #
    # We must probably rely on a real rendering for each loop, but with a
    # stupid algorithm like this it can last minutes.
    #
    # TODO: Rewrite this!
    # - We assume that the children are normal lines or blocks.
    # - We ignore the forced and avoided column breaks.

    # Find the total height of the content
    original_max_position_y = max_position_y
    column_box = create_column_box()
    new_child, _, _, _, _ = block_box_layout(
        context, column_box, float('inf'), skip_stack, containing_block,
        device_size, page_is_empty, [], [], [])
    height = new_child.margin_height()
    if style['column_fill'] == 'balance':
        height /= count
    box_column_descendants = list(column_descendants(new_child))

    # Increase the column height step by step.
    while True:
        # For each step, we try to find the empty height needed to make the top
        # element of column i+1 fit at the end of column i. We put this needed
        # space in lost_spaces.
        lost_spaces = []
        column_number = 0
        column_first_child = True
        column_top = new_child.content_box_y()
        for child in box_column_descendants:
            child_height = child.margin_height()
            child_bottom = child.position_y + child_height - column_top
            if child_bottom > height:
                # The child goes lower than the column height.
                if column_number < count - 1:
                    # We're not in the last column.
                    if column_first_child:
                        # It's the first child of the column and we're already
                        # below the bottom of the column. The column's height
                        # has to be at least the size of the child. Let's put
                        # the height difference into lost_spaces and continue
                        # the while loop.
                        lost_spaces = [child_bottom - height]
                        break
                    # Put the child at the top of the next column and put the
                    # extra empty space that would have allowed this child to
                    # fit into lost_spaces.
                    lost_spaces.append(child_bottom - height)
                    column_number += 1
                    column_first_child = True
                    column_top = child.position_y
                else:
                    # We're in the last column, there's no place left to put
                    # that child. We need to go for another round of the while
                    # loop.
                    break
            column_first_child = False
        else:
            # We've seen all the children and they all fit in their
            # columns. Balanced height has been found, quit the while loop.
            break
        if not lost_spaces:
            # The overflow was found in the last column before any space was
            # recorded, which means count <= 1: there is nothing to balance
            # and min() of an empty list would raise ValueError.
            break
        height += min(lost_spaces)

    # TODO: check box.style['max']-height
    max_position_y = min(max_position_y, box.content_box_y() + height)

    # Replace the current box children with columns
    children = []
    if box.children:
        i = 0
        while True:
            if i == count - 1:
                # The last column may use all the space originally available.
                max_position_y = original_max_position_y
            column_box = create_column_box()
            if style['direction'] == 'rtl':
                column_box.position_x += (
                    box.width - (i + 1) * width - i * style['column_gap'])
            else:
                column_box.position_x += i * (width + style['column_gap'])
            new_child, column_skip_stack, column_next_page, _, _ = (
                block_box_layout(
                    context, column_box, max_position_y, skip_stack,
                    containing_block, device_size, page_is_empty,
                    absolute_boxes, fixed_boxes, None))
            if new_child is None:
                break
            next_page = column_next_page
            skip_stack = column_skip_stack
            children.append(new_child)
            if skip_stack is None:
                break
            i += 1
            if i == count and not known_height:
                # [If] a declaration that constrains the column height (e.g.,
                # using height or max-height). In this case, additional column
                # boxes are created in the inline direction.
                break
    else:
        next_page = {'break': 'any', 'page': None}
        skip_stack = None

    if box.children and not children:
        # The box has children but none can be drawn, let's skip the whole box
        return None, (0, None), {'break': 'any', 'page': None}, [0], False

    # Set the height of box and the columns
    box.children = children
    if box.children:
        heights = [child.margin_height() for child in box.children]
        if box.height != 'auto':
            heights.append(box.height)
        if box.min_height != 'auto':
            heights.append(box.min_height)
        box.height = max(heights)
        for child in box.children:
            child.height = box.margin_height()
    else:
        box.height = 0

    if box.style['position'] == 'relative':
        # New containing block, resolve the layout of the absolute descendants
        for absolute_box in absolute_boxes:
            absolute_layout(context, absolute_box, box, fixed_boxes)

    return box, skip_stack, next_page, [0], False

View File

@ -4,7 +4,7 @@
Layout for flex containers and flex-items.
:copyright: Copyright 2017-2018 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
@ -86,12 +86,19 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block,
box.border_left_width - box.border_right_width)
# Step 3
resolve_percentages(box, containing_block)
if isinstance(box, boxes.FlexBox):
blocks.block_level_width(box, containing_block)
else:
box.width = preferred.flex_max_content_width(context, box)
children = box.children
resolve_percentages(box, containing_block)
if box.margin_top == 'auto':
box.margin_top = 0
if box.margin_bottom == 'auto':
box.margin_bottom = 0
parent_box = box.copy_with_children(children)
if isinstance(parent_box, boxes.FlexBox):
blocks.block_level_width(parent_box, containing_block)
else:
parent_box.width = preferred.flex_max_content_width(
context, parent_box)
original_skip_stack = skip_stack
if skip_stack is not None:
children = children[skip_stack[0]:]
skip_stack = skip_stack[1]
@ -108,8 +115,8 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block,
else:
main_flex_direction = None
resolve_percentages(child, (0, 0), main_flex_direction)
child.position_x = box.content_box_x()
child.position_y = box.content_box_y()
child.position_x = parent_box.content_box_x()
child.position_y = parent_box.content_box_y()
if child.min_width == 'auto':
specified_size = (
child.width if child.width != 'auto' else float('inf'))
@ -131,15 +138,18 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block,
new_child.style['max_height'] = Dimension(float('inf'), 'px')
new_child = blocks.block_level_layout(
context, new_child, float('inf'), child_skip_stack,
box, device_size, page_is_empty, absolute_boxes,
fixed_boxes, adjoining_margins=[])[0]
parent_box, device_size, page_is_empty, absolute_boxes=[],
fixed_boxes=[], adjoining_margins=[])[0]
content_size = new_child.height
child.min_height = min(specified_size, content_size)
child.style = child.style.copy()
resolve_one_percentage(child, 'flex_basis', available_main_space)
flex_basis = child.flex_basis
if child.style['flex_basis'] == 'content':
flex_basis = child.flex_basis = 'content'
else:
resolve_one_percentage(child, 'flex_basis', available_main_space)
flex_basis = child.flex_basis
# "If a value would resolve to auto for width, it instead resolves
# to content for flex-basis." Let's do this for height too.
@ -185,7 +195,7 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block,
new_child.width = float('inf')
new_child = blocks.block_level_layout(
context, new_child, float('inf'), child_skip_stack,
box, device_size, page_is_empty, absolute_boxes,
parent_box, device_size, page_is_empty, absolute_boxes,
fixed_boxes, adjoining_margins=[])[0]
child.flex_base_size = new_child.margin_height()
elif child.style[axis] == 'min-content':
@ -197,7 +207,7 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block,
new_child.width = 0
new_child = blocks.block_level_layout(
context, new_child, float('inf'), child_skip_stack,
box, device_size, page_is_empty, absolute_boxes,
parent_box, device_size, page_is_empty, absolute_boxes,
fixed_boxes, adjoining_margins=[])[0]
child.flex_base_size = new_child.margin_height()
else:
@ -213,6 +223,7 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block,
child_skip_stack = None
# Step 4
# TODO: the whole step has to be fixed
if axis == 'width':
blocks.block_level_width(box, containing_block)
else:
@ -221,11 +232,14 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block,
else:
box.height = 0
for i, child in enumerate(children):
if not child.is_flex_item:
continue
child_height = (
child.hypothetical_main_size +
child.border_top_width + child.border_bottom_width +
child.padding_top + child.padding_bottom)
if child_height + box.height > main_space:
if getattr(box, axis) == 'auto' and (
child_height + box.height > available_main_space):
resume_at = (i, None)
children = children[:i + 1]
break
@ -237,22 +251,23 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block,
line = []
line_size = 0
axis_size = getattr(box, axis)
for child in sorted(children, key=lambda item: item.style['order']):
for i, child in enumerate(
sorted(children, key=lambda item: item.style['order'])):
if not child.is_flex_item:
continue
line_size += child.hypothetical_main_size
if box.style['flex_wrap'] != 'nowrap' and line_size > axis_size:
if line:
flex_lines.append(FlexLine(line))
line = [child]
line = [(i, child)]
line_size = child.hypothetical_main_size
else:
line.append(child)
line.append((i, child))
flex_lines.append(FlexLine(line))
line = []
line_size = 0
else:
line.append(child)
line.append((i, child))
if line:
flex_lines.append(FlexLine(line))
@ -268,14 +283,14 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block,
for line in flex_lines:
# Step 6 - 9.7.1
hypothetical_main_size = sum(
child.hypothetical_main_size for child in line)
child.hypothetical_main_size for i, child in line)
if hypothetical_main_size < available_main_space:
flex_factor_type = 'grow'
else:
flex_factor_type = 'shrink'
# Step 6 - 9.7.2
for child in line:
for i, child in line:
if flex_factor_type == 'grow':
child.flex_factor = child.style['flex_grow']
else:
@ -292,19 +307,19 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block,
# Step 6 - 9.7.3
initial_free_space = available_main_space
for child in line:
for i, child in line:
if child.frozen:
initial_free_space -= child.target_main_size
else:
initial_free_space -= child.flex_base_size
# Step 6 - 9.7.4
while not all(child.frozen for child in line):
while not all(child.frozen for i, child in line):
unfrozen_factor_sum = 0
remaining_free_space = available_main_space
# Step 6 - 9.7.4.b
for child in line:
for i, child in line:
if child.frozen:
remaining_free_space -= child.target_main_size
else:
@ -332,20 +347,20 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block,
if remaining_free_space == 0:
# "Do nothing", but we at least set the flex_base_size as
# target_main_size for next step.
for child in line:
for i, child in line:
if not child.frozen:
child.target_main_size = child.flex_base_size
else:
scaled_flex_shrink_factors_sum = 0
flex_grow_factors_sum = 0
for child in line:
for i, child in line:
if not child.frozen:
child.scaled_flex_shrink_factor = (
child.flex_base_size * child.style['flex_shrink'])
scaled_flex_shrink_factors_sum += (
child.scaled_flex_shrink_factor)
flex_grow_factors_sum += child.style['flex_grow']
for child in line:
for i, child in line:
if not child.frozen:
if flex_factor_type == 'grow':
ratio = (
@ -367,15 +382,15 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block,
# Step 6 - 9.7.4.d
# TODO: First part of this step is useless until 3.E is correct
for child in line:
for i, child in line:
child.adjustment = 0
if not child.frozen and child.target_main_size < 0:
child.adjustment = -child.target_main_size
child.target_main_size = 0
# Step 6 - 9.7.4.e
adjustments = sum(child.adjustment for child in line)
for child in line:
adjustments = sum(child.adjustment for i, child in line)
for i, child in line:
if adjustments == 0:
child.frozen = True
elif adjustments > 0 and child.adjustment > 0:
@ -384,7 +399,7 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block,
child.frozen = True
# Step 6 - 9.7.5
for child in line:
for i, child in line:
if axis == 'width':
child.width = (
child.target_main_size -
@ -411,36 +426,33 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block,
child_skip_stack = skip_stack
for line in flex_lines:
new_flex_line = FlexLine()
for child in line:
# TODO: Find another way than calling block_level_layout to get
# baseline and child.height
for i, child in line:
# TODO: Find another way than calling block_level_layout_switch to
# get baseline and child.height
if child.margin_top == 'auto':
child.margin_top = 0
if child.margin_bottom == 'auto':
child.margin_bottom = 0
child_copy = child.copy_with_children(child.children)
if child_copy.margin_top == 'auto':
child_copy.margin_top = 0
if child_copy.margin_bottom == 'auto':
child_copy.margin_bottom = 0
blocks.block_level_width(child_copy, box)
new_child = blocks.block_level_layout(
context, child_copy,
available_cross_space + box.content_box_y(), child_skip_stack,
box, device_size, page_is_empty, absolute_boxes,
fixed_boxes, adjoining_margins=[])[0]
if new_child is None:
# TODO: "If the item does not have a baseline in the
# necessary axis, then one is synthesized from the flex
# item's border box."
child._baseline = 0
else:
child._baseline = find_in_flow_baseline(new_child)
blocks.block_level_width(child_copy, parent_box)
new_child, _, _, adjoining_margins, _ = (
blocks.block_level_layout_switch(
context, child_copy, float('inf'), child_skip_stack,
parent_box, device_size, page_is_empty, absolute_boxes,
fixed_boxes, adjoining_margins=[]))
child._baseline = find_in_flow_baseline(new_child)
if cross == 'height':
# TODO: check that
child.height = 0 if new_child is None else new_child.height
child.height = new_child.height
# As flex items margins never collapse (with other flex items
# or with the flex container), we can add the adjoining margins
# to the child bottom margin.
child.margin_bottom += blocks.collapse_margin(
adjoining_margins)
else:
child.width = min_content_width(context, child, outer=False)
new_flex_line.append(child)
new_flex_line.append((i, child))
# Skip stack is only for the first child
child_skip_stack = None
@ -457,7 +469,7 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block,
for line in flex_lines:
collected_items = []
not_collected_items = []
for child in line:
for i, child in line:
align_self = child.style['align_self']
if (box.style['flex_direction'].startswith('row') and
align_self == 'baseline' and
@ -528,7 +540,7 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block,
# Step 11
for line in flex_lines:
for child in line:
for i, child in line:
align_self = child.style['align_self']
if align_self == 'auto':
align_self = box.style['align_items']
@ -572,7 +584,7 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block,
position_axis = original_position_axis
if axis == 'width':
free_space = box.width
for child in line:
for i, child in line:
free_space -= child.border_width()
if child.margin_left != 'auto':
free_space -= child.margin_left
@ -580,7 +592,7 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block,
free_space -= child.margin_right
else:
free_space = box.height
for child in line:
for i, child in line:
free_space -= child.border_height()
if child.margin_top != 'auto':
free_space -= child.margin_top
@ -588,7 +600,7 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block,
free_space -= child.margin_bottom
margins = 0
for child in line:
for i, child in line:
if axis == 'width':
if child.margin_left == 'auto':
margins += 1
@ -601,7 +613,7 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block,
margins += 1
if margins:
free_space /= margins
for child in line:
for i, child in line:
if axis == 'width':
if child.margin_left == 'auto':
child.margin_left = free_space
@ -620,10 +632,14 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block,
position_axis += free_space / 2
elif justify_content == 'space-around':
position_axis += free_space / len(line) / 2
elif justify_content == 'space-evenly':
position_axis += free_space / (len(line) + 1)
for child in line:
for i, child in line:
if axis == 'width':
child.position_x = position_axis
if justify_content == 'stretch':
child.width += free_space / len(line)
else:
child.position_y = position_axis
position_axis += (
@ -634,6 +650,8 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block,
elif justify_content == 'space-between':
if len(line) > 1:
position_axis += free_space / (len(line) - 1)
elif justify_content == 'space-evenly':
position_axis += free_space / (len(line) + 1)
# Step 13
position_cross = (
@ -642,7 +660,7 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block,
for line in flex_lines:
line.lower_baseline = 0
# TODO: don't duplicate this loop
for child in line:
for i, child in line:
align_self = child.style['align_self']
if align_self == 'auto':
align_self = box.style['align_items']
@ -650,7 +668,7 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block,
# TODO: handle vertical text
child.baseline = child._baseline - position_cross
line.lower_baseline = max(line.lower_baseline, child.baseline)
for child in line:
for i, child in line:
cross_margins = (
(child.margin_top, child.margin_bottom) if cross == 'height'
else (child.margin_left, child.margin_right))
@ -754,7 +772,7 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block,
if extra_cross_size > 0:
cross_translate = 0
for line in flex_lines:
for child in line:
for i, child in line:
if child.is_flex_item:
child.translate(**{direction: cross_translate})
if box.style['align_content'] == 'flex-end':
@ -766,21 +784,26 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block,
child.translate(**{
direction: extra_cross_size /
len(flex_lines) / 2})
elif box.style['align_content'] == 'space-evenly':
child.translate(**{
direction: extra_cross_size /
(len(flex_lines) + 1)})
if box.style['align_content'] == 'space-between':
cross_translate += extra_cross_size / (len(flex_lines) - 1)
elif box.style['align_content'] == 'space-around':
cross_translate += extra_cross_size / len(flex_lines)
elif box.style['align_content'] == 'space-evenly':
cross_translate += extra_cross_size / (len(flex_lines) + 1)
# TODO: don't use block_box_layout, see TODOs in Step 14 and
# build.flex_children.
box = box.copy()
box.children = []
i = 0
child_skip_stack = skip_stack
for line in flex_lines:
for child in line:
for i, child in line:
if child.is_flex_item:
new_child, child_resume_at = blocks.block_box_layout(
new_child, child_resume_at = blocks.block_level_layout_switch(
context, child, max_position_y, child_skip_stack, box,
device_size, page_is_empty, absolute_boxes, fixed_boxes,
adjoining_margins=[])[:2]
@ -791,13 +814,15 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block,
list_marker_layout(context, new_child)
box.children.append(new_child)
if child_resume_at is not None:
if resume_at:
resume_at = (resume_at[0] + i, child_resume_at)
if original_skip_stack:
first_level_skip = original_skip_stack[0]
else:
resume_at = (i, child_resume_at)
first_level_skip = 0
if resume_at:
first_level_skip += resume_at[0]
resume_at = (first_level_skip + i, child_resume_at)
if resume_at:
break
i += 1
# Skip stack is only for the first child
child_skip_stack = None
@ -805,6 +830,7 @@ def flex_layout(context, box, max_position_y, skip_stack, containing_block,
break
# Set box height
# TODO: this is probably useless because of step #15
if axis == 'width' and box.height == 'auto':
if flex_lines:
box.height = sum(line.cross_size for line in flex_lines)

View File

@ -2,7 +2,9 @@
weasyprint.float
----------------
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
Layout for floating boxes.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
@ -26,7 +28,7 @@ def float_width(box, context, containing_block):
def float_layout(context, box, containing_block, device_size, absolute_boxes,
fixed_boxes):
"""Set the width and position of floating ``box``."""
# avoid a circular imports
# Avoid circular imports
from .blocks import block_container_layout
from .flex import flex_layout
from .inlines import inline_replaced_box_width_height

View File

@ -4,7 +4,7 @@
Line breaking and layout for inline-level boxes.
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
@ -34,7 +34,7 @@ def iter_line_boxes(context, box, position_y, skip_stack, containing_block,
``line`` is a laid-out LineBox with as much content as possible that
fits in the available width.
:param linebox: a non-laid-out :class:`LineBox`
:param box: a non-laid-out :class:`LineBox`
:param position_y: vertical top position of the line box on the page
:param skip_stack: ``None`` to start at the beginning of ``linebox``,
or a ``resume_at`` value to continue just after an
@ -44,6 +44,12 @@ def iter_line_boxes(context, box, position_y, skip_stack, containing_block,
:param device_size: ``(width, height)`` of the current page.
"""
resolve_percentages(box, containing_block)
if skip_stack is None:
# TODO: wrong, see https://github.com/Kozea/WeasyPrint/issues/679
resolve_one_percentage(box, 'text_indent', containing_block.width)
else:
box.text_indent = 0
while 1:
line, resume_at = get_next_linebox(
context, box, position_y, skip_stack, containing_block,
@ -56,6 +62,7 @@ def iter_line_boxes(context, box, position_y, skip_stack, containing_block,
if resume_at is None:
return
skip_stack = resume_at
box.text_indent = 0
first_letter_style = None
@ -63,15 +70,6 @@ def get_next_linebox(context, linebox, position_y, skip_stack,
containing_block, device_size, absolute_boxes,
fixed_boxes, first_letter_style):
"""Return ``(line, resume_at)``."""
resolve_percentages(linebox, containing_block)
if skip_stack is None:
# text-indent only at the start of the first line
# Other percentages (margins, width, ...) do not apply.
resolve_one_percentage(linebox, 'text_indent', containing_block.width)
else:
linebox.text_indent = 0
first_letter_style = None
skip_stack = skip_first_whitespace(linebox, skip_stack)
if skip_stack == 'continue':
return None, None
@ -101,7 +99,7 @@ def get_next_linebox(context, linebox, position_y, skip_stack,
waiting_floats = []
(line, resume_at, preserved_line_break, first_letter,
last_letter) = split_inline_box(
last_letter, float_width) = split_inline_box(
context, linebox, position_x, max_x, skip_stack, containing_block,
device_size, line_absolutes, line_fixed, line_placeholders,
waiting_floats, line_children=[])
@ -114,6 +112,8 @@ def get_next_linebox(context, linebox, position_y, skip_stack,
new_position_x, _, new_available_width = avoid_collisions(
context, linebox, containing_block, outer=False)
# TODO: handle rtl
new_available_width -= float_width['right']
alignment_available_width = (
new_available_width + new_position_x - linebox.position_x)
offset_x = text_align(
@ -569,6 +569,7 @@ def split_inline_level(context, box, position_x, max_x, skip_stack,
"""
resolve_percentages(box, containing_block)
float_widths = {'left': 0, 'right': 0}
if isinstance(box, boxes.TextBox):
box.position_x = position_x
if skip_stack is None:
@ -599,7 +600,7 @@ def split_inline_level(context, box, position_x, max_x, skip_stack,
if box.margin_right == 'auto':
box.margin_right = 0
(new_box, resume_at, preserved_line_break, first_letter,
last_letter) = split_inline_box(
last_letter, float_widths) = split_inline_box(
context, box, position_x, max_x, skip_stack, containing_block,
device_size, absolute_boxes, fixed_boxes, line_placeholders,
waiting_floats, line_children)
@ -628,7 +629,9 @@ def split_inline_level(context, box, position_x, max_x, skip_stack,
last_letter = '\u2e80'
else: # pragma: no cover
raise TypeError('Layout for %s not handled yet' % type(box).__name__)
return new_box, resume_at, preserved_line_break, first_letter, last_letter
return (
new_box, resume_at, preserved_line_break, first_letter, last_letter,
float_widths)
def split_inline_box(context, box, position_x, max_x, skip_stack,
@ -653,15 +656,13 @@ def split_inline_box(context, box, position_x, max_x, skip_stack,
box.border_left_width)
right_spacing = (box.padding_right + box.margin_right +
box.border_right_width)
if is_start:
position_x += left_spacing
content_box_left = position_x
children = []
waiting_children = []
preserved_line_break = False
first_letter = last_letter = None
float_translate = 0
float_widths = {'left': 0, 'right': 0}
float_resume_at = 0
if box.style['position'] == 'relative':
@ -710,22 +711,19 @@ def split_inline_box(context, box, position_x, max_x, skip_stack,
waiting_children.append((index, child))
# Translate previous line children
dx = max(child.margin_width(), 0)
float_widths[child.style['float']] += dx
if child.style['float'] == 'left':
dx = max(child.margin_width(), 0)
if isinstance(box, boxes.LineBox):
# The parent is the line, update the current position
# for the next child. When the parent is not the line
# (it is an inline block), the current position of the
# line is updated by the block itself (see next
# line is updated by the box itself (see next
# split_inline_level call).
position_x += dx
elif child.style['float'] == 'right':
dx = min(-child.margin_width(), 0)
# Update the maximum x position for the next children
max_x += dx
# The float_translate variable will be used to translate the
# current box if it's an inline block that has floats inside.
float_translate += dx
max_x -= dx
for _, old_child in line_children:
if not old_child.is_in_normal_flow():
continue
@ -739,18 +737,29 @@ def split_inline_box(context, box, position_x, max_x, skip_stack,
last_child = (i == len(box_children) - 1)
available_width = max_x
new_child, resume_at, preserved, first, last = split_inline_level(
context, child, position_x, available_width, skip_stack,
containing_block, device_size, absolute_boxes, fixed_boxes,
line_placeholders, waiting_floats, line_children)
child_waiting_floats = []
new_child, resume_at, preserved, first, last, new_float_widths = (
split_inline_level(
context, child, position_x, available_width, skip_stack,
containing_block, device_size, absolute_boxes, fixed_boxes,
line_placeholders, child_waiting_floats, line_children))
if last_child and right_spacing and resume_at is None:
# TODO: we should take care of children added into absolute_boxes,
# fixed_boxes and other lists.
available_width -= right_spacing
new_child, resume_at, preserved, first, last = split_inline_level(
context, child, position_x, available_width, skip_stack,
containing_block, device_size, absolute_boxes, fixed_boxes,
line_placeholders, waiting_floats, line_children)
if box.style['direction'] == 'rtl':
available_width -= left_spacing
else:
available_width -= right_spacing
new_child, resume_at, preserved, first, last, new_float_widths = (
split_inline_level(
context, child, position_x, available_width, skip_stack,
containing_block, device_size, absolute_boxes, fixed_boxes,
line_placeholders, child_waiting_floats, line_children))
if box.style['direction'] == 'rtl':
max_x -= new_float_widths['left']
else:
max_x -= new_float_widths['right']
skip_stack = None
if preserved:
@ -815,7 +824,7 @@ def split_inline_box(context, box, position_x, max_x, skip_stack,
# waiting child again with this constraint. We may
# find a better way.
max_x = child.position_x + child.margin_width() - 1
child_new_child, child_resume_at, _, _, _ = (
child_new_child, child_resume_at, _, _, _, _ = (
split_inline_level(
context, child, child.position_x, max_x,
None, box, device_size,
@ -879,11 +888,13 @@ def split_inline_box(context, box, position_x, max_x, skip_stack,
# Too wide, can't break waiting children and the inline is
# non-empty: put child entirely on the next line.
resume_at = (children[-1][0] + 1, None)
child_waiting_floats = []
break
position_x = new_position_x
waiting_children.append((index, new_child))
waiting_floats.extend(child_waiting_floats)
if resume_at is not None:
children.extend(waiting_children)
resume_at = (index, resume_at)
@ -892,18 +903,30 @@ def split_inline_box(context, box, position_x, max_x, skip_stack,
children.extend(waiting_children)
resume_at = None
is_end = resume_at is None
new_box = box.copy_with_children(
[box_child for index, box_child in children],
is_start=is_start, is_end=resume_at is None)
is_start=is_start, is_end=is_end)
if isinstance(box, boxes.LineBox):
# Line boxes already have a position_x which may not be the same
# as content_box_left when text-indent is non-zero.
# This is important for justified text.
new_box.width = position_x - new_box.position_x
# We must reset line box width according to its new children
in_flow_children = [
box_child for box_child in new_box.children
if box_child.is_in_normal_flow()]
if in_flow_children:
new_box.width = (
in_flow_children[-1].position_x +
in_flow_children[-1].margin_width() -
new_box.position_x)
else:
new_box.width = 0
else:
new_box.position_x = initial_position_x
if (is_start and box.style['direction'] == 'ltr') or (
is_end and box.style['direction'] == 'rtl'):
for child in new_box.children:
child.translate(dx=left_spacing)
new_box.width = position_x - content_box_left
new_box.translate(dx=float_translate, ignore_floats=True)
new_box.translate(dx=float_widths['left'], ignore_floats=True)
line_height, new_box.baseline = strut_layout(box.style, context)
new_box.height = box.style['font_size']
@ -923,7 +946,9 @@ def split_inline_box(context, box, position_x, max_x, skip_stack,
if resume_at[0] < float_resume_at:
resume_at = (float_resume_at, None)
return new_box, resume_at, preserved_line_break, first_letter, last_letter
return (
new_box, resume_at, preserved_line_break, first_letter, last_letter,
float_widths)
def split_text_box(context, box, available_width, skip):
@ -952,7 +977,7 @@ def split_text_box(context, box, available_width, skip):
# No need to encode whats after resume_at (if set) or length (if
# resume_at is not set). One code point is one or more byte, so
# UTF-8 indexes are always bigger or equal to Unicode indexes.
new_text = layout.text_bytes.decode('utf8')
new_text = layout.text
encoded = text.encode('utf8')
if resume_at is not None:
between = encoded[length:resume_at].decode('utf8')
@ -1170,10 +1195,10 @@ def text_align(context, line, available_width, last):
justify_line(context, line, offset)
return 0
if align == 'center':
offset /= 2.
return offset / 2
else:
assert align == 'right'
return offset
return offset
def justify_line(context, line, extra_width):

View File

@ -4,7 +4,7 @@
Layout for list markers (for ``display: list-item``).
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""

View File

@ -2,7 +2,7 @@
weasyprint.layout.min_max
-------------------------
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""

View File

@ -4,7 +4,7 @@
Layout for pages and CSS3 margin boxes.
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
@ -14,7 +14,7 @@ import copy
from ..css import (
PageType, computed_from_cascaded, matching_page_types, set_computed_styles)
from ..formatting_structure import boxes, build
from ..logger import LOGGER
from ..logger import PROGRESS_LOGGER
from .absolute import absolute_layout
from .blocks import block_container_layout, block_level_layout
from .min_max import handle_min_max_height, handle_min_max_width
@ -582,7 +582,7 @@ def make_page(context, root_box, page_type, resume_at, page_number,
# spans across multiple pages
cached_anchors = []
cached_lookups = []
for (_, _, _, _, x_remake_state) in page_maker[:page_number-1]:
for (_, _, _, _, x_remake_state) in page_maker[:page_number - 1]:
cached_anchors.extend(x_remake_state.get('anchors', []))
cached_lookups.extend(x_remake_state.get('content_lookups', []))
@ -773,7 +773,7 @@ def make_all_pages(context, root_box, html, pages, style_for):
if (len(pages) == 0 or
remake_state['content_changed'] or
remake_state['pages_wanted']):
LOGGER.info('Step 5 - Creating layout - Page %i', i + 1)
PROGRESS_LOGGER.info('Step 5 - Creating layout - Page %i', i + 1)
# Reset remake_state
remake_state['content_changed'] = False
remake_state['pages_wanted'] = False
@ -783,7 +783,7 @@ def make_all_pages(context, root_box, html, pages, style_for):
i, context, root_box, html, style_for)
yield page
else:
LOGGER.info(
PROGRESS_LOGGER.info(
'Step 5 - Creating layout - Page %i (up-to-date)', i + 1)
resume_at = context.page_maker[i + 1][0]
yield pages[i]

View File

@ -4,7 +4,7 @@
Resolve percentages into fixed values.
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
@ -91,12 +91,15 @@ def resolve_percentages(box, containing_block, main_flex_direction=None):
else:
assert height.unit == 'px'
box.height = height.value
resolve_one_percentage(box, 'min_height', 0)
resolve_one_percentage(box, 'max_height', float('inf'))
resolve_one_percentage(box, 'min_height', 0, main_flex_direction)
resolve_one_percentage(
box, 'max_height', float('inf'), main_flex_direction)
else:
resolve_one_percentage(box, 'height', cb_height)
resolve_one_percentage(box, 'min_height', cb_height)
resolve_one_percentage(box, 'max_height', cb_height)
resolve_one_percentage(
box, 'min_height', cb_height, main_flex_direction)
resolve_one_percentage(
box, 'max_height', cb_height, main_flex_direction)
# Used value == computed value
for side in ['top', 'right', 'bottom', 'left']:

View File

@ -8,13 +8,12 @@
Terms used (max-content width, min-content width) are defined in David
Baron's unofficial draft (http://dbaron.org/css/intrinsic/).
:copyright: Copyright 2011-2016 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import sys
from itertools import zip_longest
from .. import text
from ..formatting_structure import boxes
@ -220,6 +219,12 @@ def column_group_content_width(context, box):
def inline_line_widths(context, box, outer, is_line_start, minimum,
skip_stack=None, first_line=False):
if box.style['text_indent'].unit == '%':
# TODO: this is wrong, text-indent percentages should be resolved
# before calling this function.
text_indent = 0
else:
text_indent = box.style['text_indent'].value
current_line = 0
if skip_stack is None:
skip = 0
@ -288,14 +293,15 @@ def inline_line_widths(context, box, outer, is_line_start, minimum,
current_line += lines[0]
if len(lines) > 1:
# Forced line break
yield current_line
yield current_line + text_indent
text_indent = 0
if len(lines) > 2:
for line in lines[1:-1]:
yield line
current_line = lines[-1]
is_line_start = lines[-1] == 0
skip_stack = None
yield current_line
yield current_line + text_indent
def _percentage_contribution(box):
@ -328,6 +334,9 @@ def table_and_columns_preferred_widths(context, box, outer=True):
http://dbaron.org/css/intrinsic/
"""
# Avoid a circular import
from .tables import distribute_excess_width
table = box.get_wrapped_table()
result = context.tables.get(table)
if result:
@ -391,6 +400,8 @@ def table_and_columns_preferred_widths(context, box, outer=True):
continue
break
colspan_cells = []
# Define the intermediate content widths
min_content_widths = [0 for i in range(grid_width)]
max_content_widths = [0 for i in range(grid_width)]
@ -410,23 +421,29 @@ def table_and_columns_preferred_widths(context, box, outer=True):
intrinsic_percentages[i],
_percentage_contribution(groups[i]))
for cell in zipped_grid[i]:
if cell and cell.colspan == 1:
min_content_widths[i] = max(
min_content_widths[i], min_content_width(context, cell))
max_content_widths[i] = max(
max_content_widths[i], max_content_width(context, cell))
intrinsic_percentages[i] = max(
intrinsic_percentages[i],
_percentage_contribution(cell))
if cell:
if cell.colspan == 1:
min_content_widths[i] = max(
min_content_widths[i],
min_content_width(context, cell))
max_content_widths[i] = max(
max_content_widths[i],
max_content_width(context, cell))
intrinsic_percentages[i] = max(
intrinsic_percentages[i],
_percentage_contribution(cell))
else:
colspan_cells.append(cell)
# Intermediate content widths for span N
# Intermediate content widths for span > 1 are wrong in section 4.1, as
# explained in its third issue. Min- and max-content widths are handled by
# the excess width distribution method, and percentages do not distribute
# widths to columns that have originating cells.
# Intermediate intrinsic percentage widths for span > 1
for span in range(1, grid_width):
min_contributions = []
max_contributions = []
percentage_contributions = []
for i in range(grid_width):
min_contribution = min_content_widths[i]
max_contribution = max_content_widths[i]
percentage_contribution = intrinsic_percentages[i]
for j, cell in enumerate(zipped_grid[i]):
indexes = [k for k in range(i + 1) if grid[j][k]]
@ -437,59 +454,7 @@ def table_and_columns_preferred_widths(context, box, outer=True):
if origin_cell.colspan - 1 != span:
continue
cell_slice = slice(origin, origin + origin_cell.colspan)
# TODO: it's wrong when two columns have no space between them
# because all their cells span between the two columns
baseline_border_spacing = (
(origin_cell.colspan - 1) *
table.style['border_spacing'][0])
baseline_min_content = sum(
max(a, b) for a, b in zip_longest(
min_contributions[cell_slice],
min_content_widths[cell_slice],
fillvalue=0))
baseline_max_content = sum(
max(a, b) for a, b in zip_longest(
max_contributions[cell_slice],
max_content_widths[cell_slice],
fillvalue=0))
baseline_percentage = sum(
intrinsic_percentages[cell_slice])
# Cell contribution to min- and max-content widths
content_width_diff = (
max_content_widths[i] - min_content_widths[i])
baseline_diff = baseline_max_content - baseline_min_content
if baseline_diff:
diff_ratio = content_width_diff / baseline_diff
else:
diff_ratio = 0
cell_min_width = max(
0,
min_content_width(context, origin_cell) -
baseline_max_content - baseline_border_spacing)
cell_max_width = max(
0,
max_content_width(context, origin_cell) -
baseline_max_content - baseline_border_spacing)
clamped_cell_width = min(
cell_min_width,
baseline_max_content - baseline_min_content)
if baseline_max_content:
ratio = max_content_widths[i] / baseline_max_content
else:
ratio = 0
min_contribution = max(
min_contribution,
min_content_widths[i] +
diff_ratio * clamped_cell_width +
(1 - ratio) * cell_min_width)
max_contribution = max(
max_contribution,
max_content_widths[i] + (1 - ratio) * cell_max_width)
baseline_percentage = sum(intrinsic_percentages[cell_slice])
# Cell contribution to intrinsic percentage width
if intrinsic_percentages[i] == 0:
@ -505,7 +470,10 @@ def table_and_columns_preferred_widths(context, box, outer=True):
other_columns_contributions_sum = sum(
other_columns_contributions)
if other_columns_contributions_sum == 0:
ratio = 1 / len(other_columns_contributions)
if other_columns_contributions:
ratio = 1 / len(other_columns_contributions)
else:
ratio = 1
else:
ratio = (
max_content_widths[i] /
@ -514,18 +482,58 @@ def table_and_columns_preferred_widths(context, box, outer=True):
percentage_contribution,
diff * ratio)
min_contributions.append(min_contribution)
max_contributions.append(max_contribution)
percentage_contributions.append(percentage_contribution)
min_content_widths = min_contributions
max_content_widths = max_contributions
intrinsic_percentages = percentage_contributions
# Define constrainedness
constrainedness = [False for i in range(grid_width)]
for i in range(grid_width):
if (column_groups[i] and column_groups[i].style['width'] != 'auto' and
column_groups[i].style['width'].unit != '%'):
constrainedness[i] = True
continue
if (columns[i] and columns[i].style['width'] != 'auto' and
columns[i].style['width'].unit != '%'):
constrainedness[i] = True
continue
for cell in zipped_grid[i]:
if (cell and cell.colspan == 1 and
cell.style['width'] != 'auto' and
cell.style['width'].unit != '%'):
constrainedness[i] = True
break
intrinsic_percentages = [
min(percentage, 100 - sum(intrinsic_percentages[:i]))
for i, percentage in enumerate(intrinsic_percentages)]
# Max- and min-content widths for span > 1
for cell in colspan_cells:
min_content = min_content_width(context, cell)
max_content = max_content_width(context, cell)
column_slice = slice(cell.grid_x, cell.grid_x + cell.colspan)
columns_min_content = sum(min_content_widths[column_slice])
columns_max_content = sum(max_content_widths[column_slice])
if table.style['border_collapse'] == 'separate':
spacing = (cell.colspan - 1) * table.style['border_spacing'][0]
else:
spacing = 0
if min_content > columns_min_content + spacing:
excess_width = min_content - (columns_min_content + spacing)
distribute_excess_width(
context, zipped_grid, excess_width, min_content_widths,
constrainedness, intrinsic_percentages, max_content_widths,
column_slice)
if max_content > columns_max_content + spacing:
excess_width = max_content - (columns_max_content + spacing)
distribute_excess_width(
context, zipped_grid, excess_width, max_content_widths,
constrainedness, intrinsic_percentages, max_content_widths,
column_slice)
# Calculate the max- and min-content widths of table and columns
small_percentage_contributions = [
max_content_widths[i] / (intrinsic_percentages[i] / 100.)
@ -562,24 +570,6 @@ def table_and_columns_preferred_widths(context, box, outer=True):
[sum(max_content_widths), large_percentage_contribution] +
small_percentage_contributions))
# Define constrainedness
constrainedness = [False for i in range(grid_width)]
for i in range(grid_width):
if (column_groups[i] and column_groups[i].style['width'] != 'auto' and
column_groups[i].style['width'].unit != '%'):
constrainedness[i] = True
continue
if (columns[i] and columns[i].style['width'] != 'auto' and
columns[i].style['width'].unit != '%'):
constrainedness[i] = True
continue
for cell in zipped_grid[i]:
if (cell and cell.colspan == 1 and
cell.style['width'] != 'auto' and
cell.style['width'].unit != '%'):
constrainedness[i] = True
break
if table.style['width'] != 'auto' and table.style['width'].unit == 'px':
# "percentages on the following properties are treated instead as
# though they were the following: width: auto"
@ -669,35 +659,33 @@ def replaced_max_content_width(box, outer=True):
def flex_min_content_width(context, box, outer=True):
"""Return the min-content width for an ``FlexContainerBox``."""
# TODO: take care of outer
# TODO: use real values, see
# https://www.w3.org/TR/css-flexbox-1/#intrinsic-sizes
min_contents = [
min_content_width(context, child, outer=True)
for child in box.children if child.is_flex_item]
if not min_contents:
return 0
return adjust(box, outer, 0)
if (box.style['flex_direction'].startswith('row') and
box.style['flex_wrap'] == 'nowrap'):
return sum(min_contents)
return adjust(box, outer, sum(min_contents))
else:
return max(min_contents)
return adjust(box, outer, max(min_contents))
def flex_max_content_width(context, box, outer=True):
"""Return the max-content width for an ``FlexContainerBox``."""
# TODO: take care of outer
# TODO: use real values, see
# https://www.w3.org/TR/css-flexbox-1/#intrinsic-sizes
max_contents = [
max_content_width(context, child, outer=True)
for child in box.children if child.is_flex_item]
if not max_contents:
return 0
return adjust(box, outer, 0)
if box.style['flex_direction'].startswith('row'):
return sum(max_contents)
return adjust(box, outer, sum(max_contents))
else:
return max(max_contents)
return adjust(box, outer, max(max_contents))
def trailing_whitespace_size(context, box):

View File

@ -5,7 +5,7 @@
Layout for images and other replaced elements.
http://dev.w3.org/csswg/css-images-3/#sizing
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""

View File

@ -4,7 +4,7 @@
Layout for tables and internal table boxes.
:copyright: Copyright 2011-2016 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
@ -58,7 +58,7 @@ def table_layout(context, table, max_position_y, skip_stack,
in horizontal_borders[skipped_rows]) / 2
# Make this a sub-function so that many local variables like rows_x
# need not be passed as parameters.
# don't need to be passed as parameters.
def group_layout(group, position_y, max_position_y,
page_is_empty, skip_stack):
resume_at = None
@ -107,6 +107,9 @@ def table_layout(context, table, max_position_y, skip_stack,
cell.margin_left = 0
cell.width = 0
borders_plus_padding = cell.border_width() # with width==0
# TODO: we should subtract the number of columns with no
# originating cells from cell.colspan, see
# test_layout_table_auto_49
cell.width = (
sum(spanned_widths) +
border_spacing_x * (cell.colspan - 1) -
@ -570,6 +573,11 @@ def auto_table_layout(context, box, containing_block):
min_content_specified_guess[i] = column_min_content_widths[i]
if assignable_width <= sum(max_content_guess):
# Default values shouldn't be used, but we never know.
# See https://github.com/Kozea/WeasyPrint/issues/770
lower_guess = guesses[0]
upper_guess = guesses[-1]
# We have to work around floating point rounding errors here.
# The 1e-9 value comes from PEP 485.
for guess in guesses:
@ -583,9 +591,10 @@ def auto_table_layout(context, box, containing_block):
else:
break
if upper_guess == lower_guess:
# TODO: Uncomment the assert when bugs #770 and #628 are closed
# Equivalent to "assert assignable_width == sum(upper_guess)"
assert abs(assignable_width - sum(upper_guess)) <= (
assignable_width * 1e-9)
# assert abs(assignable_width - sum(upper_guess)) <= (
# assignable_width * 1e-9)
table.column_widths = upper_guess
else:
added_widths = [
@ -596,140 +605,21 @@ def auto_table_layout(context, box, containing_block):
lower_guess[i] + added_widths[i] * available_ratio
for i in range(len(grid))]
else:
# Distribute available width to columns
# http://dbaron.org/css/intrinsic/#distributetocols
table.column_widths = max_content_guess
excess_width = assignable_width - sum(max_content_guess)
# First group
columns = [
(i, column) for i, column in enumerate(grid)
if not constrainedness[i] and
column_intrinsic_percentages[i] == 0 and
any(max_content_width(context, cell) for cell in column if cell)]
if columns:
widths = [
max(max_content_width(context, cell)
for cell in column if cell)
for i, column in columns]
current_widths = [
table.column_widths[i] for i, column in columns]
differences = [
max(0, width[0] - width[1])
for width in zip(widths, current_widths)]
if sum(differences) > excess_width:
differences = [
difference / sum(differences) * excess_width
for difference in differences]
excess_width -= sum(differences)
for i, difference in enumerate(differences):
table.column_widths[columns[i][0]] += difference
if excess_width <= 0:
return
# Second group
columns = [
i for i, column in enumerate(grid)
if not constrainedness[i] and
column_intrinsic_percentages[i] == 0]
if columns:
for i in columns:
table.column_widths[i] += excess_width / len(columns)
return
# Third group
columns = [
(i, column) for i, column in enumerate(grid)
if constrainedness[i] and
column_intrinsic_percentages[i] == 0 and
any(max_content_width(context, cell) for cell in column if cell)]
if columns:
widths = [
max(max_content_width(context, cell)
for cell in column if cell)
for i, column in columns]
current_widths = [
table.column_widths[i] for i, column in columns]
differences = [
max(0, width[0] - width[1])
for width in zip(widths, current_widths)]
if sum(differences) > excess_width:
differences = [
difference / sum(differences) * excess_width
for difference in differences]
excess_width -= sum(differences)
for i, difference in enumerate(differences):
table.column_widths[columns[i][0]] += difference
if excess_width <= 0:
return
# Fourth group
columns = [
(i, column) for i, column in enumerate(grid)
if column_intrinsic_percentages[i] > 0]
if columns:
fixed_width = sum(
table.column_widths[j] for j in range(len(grid))
if j not in [i for i, column in columns])
percentage_width = sum(
column_intrinsic_percentages[i]
for i, column in columns)
if fixed_width and percentage_width >= 100:
# Sum of the percentages are greater than 100%
ratio = excess_width
elif fixed_width == 0:
# No fixed width, let's take the whole excess width
ratio = excess_width
excess_width = distribute_excess_width(
context, grid, excess_width, table.column_widths, constrainedness,
column_intrinsic_percentages, column_max_content_widths)
if excess_width:
if table_min_content_width < table.width - excess_width:
# Reduce the width of the size from the excess width that has
# not been distributed.
table.width -= excess_width
else:
ratio = fixed_width / (100 - percentage_width)
widths = [
column_intrinsic_percentages[i] * ratio
for i, column in columns]
current_widths = [
table.column_widths[i] for i, column in columns]
# Allow to reduce the size of the columns to respect the percentage
differences = [
width[0] - width[1]
for width in zip(widths, current_widths)]
if sum(differences) > excess_width:
differences = [
difference / sum(differences) * excess_width
for difference in differences]
excess_width -= sum(differences)
for i, difference in enumerate(differences):
table.column_widths[columns[i][0]] += difference
if excess_width <= 0:
return
# Bonus: we've tried our best to distribute the extra size, but we
# failed. Instead of blindly distributing the size among all the colums
# and breaking all the rules (as said in the draft), let's try to
# change the columns with no constraint at all, then resize the table,
# and at least break the rules to make the columns fill the table.
# Fifth group, part 1
columns = [
i for i, column in enumerate(grid)
if any(column) and
column_intrinsic_percentages[i] == 0 and
not any(
max_content_width(context, cell)
for cell in column if cell)]
if columns:
for i in columns:
table.column_widths[i] += excess_width / len(columns)
return
if table_min_content_width < table.width - excess_width:
# Reduce the width of the size from the excess width that has not
# been distributed.
table.width -= excess_width
else:
# Fifth group, part 2, aka desperately break the rules
columns = [i for i, column in enumerate(grid) if any(column)]
for i in columns:
table.column_widths[i] += excess_width / len(columns)
# Break rules
columns = [i for i, column in enumerate(grid) if any(column)]
for i in columns:
table.column_widths[i] += excess_width / len(columns)
def table_wrapper_width(context, wrapper, containing_block):
@ -776,3 +666,129 @@ def find_in_flow_baseline(box, last=False, baseline_types=(boxes.LineBox,)):
result = find_in_flow_baseline(child, last, baseline_types)
if result is not None:
return result
def distribute_excess_width(context, grid, excess_width, column_widths,
                            constrainedness, column_intrinsic_percentages,
                            column_max_content_widths,
                            column_slice=slice(0, None)):
    """Distribute available width to columns.

    Return excess width left when it's impossible without breaking rules.

    See http://dbaron.org/css/intrinsic/#distributetocols

    :param context: layout context, only forwarded to
        ``max_content_width``.
    :param grid: sequence of columns, each one an iterable of cells in
        which falsy items stand for empty slots.
    :param excess_width: width to share between the columns.
    :param column_widths: list of current column widths, indexed like
        ``grid`` and updated in place.
    :param constrainedness: per-column booleans, indexed like ``grid``.
    :param column_intrinsic_percentages: per-column intrinsic percentages,
        ``0`` meaning that the column has no percentage.
    :param column_max_content_widths: per-column max-content widths.
    :param column_slice: slice of ``grid`` selecting the columns that are
        allowed to receive width (a step other than 1 is not supported).
    :returns: ``None`` when one of the groups absorbed the excess width,
        otherwise the width that could not be distributed.

    """
    # Absolute index of the first column selected by ``column_slice``, so
    # that positions in the sliced grid map back to the per-column lists.
    # ``indices`` also copes with slices whose start is ``None``.
    start = column_slice.indices(len(grid))[0]
    candidates = [
        (start + offset, column)
        for offset, column in enumerate(grid[column_slice])]

    # First group: unconstrained columns with no percentage and a positive
    # max-content width grow towards their max-content width.
    columns = [
        i for i, _ in candidates
        if not constrainedness[i] and
        column_intrinsic_percentages[i] == 0 and
        column_max_content_widths[i] > 0]
    if columns:
        excess_width = _grow_to_max_content(
            columns, excess_width, column_widths, column_max_content_widths)
    if excess_width <= 0:
        return

    # Second group: unconstrained columns with no percentage share what is
    # left equally.
    columns = [
        i for i, _ in candidates
        if not constrainedness[i] and column_intrinsic_percentages[i] == 0]
    if columns:
        share = excess_width / len(columns)
        for i in columns:
            column_widths[i] += share
        return

    # Third group: same as the first group, for constrained columns.
    columns = [
        i for i, _ in candidates
        if constrainedness[i] and
        column_intrinsic_percentages[i] == 0 and
        column_max_content_widths[i] > 0]
    if columns:
        excess_width = _grow_to_max_content(
            columns, excess_width, column_widths, column_max_content_widths)
    if excess_width <= 0:
        return

    # Fourth group: columns with a percentage.
    columns = [
        i for i, _ in candidates if column_intrinsic_percentages[i] > 0]
    if columns:
        percentage_columns = set(columns)
        # The fixed width is summed over the whole grid, not only over
        # ``column_slice``.
        fixed_width = sum(
            column_widths[j] for j in range(len(grid))
            if j not in percentage_columns)
        percentage_width = sum(
            column_intrinsic_percentages[i] for i in columns)
        if not fixed_width or percentage_width >= 100:
            # Either there is no fixed width, so the percentage columns take
            # the whole excess width, or the sum of the percentages is
            # greater than (or equal to) 100%.
            ratio = excess_width
        else:
            ratio = fixed_width / (100 - percentage_width)
        # Differences may be negative: columns are allowed to shrink in
        # order to respect their percentage.
        differences = [
            column_intrinsic_percentages[i] * ratio - column_widths[i]
            for i in columns]
        excess_width = _add_differences(
            columns, differences, excess_width, column_widths)
    if excess_width <= 0:
        return

    # Bonus: we've tried our best to distribute the extra size, but we
    # failed. Instead of blindly distributing the size among all the columns
    # and breaking all the rules (as said in the draft), let's try to
    # change the columns with no constraint at all, then resize the table,
    # and at least break the rules to make the columns fill the table.

    # Fifth group, part 1: non-empty columns with no percentage whose cells
    # all have a zero max-content width.
    columns = [
        i for i, column in candidates
        if any(column) and
        column_intrinsic_percentages[i] == 0 and
        not any(
            max_content_width(context, cell)
            for cell in column if cell)]
    if columns:
        share = excess_width / len(columns)
        for i in columns:
            column_widths[i] += share
        return

    # Fifth group, part 2, aka abort
    return excess_width


def _grow_to_max_content(columns, excess_width, column_widths,
                         column_max_content_widths):
    """Widen ``columns`` towards their own max-content widths.

    Return the excess width left afterwards.

    """
    # Each selected column is compared with *its own* max-content width.
    differences = [
        max(0, column_max_content_widths[i] - column_widths[i])
        for i in columns]
    return _add_differences(
        columns, differences, excess_width, column_widths)


def _add_differences(columns, differences, excess_width, column_widths):
    """Add ``differences`` to the widths of ``columns``, in place.

    Differences are scaled down proportionally when their sum is larger
    than ``excess_width``. Return the excess width left afterwards.

    """
    total = sum(differences)
    if total > excess_width:
        differences = [
            difference / total * excess_width for difference in differences]
    for i, difference in zip(columns, differences):
        column_widths[i] += difference
    return excess_width - sum(differences)

View File

@ -15,7 +15,7 @@
fonts and various non-fatal problems;
- infos are used to advertise rendering steps.
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
@ -25,3 +25,5 @@ import logging
LOGGER = logging.getLogger('weasyprint')
LOGGER.setLevel(logging.WARNING)
LOGGER.addHandler(logging.NullHandler())
PROGRESS_LOGGER = logging.getLogger('weasyprint.progress')

View File

@ -2,10 +2,27 @@
weasyprint.pdf
--------------
Post-process the PDF files created by cairo and add metadata such as
hyperlinks and bookmarks.
Post-process the PDF files created by cairo and extra metadata (including
attachments, embedded files, trim & bleed boxes).
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
Rather than trying to parse any valid PDF, we make some assumptions
that hold for cairo in order to simplify the code:
* All newlines are '\n', not '\r' or '\r\n'
* Except for number 0 (which is always free) there is no "free" object.
* Most white space separators are made of a single 0x20 space.
* Indirect dictionary objects do not contain '>>' at the start of a line
except to mark the end of the object, followed by 'endobj'.
(In other words, '>>' markers for sub-dictionaries are indented.)
* The Page Tree is flat: all kids of the root page node are page objects,
not page tree nodes.
However the code uses a lot of assert statements so that if an assumption
is not true anymore, the code should (hopefully) fail with an exception
rather than silently behave incorrectly.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
@ -13,97 +30,343 @@
import hashlib
import io
import mimetypes
import os
import re
import string
import zlib
from urllib.parse import unquote
from urllib.parse import unquote, urlsplit
import cairocffi as cairo
from pdfrw import PdfArray, PdfDict, PdfName, PdfReader, PdfString, PdfWriter
from . import VERSION_STRING, Attachment
from .html import W3C_DATE_RE
from . import Attachment
from .logger import LOGGER
from .urls import URLFetchingError, iri_to_uri, urlsplit
from .urls import URLFetchingError
def convert_bookmarks_units(bookmarks, matrices):
    """Return a copy of the bookmark tree with transformed target points.

    :param bookmarks: iterable of ``(label, target, children)`` tuples where
        ``target`` is ``(page, x, y)`` and ``children`` is a nested iterable
        of the same shape.
    :param matrices: sequence indexed by page number whose items provide a
        ``transform_point(x, y)`` method returning the new ``(x, y)``.
    :returns: a list of ``(label, (page, x, y), children)`` tuples, the
        children being converted recursively.

    """
    def transform_target(target):
        # The page number selects the matrix and is kept as is.
        page_number, old_x, old_y = target
        new_x, new_y = matrices[page_number].transform_point(old_x, old_y)
        return page_number, new_x, new_y

    return [
        (label, transform_target(target),
         convert_bookmarks_units(children, matrices))
        for label, target, children in bookmarks]
def pdf_escape(value):
"""Escape parentheses and backslashes in ``value``.
def prepare_metadata(document, scale, pages):
"""Change metadata into data structures closer to the PDF objects.
In particular, convert from WeasyPrint units (CSS pixels from
the top-left corner) to PDF units (points from the bottom-left corner.)
:param scale:
PDF points per CSS pixels.
Defaults to 0.75, but is affected by `zoom` in
:meth:`weasyprint.document.Document.write_pdf`.
``value`` must be unicode, or latin1 bytestring.
"""
# X and width unchanged; Y = page_height - Y; height = -height
matrices = [cairo.Matrix(xx=scale, yy=-scale, y0=page.height * scale)
for page in document.pages]
links = []
for page_links, matrix in zip(document.resolve_links(), matrices):
new_page_links = []
for link_type, target, rectangle in page_links:
if link_type == 'internal':
target_page, target_x, target_y = target
target = (
(pages[target_page].indirect,) +
matrices[target_page].transform_point(target_x, target_y))
rect_x, rect_y, width, height = rectangle
rect_x, rect_y = matrix.transform_point(rect_x, rect_y)
width, height = matrix.transform_distance(width, height)
# x, y, w, h => x0, y0, x1, y1
rectangle = rect_x, rect_y, rect_x + width, rect_y + height
new_page_links.append((link_type, target, rectangle))
links.append(new_page_links)
bookmarks = convert_bookmarks_units(
document.make_bookmark_tree(), matrices)
return bookmarks, links
if isinstance(value, bytes):
value = value.decode('latin1')
return value.translate({40: r'\(', 41: r'\)', 92: r'\\'})
def _create_compressed_file_object(source):
class PDFFormatter(string.Formatter):
"""Like str.format except:
* Results are byte strings
* The new !P conversion flags encodes a PDF string.
(UTF-16 BE with a BOM, then backslash-escape parentheses.)
Except for fields marked !P, everything should be ASCII-only.
"""
Create a file like object as ``/EmbeddedFile`` compressing it with deflate.
def convert_field(self, value, conversion):
if conversion == 'P':
# Make a round-trip back through Unicode for the .translate()
# method. (bytes.translate only maps to single bytes.)
# Use latin1 to map all byte values.
return '({0})'.format(pdf_escape(
('\ufeff' + value).encode('utf-16-be').decode('latin1')))
else:
return super(PDFFormatter, self).convert_field(value, conversion)
def vformat(self, format_string, args, kwargs):
result = super(PDFFormatter, self).vformat(format_string, args, kwargs)
return result.encode('latin1')
pdf_format = PDFFormatter().format
class PDFDictionary(object):
    """A PDF dictionary object held as its raw serialized byte string.

    Values are pulled out of ``byte_string`` with regular expressions
    instead of parsing the dictionary.
    """

    # Compiled patterns, keyed by ``(key, value_re)``.  This is a class
    # attribute, so the cache is shared by every instance.
    _re_cache = {}

    def __init__(self, object_number, byte_string):
        self.object_number = object_number
        self.byte_string = byte_string

    def __repr__(self):
        state = (self.object_number, self.byte_string)
        return self.__class__.__name__ + repr(state)

    def get_value(self, key, value_re):
        """Return group 1 of the first ``/<key> <value_re>`` match.

        :param key:
            The dictionary key, without the leading slash.
        :param value_re:
            A regular expression (as text) for the value; it must define
            at least one group.
        :returns:
            The bytes captured by the first group.

        """
        cache_key = (key, value_re)
        pattern = self._re_cache.get(cache_key)
        if not pattern:
            pattern = re.compile(pdf_format('/{0} {1}', key, value_re))
            self._re_cache[cache_key] = pattern
        match = pattern.search(self.byte_string)
        return match.group(1)

    def get_type(self):
        """Get dictionary type.

        :returns: the value for the /Type key.

        """
        # No end delimiter, + defaults to greedy
        type_name = self.get_value('Type', '/(\\w+)')
        return type_name.decode('ascii')

    def get_indirect_dict(self, key, pdf_file):
        """Read the value for `key` and follow the reference.

        We assume that it is an indirect dictionary object.

        :return: a new PDFDictionary instance.

        """
        reference = self.get_value(key, '(\\d+) 0 R')
        number = int(reference)
        return type(self)(number, pdf_file.read_object(number))

    def get_indirect_dict_array(self, key, pdf_file):
        """Read the value for `key` and follow the references.

        We assume that it is an array of indirect dictionary objects.

        :return: a list of new PDFDictionary instance.

        """
        array_content = self.get_value(key, '\\[(.+?)\\]')
        # The array looks like this: ' <a> 0 R <b> 0 R <c> 0 R '
        # so splitting gives [' <a>', ' <b>', ' <c>', ' '],
        # with the trailing white space as the last item.
        pieces = array_content.split(b' 0 R')
        trailing = pieces.pop()
        assert not trailing.strip()
        cls = type(self)
        read = pdf_file.read_object
        dictionaries = []
        for piece in pieces:
            number = int(piece)
            dictionaries.append(cls(number, read(number)))
        return dictionaries
class PDFFile(object):
    """Read an existing PDF file and append updated or new objects to it.

    The constructor reads the trailer and the cross-reference table found
    at the end of ``fileobj``.  New content is always written at the end of
    the file; :meth:`finish` then appends a cross-reference section and a
    trailer whose ``/Prev`` entry is the original ``startxref`` offset.
    """

    trailer_re = re.compile(
        b'\ntrailer\n(.+)\nstartxref\n(\\d+)\n%%EOF\n$', re.DOTALL)

    def __init__(self, fileobj):
        # cairo's trailer only has Size, Root and Info.
        # The trailer + startxref + EOF is typically under 100 bytes
        fileobj.seek(-200, os.SEEK_END)
        tail = fileobj.read()
        trailer, startxref = self.trailer_re.search(tail).groups()
        trailer = PDFDictionary(None, trailer)
        startxref = int(startxref)

        # Walk the cross-reference table, checking the exact layout that
        # is expected: one section starting at object 0.
        fileobj.seek(startxref)
        line = next(fileobj)
        assert line == b'xref\n'

        line = next(fileobj)
        first_object, total_objects = line.split()
        assert first_object == b'0'
        total_objects = int(total_objects)

        line = next(fileobj)
        assert line == b'0000000000 65535 f \n'

        # Index 0 is the free-list head and has no offset.
        objects_offsets = [None]
        for _ in range(1, total_objects):
            line = next(fileobj)
            assert line[10:] == b' 00000 n \n'
            objects_offsets.append(int(line[:10]))

        # These two must be set before following references below, as
        # read_object() relies on them.
        self.fileobj = fileobj
        #: Maps object number -> bytes from the start of the file
        self.objects_offsets = objects_offsets

        info = trailer.get_indirect_dict('Info', self)
        catalog = trailer.get_indirect_dict('Root', self)
        page_tree = catalog.get_indirect_dict('Pages', self)
        pages = page_tree.get_indirect_dict_array('Kids', self)
        # Check that the tree is flat
        assert all(p.get_type() == 'Page' for p in pages)

        self.startxref = startxref
        self.info = info
        self.catalog = catalog
        self.page_tree = page_tree
        self.pages = pages

        self.finished = False
        self.overwritten_objects_offsets = {}
        self.new_objects_offsets = []

    def read_object(self, object_number):
        """
        :param object_number:
            An integer N so that 1 <= N < len(self.objects_offsets)
        :returns:
            The object content as a byte string.

        """
        stream = self.fileobj
        stream.seek(self.objects_offsets[object_number])
        header = next(stream)
        assert header.endswith(b' 0 obj\n')
        assert int(header[:-7]) == object_number  # len(b' 0 obj\n') == 7
        collected = []
        for line in stream:
            if line != b'>>\n':
                collected.append(line)
                continue
            assert next(stream) == b'endobj\n'
            # No newline, we'll add it when writing.
            collected.append(b'>>')
            return b''.join(collected)

    def overwrite_object(self, object_number, byte_string):
        """Write the new content for an existing object at the end of the file.

        :param object_number:
            An integer N so that 1 <= N < len(self.objects_offsets)
        :param byte_string:
            The new object content as a byte string.

        """
        offset = self._write_object(object_number, byte_string)
        self.overwritten_objects_offsets[object_number] = offset

    def extend_dict(self, dictionary, new_content):
        """Overwrite a dictionary object.

        Content is added inside the << >> delimiters.

        """
        original = dictionary.byte_string
        assert original.endswith(b'>>')
        # Drop the closing delimiter, append, then close again.
        extended = original[:-2] + new_content + b'\n>>'
        self.overwrite_object(dictionary.object_number, extended)

    def next_object_number(self):
        """Return object number that would be used by write_new_object()."""
        return len(self.objects_offsets) + len(self.new_objects_offsets)

    def write_new_object(self, byte_string):
        """Write a new object at the end of the file.

        :param byte_string:
            The object content as a byte string.
        :return:
            The new object number.

        """
        number = self.next_object_number()
        offset = self._write_object(number, byte_string)
        self.new_objects_offsets.append(offset)
        return number

    def finish(self):
        """Write cross-ref table and trailer for new and overwritten objects.

        This makes `fileobj` a valid (updated) PDF file.

        """
        new_startxref, write = self._start_writing()
        # From here on _start_writing() refuses to run again.
        self.finished = True
        write(b'xref\n')

        # Don't bother sorting or finding contiguous numbers,
        # just write a new sub-section for each overwritten object.
        for object_number, offset in self.overwritten_objects_offsets.items():
            write(pdf_format(
                '{0} 1\n{1:010} 00000 n \n', object_number, offset))

        # New objects are numbered contiguously after the existing ones,
        # so they fit in a single sub-section.
        if self.new_objects_offsets:
            first_new_object = len(self.objects_offsets)
            write(pdf_format(
                '{0} {1}\n', first_new_object, len(self.new_objects_offsets)))
            for offset in self.new_objects_offsets:
                write(pdf_format('{0:010} 00000 n \n', offset))

        write(pdf_format(
            'trailer\n<< '
            '/Size {size} /Root {root} 0 R /Info {info} 0 R /Prev {prev}'
            ' >>\nstartxref\n{startxref}\n%%EOF\n',
            size=self.next_object_number(),
            root=self.catalog.object_number,
            info=self.info.object_number,
            prev=self.startxref,
            startxref=new_startxref))

    def _write_object(self, object_number, byte_string):
        """Append ``byte_string`` as object ``object_number``.

        :return: the offset in the file where the object starts.

        """
        offset, write = self._start_writing()
        write(pdf_format('{0} 0 obj\n', object_number))
        write(byte_string)
        write(b'\nendobj\n')
        return offset

    def _start_writing(self):
        """Seek to the end of the file.

        :return: a ``(end_offset, write)`` tuple, ``write`` being the
            ``write`` method of the file object.

        """
        assert not self.finished
        stream = self.fileobj
        stream.seek(0, os.SEEK_END)
        return stream.tell(), stream.write
def _write_compressed_file_object(pdf, file):
"""Write a compressed file like object as ``/EmbeddedFile``.
Compressing is done with deflate. In fact, this method writes multiple PDF
objects to include length, compressed length and MD5 checksum.
:return:
the object representing the compressed file stream object
the object number of the compressed file stream object
"""
object_number = pdf.next_object_number()
# Make sure we stay in sync with our object numbers
expected_next_object_number = object_number + 4
length_number = object_number + 1
md5_number = object_number + 2
uncompressed_length_number = object_number + 3
offset, write = pdf._start_writing()
write(pdf_format('{0} 0 obj\n', object_number))
write(pdf_format(
'<< /Type /EmbeddedFile /Length {0} 0 R /Filter '
'/FlateDecode /Params << /CheckSum {1} 0 R /Size {2} 0 R >> >>\n',
length_number, md5_number, uncompressed_length_number))
write(b'stream\n')
uncompressed_length = 0
compressed_length = 0
md5 = hashlib.md5()
compress = zlib.compressobj()
for data in iter(lambda: file.read(4096), b''):
uncompressed_length += len(data)
pdf_file_object = PdfDict(
Type=PdfName('EmbeddedFile'), Filter=PdfName('FlateDecode'))
# pdfrw needs Latin-1-decoded unicode strings in object.stream
pdf_file_object.stream = ''
size = 0
for data in iter(lambda: source.read(4096), b''):
size += len(data)
md5.update(data)
pdf_file_object.stream += compress.compress(data).decode('latin-1')
pdf_file_object.stream += compress.flush(zlib.Z_FINISH).decode('latin-1')
pdf_file_object.Params = PdfDict(
CheckSum=PdfString('<{}>'.format(md5.hexdigest())), Size=size)
return pdf_file_object
compressed = compress.compress(data)
compressed_length += len(compressed)
write(compressed)
compressed = compress.flush(zlib.Z_FINISH)
compressed_length += len(compressed)
write(compressed)
write(b'\nendstream\n')
write(b'endobj\n')
pdf.new_objects_offsets.append(offset)
pdf.write_new_object(pdf_format("{0}", compressed_length))
pdf.write_new_object(pdf_format("<{0}>", md5.hexdigest()))
pdf.write_new_object(pdf_format("{0}", uncompressed_length))
assert pdf.next_object_number() == expected_next_object_number
return object_number
def _get_filename_from_result(url, result):
"""
Derives a filename from a fetched resource. This is either the filename
returned by the URL fetcher, the last URL path component or a synthetic
name if the URL has no path
"""Derive a filename from a fetched resource.
This is either the filename returned by the URL fetcher, the last URL path
component or a synthetic name if the URL has no path.
"""
filename = None
@ -150,13 +413,38 @@ def _get_filename_from_result(url, result):
return filename
def _create_pdf_attachment(attachment, url_fetcher):
"""
Create an attachment to the PDF stream
def _write_pdf_embedded_files(pdf, attachments, url_fetcher):
"""Write attachments as embedded files (document attachments).
:return:
the object representing the ``/Filespec`` object or :obj:`None` if the
the object number of the name dictionary or :obj:`None`
"""
file_spec_ids = []
for attachment in attachments:
file_spec_id = _write_pdf_attachment(pdf, attachment, url_fetcher)
if file_spec_id is not None:
file_spec_ids.append(file_spec_id)
# We might have failed to write any attachment at all
if len(file_spec_ids) == 0:
return None
content = [b'<< /Names [']
for fs in file_spec_ids:
content.append(pdf_format('\n(attachment{0}) {0} 0 R ',
fs))
content.append(b'\n] >>')
return pdf.write_new_object(b''.join(content))
def _write_pdf_attachment(pdf, attachment, url_fetcher):
"""Write an attachment to the PDF stream.
:return:
the object number of the ``/Filespec`` object or :obj:`None` if the
attachment couldn't be read.
"""
try:
# Attachments from document links like <link> or <a> can only be URLs.
@ -171,144 +459,71 @@ def _create_pdf_attachment(attachment, url_fetcher):
with attachment.source as (source_type, source, url, _):
if isinstance(source, bytes):
source = io.BytesIO(source)
pdf_file_object = _create_compressed_file_object(source)
file_stream_id = _write_compressed_file_object(pdf, source)
except URLFetchingError as exc:
LOGGER.error('Failed to load attachment: %s', exc)
return None
# TODO: Use the result object from a URL fetch operation to provide more
# details on the possible filename
return PdfDict(
Type=PdfName('Filespec'), F=PdfString.encode(''),
UF=PdfString.encode(_get_filename_from_result(url, None)),
EF=PdfDict(F=pdf_file_object),
Desc=PdfString.encode(attachment.description or ''))
filename = _get_filename_from_result(url, None)
return pdf.write_new_object(pdf_format(
'<< /Type /Filespec /F () /UF {0!P} /EF << /F {1} 0 R >> '
'/Desc {2!P}\n>>',
filename,
file_stream_id,
attachment.description or ''))
def create_bookmarks(bookmarks, pages, parent=None):
    """Build the outline entries for one level of a bookmark tree.

    :param bookmarks:
        A list of ``(label, target, children)`` tuples, where ``target``
        is indexed as ``(page index, x, y)`` and ``children`` is a list
        with the same structure.
    :param pages:
        A sequence of page objects, indexed by ``target[0]``.
    :param parent:
        The entry to set as ``Parent`` on every entry of this level, or
        :obj:`None` for the top level.
    :return:
        A ``(entries, count)`` tuple: the entries created for this level,
        and the number of entries at this level plus all their
        descendants.

    """
    total = len(bookmarks)
    entries = []
    for label, target, children in bookmarks:
        destination = (
            pages[target[0]].indirect,
            PdfName('XYZ'), target[1], target[2], 0)
        title = PdfString.encode(label)
        action = PdfDict(
            Type=PdfName('Action'), S=PdfName('GoTo'),
            D=PdfArray(destination))
        entry = PdfDict(Title=title, A=action)
        entry.indirect = True

        # Children are created first: their count is part of this entry's.
        child_entries, child_total = create_bookmarks(
            children, pages, parent=entry)
        entry.Count = 1 + child_total

        # Chain this entry to its previous sibling, if any.
        if entries:
            previous = entries[-1]
            entry.Prev = previous
            previous.Next = entry
        if child_entries:
            entry.First = child_entries[0]
            entry.Last = child_entries[-1]
        if parent is not None:
            entry.Parent = parent

        total += child_total
        entries.append(entry)
    return entries, total
def write_pdf_metadata(fileobj, scale, url_fetcher, attachments,
attachment_links, pages):
"""Add PDF metadata that are not handled by cairo.
Includes:
- attachments
- embedded files
- trim box
- bleed box
def write_pdf_metadata(document, fileobj, scale, metadata, attachments,
url_fetcher):
"""Append to a seekable file-like object to add PDF metadata."""
fileobj.seek(0)
trailer = PdfReader(fileobj)
pages = trailer.Root.Pages.Kids
"""
pdf = PDFFile(fileobj)
bookmarks, links = prepare_metadata(document, scale, pages)
if bookmarks:
bookmark_objects, count = create_bookmarks(bookmarks, pages)
trailer.Root.Outlines = PdfDict(
Type=PdfName('Outlines'), Count=count,
First=bookmark_objects[0], Last=bookmark_objects[-1])
# Add embedded files
attachments = metadata.attachments + (attachments or [])
if attachments:
embedded_files = []
for attachment in attachments:
attachment_object = _create_pdf_attachment(attachment, url_fetcher)
if attachment_object is not None:
embedded_files.append(PdfString.encode('attachment'))
embedded_files.append(attachment_object)
if embedded_files:
trailer.Root.Names = PdfDict(
EmbeddedFiles=PdfDict(Names=PdfArray(embedded_files)))
embedded_files_id = _write_pdf_embedded_files(
pdf, attachments, url_fetcher)
if embedded_files_id is not None:
params = b''
if embedded_files_id is not None:
params += pdf_format(' /Names << /EmbeddedFiles {0} 0 R >>',
embedded_files_id)
pdf.extend_dict(pdf.catalog, params)
# A single link can be split in multiple regions. We don't want to embedded
# Add attachments
# A single link can be split in multiple regions. We don't want to embed
# a file multiple times of course, so keep a reference to every embedded
# URL and reuse the object number.
# TODO: If we add support for descriptions this won't always be correct,
# because two links might have the same href, but different titles.
annot_files = {}
for page_links in links:
for page_links in attachment_links:
for link_type, target, rectangle in page_links:
if link_type == 'attachment' and target not in annot_files:
# TODO: use the title attribute as description
annot_files[target] = _create_pdf_attachment(
(target, None), url_fetcher)
annot_files[target] = _write_pdf_attachment(
pdf, (target, None), url_fetcher)
# TODO: splitting a link into multiple independent rectangular annotations
# works well for pure links, but rather mediocre for other annotations and
# fails completely for transformed (CSS) or complex link shapes (area).
# It would be better to use /AP for all links and coalesce link shapes that
# originate from the same HTML link. This would give a feeling similar to
# what browsers do with links that span multiple lines.
for page, page_links in zip(pages, links):
annotations = PdfArray()
for link_type, target, rectangle in page_links:
if link_type != 'attachment' or annot_files[target] is None:
annotation = PdfDict(
Type=PdfName('Annot'), Subtype=PdfName('Link'),
Rect=PdfArray(rectangle), Border=PdfArray((0, 0, 0)))
if link_type == 'internal':
destination = (
target[0], PdfName('XYZ'), target[1], target[2], 0)
annotation.A = PdfDict(
Type=PdfName('Action'), S=PdfName('GoTo'),
D=PdfArray(destination))
else:
annotation.A = PdfDict(
Type=PdfName('Action'), S=PdfName('URI'),
URI=PdfString.encode(iri_to_uri(target)))
else:
assert annot_files[target] is not None
ap = PdfDict(N=PdfDict(
BBox=PdfArray(rectangle), Subtype=PdfName('Form'),
Type=PdfName('XObject')))
# evince needs /T or fails on an internal assertion. PDF
# doesn't require it.
annotation = PdfDict(
Type=PdfName('Annot'), Subtype=PdfName('FileAttachment'),
T=PdfString.encode(''), Rect=PdfArray(rectangle),
Border=PdfArray((0, 0, 0)), FS=annot_files[target],
AP=ap)
annotations.append(annotation)
for pdf_page, document_page, page_links in zip(
pdf.pages, pages, attachment_links):
if annotations:
page.Annots = annotations
# Add bleed box
trailer.Info.Producer = VERSION_STRING
for attr, key in (('title', 'Title'), ('description', 'Subject'),
('generator', 'Creator')):
value = getattr(metadata, attr)
if value is not None:
setattr(trailer.Info, key, value)
for attr, key in (('authors', 'Author'), ('keywords', 'Keywords')):
value = getattr(metadata, attr)
if value is not None:
setattr(trailer.Info, key, ', '.join(getattr(metadata, attr)))
for attr, key in (('created', 'CreationDate'), ('modified', 'ModDate')):
value = w3c_date_to_pdf(getattr(metadata, attr), attr)
if value is not None:
setattr(trailer.Info, key, value)
for page, document_page in zip(pages, document.pages):
left, top, right, bottom = (float(value) for value in page.MediaBox)
media_box = pdf_page.get_value(
'MediaBox', '\\[(.+?)\\]').decode('ascii').strip()
left, top, right, bottom = (
float(value) for value in media_box.split(' '))
# Convert pixels into points
bleed = {
key: value * 0.75 for key, value in document_page.bleed.items()}
@ -317,7 +532,6 @@ def write_pdf_metadata(document, fileobj, scale, metadata, attachments,
trim_top = top + bleed['top']
trim_right = right - bleed['right']
trim_bottom = bottom - bleed['bottom']
page.TrimBox = PdfArray((trim_left, trim_top, trim_right, trim_bottom))
# Arbitrarily set PDF BleedBox between CSS bleed box (PDF MediaBox) and
# CSS page box (PDF TrimBox), at most 10 points from the TrimBox.
@ -325,40 +539,53 @@ def write_pdf_metadata(document, fileobj, scale, metadata, attachments,
bleed_top = trim_top - min(10, bleed['top'])
bleed_right = trim_right + min(10, bleed['right'])
bleed_bottom = trim_bottom + min(10, bleed['bottom'])
page.BleedBox = PdfArray(
(bleed_left, bleed_top, bleed_right, bleed_bottom))
fileobj.seek(0)
PdfWriter().write(fileobj, trailer=trailer)
fileobj.truncate()
pdf.extend_dict(pdf_page, pdf_format(
'/TrimBox [ {} {} {} {} ] /BleedBox [ {} {} {} {} ]'.format(
trim_left, trim_top, trim_right, trim_bottom,
bleed_left, bleed_top, bleed_right, bleed_bottom)))
# Add links to attachments
def w3c_date_to_pdf(string, attr_name):
"""
YYYYMMDDHHmmSSOHH'mm'
# TODO: splitting a link into multiple independent rectangular
# annotations works well for pure links, but rather mediocre for other
# annotations and fails completely for transformed (CSS) or complex
# link shapes (area). It would be better to use /AP for all links and
# coalesce link shapes that originate from the same HTML link. This
# would give a feeling similar to what browsers do with links that
# span multiple lines.
annotations = []
for link_type, target, rectangle in page_links:
if link_type == 'attachment' and annot_files[target] is not None:
matrix = cairo.Matrix(
xx=scale, yy=-scale, y0=document_page.height * scale)
rect_x, rect_y, width, height = rectangle
rect_x, rect_y = matrix.transform_point(rect_x, rect_y)
width, height = matrix.transform_distance(width, height)
# x, y, w, h => x0, y0, x1, y1
rectangle = rect_x, rect_y, rect_x + width, rect_y + height
content = [pdf_format(
'<< /Type /Annot '
'/Rect [{0:f} {1:f} {2:f} {3:f}] /Border [0 0 0]\n',
*rectangle)]
link_ap = pdf.write_new_object(pdf_format(
'<< /Type /XObject /Subtype /Form '
'/BBox [{0:f} {1:f} {2:f} {3:f}] /Length 0 >>\n'
'stream\n'
'endstream',
*rectangle))
content.append(b'/Subtype /FileAttachment ')
# evince needs /T or fails on an internal assertion. PDF
# doesn't require it.
content.append(pdf_format(
'/T () /FS {0} 0 R /AP << /N {1} 0 R >>',
annot_files[target], link_ap))
content.append(b'>>')
annotations.append(pdf.write_new_object(b''.join(content)))
"""
if string is None:
return None
match = W3C_DATE_RE.match(string)
if match is None:
LOGGER.warning('Invalid %s date: %r', attr_name, string)
return None
groups = match.groupdict()
pdf_date = (groups['year'] +
(groups['month'] or '') +
(groups['day'] or '') +
(groups['hour'] or '') +
(groups['minute'] or '') +
(groups['second'] or ''))
if groups['hour']:
assert groups['minute']
if not groups['second']:
pdf_date += '00'
if groups['tz_hour']:
assert groups['tz_hour'].startswith(('+', '-'))
assert groups['tz_minute']
pdf_date += "%s'%s'" % (groups['tz_hour'], groups['tz_minute'])
else:
pdf_date += 'Z' # UTC
return pdf_date
if annotations:
pdf.extend_dict(pdf_page, pdf_format(
'/Annots [{0}]', ' '.join(
'{0} 0 R'.format(n) for n in annotations)))
pdf.finish()

View File

@ -2,7 +2,7 @@
weasyprint.stacking
-------------------
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""

View File

@ -4,7 +4,7 @@
The Weasyprint test suite.
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""

View File

@ -16,12 +16,12 @@ import os
import sys
import unicodedata
import zlib
from pathlib import Path
from urllib.parse import urljoin, uses_relative
import cairocffi as cairo
import py
import pytest
from pdfrw import PdfReader
from .. import CSS, HTML, __main__, default_url_fetcher
from ..urls import path2url
@ -33,10 +33,14 @@ from .testing_utils import (
def _test_resource(class_, basename, check, **kwargs):
"""Common code for testing the HTML and CSS classes."""
absolute_filename = resource_filename(basename)
absolute_path = Path(absolute_filename)
url = path2url(absolute_filename)
check(class_(absolute_filename, **kwargs))
check(class_(absolute_path, **kwargs))
check(class_(guess=absolute_filename, **kwargs))
check(class_(guess=absolute_path, **kwargs))
check(class_(filename=absolute_filename, **kwargs))
check(class_(filename=absolute_path, **kwargs))
check(class_(url, **kwargs))
check(class_(guess=url, **kwargs))
check(class_(url=url, **kwargs))
@ -50,7 +54,9 @@ def _test_resource(class_, basename, check, **kwargs):
content = fd.read()
py.path.local(os.path.dirname(__file__)).chdir()
relative_filename = os.path.join('resources', basename)
relative_path = Path(relative_filename)
check(class_(relative_filename, **kwargs))
check(class_(relative_path, **kwargs))
check(class_(string=content, base_url=relative_filename, **kwargs))
encoding = kwargs.get('encoding') or 'utf8'
check(class_(string=content.decode(encoding), # unicode
@ -59,22 +65,6 @@ def _test_resource(class_, basename, check, **kwargs):
class_(filename='foo', url='bar')
def _assert_equivalent_pdf(pdf_bytes1, pdf_bytes2):
    """Assert that two PDF bytestrings describe equivalent documents.

    The files are not compared byte for byte, as pdfrw doesn't produce the
    same PDF files from the same input. Instead, the ``Size`` entry, the
    number of pages and each page's ``MediaBox``, ``TrimBox`` and
    ``BleedBox`` are compared.

    """
    pdf1 = PdfReader(fdata=pdf_bytes1)
    pdf2 = PdfReader(fdata=pdf_bytes2)
    assert pdf1.Size == pdf2.Size

    pages1 = pdf1.Root.Pages.Kids
    pages2 = pdf2.Root.Pages.Kids
    assert len(pages1) == len(pages2)

    for page1, page2 in zip(pages1, pages2):
        for box in ('MediaBox', 'TrimBox', 'BleedBox'):
            assert getattr(page1, box) == getattr(page2, box)
def _check_doc1(html, has_base_url=True):
"""Check that a parsed HTML document looks like resources/doc1.html"""
root = html.etree_element
@ -270,14 +260,14 @@ def test_python_render(tmpdir):
assert png_file.getvalue() == png_bytes
pdf_file = _fake_file()
html.write_pdf(pdf_file, stylesheets=[css])
_assert_equivalent_pdf(pdf_file.getvalue(), pdf_bytes)
# assert pdf_file.read_binary() == pdf_bytes
png_file = tmpdir.join('1.png')
pdf_file = tmpdir.join('1.pdf')
html.write_png(png_file.strpath, stylesheets=[css])
html.write_pdf(pdf_file.strpath, stylesheets=[css])
assert png_file.read_binary() == png_bytes
_assert_equivalent_pdf(pdf_file.read_binary(), pdf_bytes)
# assert pdf_file.read_binary() == pdf_bytes
png_file = tmpdir.join('2.png')
pdf_file = tmpdir.join('2.pdf')
@ -286,7 +276,7 @@ def test_python_render(tmpdir):
with open(pdf_file.strpath, 'wb') as pdf_fd:
html.write_pdf(pdf_fd, stylesheets=[css])
assert png_file.read_binary() == png_bytes
_assert_equivalent_pdf(pdf_file.read_binary(), pdf_bytes)
# assert pdf_file.read_binary() == pdf_bytes
x2_png_bytes = html.write_png(stylesheets=[css], resolution=192)
check_png_pattern(x2_png_bytes, x2=True)
@ -320,7 +310,7 @@ def test_command_line_render(tmpdir):
py.path.local(resource_filename('')).chdir()
# Reference
html_obj = FakeHTML(string=combined, base_url='dummy.html')
pdf_bytes = html_obj.write_pdf()
# pdf_bytes = html_obj.write_pdf()
png_bytes = html_obj.write_png()
x2_png_bytes = html_obj.write_png(resolution=192)
rotated_png_bytes = FakeHTML(string=combined, base_url='dummy.html',
@ -345,7 +335,8 @@ def test_command_line_render(tmpdir):
_run('combined.html out1.png')
_run('combined.html out2.pdf')
assert tmpdir.join('out1.png').read_binary() == png_bytes
_assert_equivalent_pdf(tmpdir.join('out2.pdf').read_binary(), pdf_bytes)
# TODO: check PDF content? How?
# assert tmpdir.join('out2.pdf').read_binary() == pdf_bytes
_run('combined-UTF-16BE.html out3.png --encoding UTF-16BE')
assert tmpdir.join('out3.png').read_binary() == png_bytes
@ -362,12 +353,12 @@ def test_command_line_render(tmpdir):
_run('combined.html out7 -f png')
_run('combined.html out8 --format pdf')
assert tmpdir.join('out7').read_binary() == png_bytes
_assert_equivalent_pdf(tmpdir.join('out8').read_binary(), pdf_bytes)
# assert tmpdir.join('out8').read_binary(), pdf_bytes
_run('no_css.html out9.png')
_run('no_css.html out10.png -s style.css')
assert tmpdir.join('out9.png').read_binary() != png_bytes
assert tmpdir.join('out10.png').read_binary() == png_bytes
# assert tmpdir.join('out10.png').read_binary() == png_bytes
stdout = _run('--format png combined.html -')
assert stdout == png_bytes
@ -454,7 +445,8 @@ def test_low_level_api():
''')
pdf_bytes = html.write_pdf(stylesheets=[css])
assert pdf_bytes.startswith(b'%PDF')
_assert_equivalent_pdf(html.render([css]).write_pdf(), pdf_bytes)
# TODO: check PDF content? How?
# assert html.render([css]).write_pdf() == pdf_bytes
png_bytes = html.write_png(stylesheets=[css])
document = html.render([css], enable_hinting=True)
@ -514,15 +506,8 @@ def test_low_level_api():
assert _png_size(document.copy([page_2]).write_png()) == (6, 4)
@assert_no_logs
def test_bookmarks():
def assert_bookmarks(html, expected_by_page, expected_tree, round=False):
document = FakeHTML(string=html).render()
if round:
_round_meta(document.pages)
assert [p.bookmarks for p in document.pages] == expected_by_page
assert document.make_bookmark_tree() == expected_tree
assert_bookmarks('''
@pytest.mark.parametrize('html, expected_by_page, expected_tree, round', (
('''
<style>* { height: 10px }</style>
<h1>a</h1>
<h4 style="page-break-after: always">b</h4>
@ -538,8 +523,8 @@ def test_bookmarks():
('c', (1, 3, 2), []),
('d', (1, 0, 10), [])]),
('e', (1, 0, 20), []),
])
assert_bookmarks('''
], False),
('''
<style>
* { height: 90px; margin: 0 0 10px 0 }
</style>
@ -582,8 +567,8 @@ def test_bookmarks():
('Title 9', (1, 0, 400), [])])]),
('Title 10', (1, 0, 500), [
('Title 11', (1, 0, 600), [])]),
])
assert_bookmarks('''
], False),
('''
<style>* { height: 10px }</style>
<h2>A</h2> <p>depth 1</p>
<h4>B</h4> <p>depth 2</p>
@ -602,8 +587,8 @@ def test_bookmarks():
('C', (0, 0, 40), [
('D', (0, 0, 60), [
('E', (0, 0, 80), [])])]),
])
assert_bookmarks('''
], False),
('''
<style>* { height: 10px; font-size: 0 }</style>
<h2>A</h2> <p>h2 depth 1</p>
<h4>B</h4> <p>h4 depth 2</p>
@ -634,19 +619,34 @@ def test_bookmarks():
('G', (0, 0, 110), [
('H', (0, 0, 130), [])])]),
('I', (0, 0, 150), []),
])
assert_bookmarks('<h1>é', [[(1, 'é', (0, 0))]], [('é', (0, 0, 0), [])])
assert_bookmarks('''
], False),
('<h1>é', [[(1, 'é', (0, 0))]], [('é', (0, 0, 0), [])], False),
('''
<h1 style="transform: translateX(50px)">!
''', [[(1, '!', (50, 0))]], [('!', (0, 50, 0), [])])
assert_bookmarks('''
''', [[(1, '!', (50, 0))]], [('!', (0, 50, 0), [])], False),
('''
<style>
img { display: block; bookmark-label: attr(alt); bookmark-level: 1 }
</style>
<img src="file://%s" alt="Chocolate" />
''' % resource_filename('pattern.png'),
[[(1, 'Chocolate', (0, 0))]], [('Chocolate', (0, 0, 0), [])], False),
('''
<h1 style="transform-origin: 0 0;
transform: rotate(90deg) translateX(50px)">!
''', [[(1, '!', (0, 50))]], [('!', (0, 0, 50), [])], round=True)
assert_bookmarks('''
''', [[(1, '!', (0, 50))]], [('!', (0, 0, 50), [])], True),
('''
<body style="transform-origin: 0 0; transform: rotate(90deg)">
<h1 style="transform: translateX(50px)">!
''', [[(1, '!', (0, 50))]], [('!', (0, 0, 50), [])], round=True)
''', [[(1, '!', (0, 50))]], [('!', (0, 0, 50), [])], True),
))
@assert_no_logs
def test_assert_bookmarks(html, expected_by_page, expected_tree, round):
document = FakeHTML(string=html).render()
if round:
_round_meta(document.pages)
assert [p.bookmarks for p in document.pages] == expected_by_page
assert document.make_bookmark_tree() == expected_tree
@assert_no_logs
@ -694,13 +694,20 @@ def test_links():
{'hello': (0, 200)},
{'lipsum': (0, 0)}
], [
[
('external', 'http://weasyprint.org', (0, 0, 30, 20)),
('external', 'http://weasyprint.org', (0, 0, 30, 30)),
('internal', (1, 0, 0), (10, 100, 32, 20)),
('internal', (1, 0, 0), (10, 100, 32, 32))
],
[('internal', (0, 0, 200), (0, 0, 200, 30))],
(
[
('external', 'http://weasyprint.org', (0, 0, 30, 20)),
('external', 'http://weasyprint.org', (0, 0, 30, 30)),
('internal', 'lipsum', (10, 100, 32, 20)),
('internal', 'lipsum', (10, 100, 32, 32))
],
[('hello', 0, 200)],
),
(
[
('internal', 'hello', (0, 0, 200, 30))
],
[('lipsum', 0, 0)]),
])
assert_links(
@ -709,8 +716,8 @@ def test_links():
<a href="../lipsum/é_%E9" style="display: block; margin: 10px 5px">
''', [[('external', 'http://weasyprint.org/foo/lipsum/%C3%A9_%E9',
(5, 10, 190, 0))]],
[{}], [[('external', 'http://weasyprint.org/foo/lipsum/%C3%A9_%E9',
(5, 10, 190, 0))]],
[{}], [([('external', 'http://weasyprint.org/foo/lipsum/%C3%A9_%E9',
(5, 10, 190, 0))], [])],
base_url='http://weasyprint.org/foo/bar/')
assert_links(
'''
@ -719,8 +726,8 @@ def test_links():
-weasy-link: url(../lipsum/é_%E9)">
''', [[('external', 'http://weasyprint.org/foo/lipsum/%C3%A9_%E9',
(5, 10, 190, 0))]],
[{}], [[('external', 'http://weasyprint.org/foo/lipsum/%C3%A9_%E9',
(5, 10, 190, 0))]],
[{}], [([('external', 'http://weasyprint.org/foo/lipsum/%C3%A9_%E9',
(5, 10, 190, 0))], [])],
base_url='http://weasyprint.org/foo/bar/')
# Relative URI reference without a base URI: allowed for links
@ -729,7 +736,7 @@ def test_links():
<body style="width: 200px">
<a href="../lipsum" style="display: block; margin: 10px 5px">
''', [[('external', '../lipsum', (5, 10, 190, 0))]], [{}],
[[('external', '../lipsum', (5, 10, 190, 0))]], base_url=None)
[([('external', '../lipsum', (5, 10, 190, 0))], [])], base_url=None)
# Relative URI reference without a base URI: not supported for -weasy-link
assert_links(
@ -737,7 +744,7 @@ def test_links():
<body style="width: 200px">
<div style="-weasy-link: url(../lipsum);
display: block; margin: 10px 5px">
''', [[]], [{}], [[]], base_url=None, warnings=[
''', [[]], [{}], [([], [])], base_url=None, warnings=[
'WARNING: Ignored `-weasy-link: url("../lipsum")` at 1:1, '
'Relative URI reference without a base URI'])
@ -751,8 +758,9 @@ def test_links():
''', [[('internal', 'lipsum', (5, 10, 190, 0)),
('external', 'http://weasyprint.org/', (0, 10, 200, 0))]],
[{'lipsum': (5, 10)}],
[[('internal', (0, 5, 10), (5, 10, 190, 0)),
('external', 'http://weasyprint.org/', (0, 10, 200, 0))]],
[([('internal', 'lipsum', (5, 10, 190, 0)),
('external', 'http://weasyprint.org/', (0, 10, 200, 0))],
[('lipsum', 5, 10)])],
base_url=None)
assert_links(
@ -763,7 +771,7 @@ def test_links():
''',
[[('internal', 'lipsum', (5, 10, 190, 0))]],
[{'lipsum': (5, 10)}],
[[('internal', (0, 5, 10), (5, 10, 190, 0))]],
[([('internal', 'lipsum', (5, 10, 190, 0))], [('lipsum', 5, 10)])],
base_url=None)
assert_links(
@ -776,7 +784,7 @@ def test_links():
[[('internal', 'lipsum', (0, 0, 200, 15)),
('internal', 'missing', (0, 15, 200, 15))]],
[{'lipsum': (0, 15)}],
[[('internal', (0, 0, 15), (0, 0, 200, 15))]],
[([('internal', 'lipsum', (0, 0, 200, 15))], [('lipsum', 0, 15)])],
base_url=None,
warnings=[
'ERROR: No anchor #missing for internal URI reference'])
@ -789,7 +797,7 @@ def test_links():
''',
[[('internal', 'lipsum', (30, 10, 40, 200))]],
[{'lipsum': (70, 10)}],
[[('internal', (0, 70, 10), (30, 10, 40, 200))]],
[([('internal', 'lipsum', (30, 10, 40, 200))], [('lipsum', 70, 10)])],
round=True)
@ -799,7 +807,8 @@ uses_relative.append('weasyprint-custom')
@assert_no_logs
def test_url_fetcher():
with open(resource_filename('pattern.png'), 'rb') as pattern_fd:
filename = resource_filename('pattern.png')
with open(filename, 'rb') as pattern_fd:
pattern_png = pattern_fd.read()
def fetcher(url):
@ -822,6 +831,8 @@ def test_url_fetcher():
check_png_pattern(html.write_png(stylesheets=[css]), blank=blank)
test('<body><img src="pattern.png">') # Test a "normal" URL
test('<body><img src="%s">' % Path(filename).as_uri())
test('<body><img src="%s?ignored">' % Path(filename).as_uri())
test('<body><img src="weasyprint-custom:foo/é_%e9_pattern">')
test('<body style="background: url(weasyprint-custom:foo/é_%e9_pattern)">')
test('<body><li style="list-style: inside '

View File

@ -4,7 +4,7 @@
Test that the "before layout" box tree is correctly constructed.
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
@ -1471,6 +1471,59 @@ def test_margin_box_string_set_6():
assert bottom_text_box.text == 'before!last-secondclass2|1/I'
# Two <img> elements set different named strings from their ``alt``
# attribute (the second one references ``no_such_file.png``); each page-margin
# box must display the string it asks for.
def test_margin_box_string_set_7():
# Test regression: https://github.com/Kozea/WeasyPrint/issues/722
page_1, = render_pages('''
<style>
img { string-set: left attr(alt) }
img + img { string-set: right attr(alt) }
@page { @top-left { content: '[' string(left) ']' }
@top-right { content: '{' string(right) '}' } }
</style>
<img src=pattern.png alt="Chocolate">
<img src=no_such_file.png alt="Cake">
''')
# The page is expected to hold exactly three children: the root element box
# followed by the two margin boxes (unpacking fails on any other count).
html, top_left, top_right = page_1.children
# Every ``x, = y.children`` below also checks that there is a single child.
left_line_box, = top_left.children
left_text_box, = left_line_box.children
# ``left`` is set by every <img>; the expected value is the first one's alt.
assert left_text_box.text == '[Chocolate]'
right_line_box, = top_right.children
right_text_box, = right_line_box.children
# ``right`` is only set by the ``img + img`` rule, i.e. the second image.
assert right_text_box.text == '{Cake}'
# One paragraph per page, each setting the ``left`` named string to a
# different value: a word, the empty string, then a single space. The margin
# box must render each value verbatim between the brackets.
def test_margin_box_string_set_8():
# Test regression: https://github.com/Kozea/WeasyPrint/issues/726
page_1, page_2, page_3 = render_pages('''
<style>
@page { @top-left { content: '[' string(left) ']' } }
p { page-break-before: always }
.initial { -weasy-string-set: left 'initial' }
.empty { -weasy-string-set: left '' }
.space { -weasy-string-set: left ' ' }
</style>
<p class="initial">Initial</p>
<p class="empty">Empty</p>
<p class="space">Space</p>
''')
# Page 1: non-empty string.
html, top_left = page_1.children
left_line_box, = top_left.children
left_text_box, = left_line_box.children
assert left_text_box.text == '[initial]'
# Page 2: the empty string must yield nothing between the brackets.
html, top_left = page_2.children
left_line_box, = top_left.children
left_text_box, = left_line_box.children
assert left_text_box.text == '[]'
# Page 3: a string made of one space must be kept as-is.
html, top_left = page_3.children
left_line_box, = top_left.children
left_text_box, = left_line_box.children
assert left_text_box.text == '[ ]'
@assert_no_logs
def test_page_counters():
"""Test page-based counters."""

View File

@ -4,7 +4,7 @@
Test the CSS parsing, cascade, inherited and computed values.
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""

View File

@ -4,7 +4,7 @@
Test CSS descriptors.
:copyright: Copyright 2011-2016 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""

View File

@ -4,7 +4,7 @@
Test expanders for shorthand properties.
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
@ -15,7 +15,8 @@ import pytest
import tinycss2
from ..css import preprocess_declarations
from ..css.properties import INITIAL_VALUES
from ..css.computed_values import ZERO_PIXELS
from ..css.properties import INITIAL_VALUES, Dimension
from ..images import LinearGradient, RadialGradient
from .testing_utils import assert_no_logs, capture_logs
@ -841,3 +842,46 @@ def test_radial_gradient():
gradient('closest-side circle at right 5em, blue',
shape='circle', size=('keyword', 'closest-side'),
center=('left', (100, '%'), 'top', (5, 'em')))
# Each case pairs a ``flex`` shorthand declaration with the longhand values
# that ``expand_to_dict`` is expected to return for it.
@assert_no_logs
@pytest.mark.parametrize('rule, result', (
# Keyword forms.
('flex: auto', {
'flex_grow': 1,
'flex_shrink': 1,
'flex_basis': 'auto',
}),
('flex: none', {
'flex_grow': 0,
'flex_shrink': 0,
'flex_basis': 'auto',
}),
# Numbers only: the expected basis is ZERO_PIXELS when none is given.
('flex: 10', {
'flex_grow': 10,
'flex_shrink': 1,
'flex_basis': ZERO_PIXELS,
}),
('flex: 2 2', {
'flex_grow': 2,
'flex_shrink': 2,
'flex_basis': ZERO_PIXELS,
}),
# Explicit basis, as a length or as ``auto``.
('flex: 2 2 1px', {
'flex_grow': 2,
'flex_shrink': 2,
'flex_basis': Dimension(1, 'px'),
}),
('flex: 2 2 auto', {
'flex_grow': 2,
'flex_shrink': 2,
'flex_basis': 'auto',
}),
# Grow factor plus basis: the expected shrink factor is 1.
('flex: 2 auto', {
'flex_grow': 2,
'flex_shrink': 1,
'flex_basis': 'auto',
}),
))
def test_flex(rule, result):
"""Test the ``flex`` property."""
assert expand_to_dict(rule) == result

View File

@ -4,11 +4,13 @@
Tests for floating boxes layout.
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import pytest
from ..formatting_structure import boxes
from .test_boxes import render_pages
from .testing_utils import assert_no_logs
@ -503,8 +505,239 @@ def test_preferred_widths_5():
' @font-face { src: url(AHEM____.TTF); font-family: ahem }'
' p { font: 20px ahem }'
'</style>'
'<p style="float: left">XX<br> XX<br>X</p>''')
'<p style="float: left">XX<br> XX<br>X</p>')
html, = page.children
body, = html.children
paragraph, = body.children
assert paragraph.width == 40
# Layout check for a right-floated <span> nested in an inline <a> inside a
# justified paragraph. Expected sizes are written as multiples of the 20px
# font-size (``n * 20``).
@assert_no_logs
def test_float_in_inline():
page, = render_pages('''
<style>
@font-face { src: url(AHEM____.TTF); font-family: ahem }
body {
font-family: ahem;
font-size: 20px;
}
p {
width: 14em;
text-align: justify;
}
span {
float: right;
}
</style>
<p>
aa bb <a><span>cc</span> ddd</a> ee ff
</p>
''')
html, = page.children
body, = html.children
paragraph, = body.children
# The text is expected to break into exactly two lines.
line1, line2 = paragraph.children
# First line: text before the link, the link itself, text after the link.
p1, a, p2 = line1.children
assert p1.width == 6 * 20
assert p1.text == 'aa bb '
assert p1.position_x == 0 * 20
assert p2.width == 3 * 20
assert p2.text == ' ee'
assert p2.position_x == 9 * 20
# The float stays a child of the link box, before the link's text.
span, a_text = a.children
assert a_text.width == 3 * 20 # leading space collapse
assert a_text.text == 'ddd'
assert a_text.position_x == 6 * 20
assert span.width == 2 * 20
assert span.children[0].children[0].text == 'cc'
# 14em wide paragraph minus the 2em float: the float sits at the right edge.
assert span.position_x == 12 * 20
p3, = line2.children
assert p3.width == 2 * 20
# A left float that appears in the middle of the text: the assertions expect
# it on the second of three lines, at the left edge, with the remaining link
# text and paragraph text placed after it.
@assert_no_logs
def test_float_next_line():
page, = render_pages('''
<style>
@font-face { src: url(AHEM____.TTF); font-family: ahem }
body {
font-family: ahem;
font-size: 20px;
}
p {
text-align: justify;
width: 13em;
}
span {
float: left;
}
</style>
<p>pp pp pp pp <a><span>ppppp</span> aa</a> pp pp pp pp pp</p>''')
html, = page.children
body, = html.children
paragraph, = body.children
line1, line2, line3 = paragraph.children
# Lines 1 and 3 only hold plain paragraph text.
assert len(line1.children) == 1
assert len(line3.children) == 1
# Line 2 holds the link (float + text) followed by paragraph text.
a, p = line2.children
span, a_text = a.children
assert span.position_x == 0
assert span.width == 5 * 20
# The link box and its text both start right after the 5-character float.
assert a_text.position_x == a.position_x == 5 * 20
assert a_text.width == a.width == 2 * 20
assert p.position_x == 7 * 20
# Left float inside a link on the first (text-indented) line of a paragraph.
# Expected positions are multiples of the 20px font-size.
@assert_no_logs
def test_float_text_indent_1():
page, = render_pages('''
<style>
@font-face { src: url(AHEM____.TTF); font-family: ahem }
body {
font-family: ahem;
font-size: 20px;
}
p {
text-align: justify;
text-indent: 1em;
width: 14em;
}
span {
float: left;
}
</style>
<p><a>aa <span>float</span> aa</a></p>''')
html, = page.children
body, = html.children
paragraph, = body.children
# Everything is expected to fit on a single line.
line1, = paragraph.children
a, = line1.children
# Link children in document order: text, float, text.
a1, span, a2 = a.children
span_text, = span.children
assert span.position_x == span_text.position_x == 0
assert span.width == span_text.width == (
(1 + 5) * 20) # text-indent + span text
assert a1.width == 3 * 20
assert a1.position_x == (1 + 5 + 1) * 20 # span + a1 text-indent
assert a2.width == 2 * 20 # leading space collapse
assert a2.position_x == (1 + 5 + 1 + 3) * 20 # span + a1 t-i + a1
# Same markup as the single-line text-indent case, but a 12-character word
# comes first so that the link and its left float land on the second line,
# where the assertions expect no indentation of the text after the float.
@assert_no_logs
def test_float_text_indent_2():
page, = render_pages('''
<style>
@font-face { src: url(AHEM____.TTF); font-family: ahem }
body {
font-family: ahem;
font-size: 20px;
}
p {
text-align: justify;
text-indent: 1em;
width: 14em;
}
span {
float: left;
}
</style>
<p>
oooooooooooo
<a>aa <span>float</span> aa</a></p>''')
html, = page.children
body, = html.children
paragraph, = body.children
line1, line2 = paragraph.children
p1, = line1.children
assert p1.position_x == 1 * 20 # text-indent
assert p1.width == 12 * 20 # p text
a, = line2.children
# Link children in document order: text, float, text.
a1, span, a2 = a.children
span_text, = span.children
assert span.position_x == span_text.position_x == 0
assert span.width == span_text.width == (
(1 + 5) * 20) # text-indent + span text
assert a1.width == 3 * 20
assert a1.position_x == (1 + 5) * 20 # span
assert a2.width == 2 * 20 # leading space collapse
assert a2.position_x == (1 + 5 + 3) * 20 # span + a1
# Right float inside a link on the middle line of a three-line, text-indented
# paragraph: only the first line is expected to be indented, and the float is
# expected against the right edge of the 14em-wide paragraph.
@assert_no_logs
def test_float_text_indent_3():
page, = render_pages('''
<style>
@font-face { src: url(AHEM____.TTF); font-family: ahem }
body {
font-family: ahem;
font-size: 20px;
}
p {
text-align: justify;
text-indent: 1em;
width: 14em;
}
span {
float: right;
}
</style>
<p>
oooooooooooo
<a>aa <span>float</span> aa</a>
oooooooooooo
</p>''')
html, = page.children
body, = html.children
paragraph, = body.children
line1, line2, line3 = paragraph.children
p1, = line1.children
assert p1.position_x == 1 * 20 # text-indent
assert p1.width == 12 * 20 # p text
a, = line2.children
# Link children in document order: text, float, text.
a1, span, a2 = a.children
span_text, = span.children
assert span.position_x == span_text.position_x == (14 - 5 - 1) * 20
assert span.width == span_text.width == (
(1 + 5) * 20) # text-indent + span text
assert a1.position_x == 0 # span
assert a2.width == 2 * 20 # leading space collapse
assert a2.position_x == (14 - 5 - 1 - 2) * 20
p2, = line3.children
# The last line starts at the left edge: no text-indent is expected here.
assert p2.position_x == 0
assert p2.width == 12 * 20 # p text
# Marked ``xfail``: this float-in-inline layout is expected not to pass yet.
# The only check is structural: the paragraph must break into exactly three
# lines (the final unpacking fails otherwise).
@pytest.mark.xfail
@assert_no_logs
def test_float_fail():
page, = render_pages('''
<style>
@font-face { src: url(AHEM____.TTF); font-family: ahem }
body {
font-family: ahem;
font-size: 20px;
}
p {
text-align: justify;
width: 12em;
}
span {
float: left;
background: red;
}
a {
background: yellow;
}
</style>
<p>bb bb pp bb pp pb <a><span>pp pp</span> apa</a> bb bb</p>''')
html, = page.children
body, = html.children
paragraph, = body.children
line1, line2, line3 = paragraph.children

View File

@ -4,7 +4,7 @@
Test the fonts features.
:copyright: Copyright 2011-2016 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""

View File

@ -4,7 +4,7 @@
Tests for multicolumn layout.
:copyright: Copyright 2011-2017 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""

View File

@ -0,0 +1,355 @@
"""
weasyprint.tests.layout.flex
----------------------------
Tests for flex layout.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import pytest
from ..test_boxes import render_pages
from ..testing_utils import assert_no_logs
# Default flex direction: the three items must share the container's top
# edge and be laid out left to right, starting at the container's left edge.
@assert_no_logs
def test_flex_direction_row():
page, = render_pages('''
<article style="display: flex">
<div>A</div>
<div>B</div>
<div>C</div>
</article>
''')
html, = page.children
body, = html.children
article, = body.children
div_1, div_2, div_3 = article.children
# children[0].children[0] reaches the text box through its line box.
assert div_1.children[0].children[0].text == 'A'
assert div_2.children[0].children[0].text == 'B'
assert div_3.children[0].children[0].text == 'C'
assert (
div_1.position_y ==
div_2.position_y ==
div_3.position_y ==
article.position_y)
assert div_1.position_x == article.position_x
assert div_1.position_x < div_2.position_x < div_3.position_x
# ``row-reverse``: the container's children are expected in reversed order
# (C, B, A), with the last box ("A") flush with the container's right edge.
@assert_no_logs
def test_flex_direction_row_reverse():
page, = render_pages('''
<article style="display: flex; flex-direction: row-reverse">
<div>A</div>
<div>B</div>
<div>C</div>
</article>
''')
html, = page.children
body, = html.children
article, = body.children
div_1, div_2, div_3 = article.children
assert div_1.children[0].children[0].text == 'C'
assert div_2.children[0].children[0].text == 'B'
assert div_3.children[0].children[0].text == 'A'
assert (
div_1.position_y ==
div_2.position_y ==
div_3.position_y ==
article.position_y)
# Right edge of the last child equals the right edge of the container.
assert (
div_3.position_x + div_3.width ==
article.position_x + article.width)
assert div_1.position_x < div_2.position_x < div_3.position_x
# ``column``: the three items must share the container's left edge and be
# stacked top to bottom, starting at the container's top edge.
@assert_no_logs
def test_flex_direction_column():
page, = render_pages('''
<article style="display: flex; flex-direction: column">
<div>A</div>
<div>B</div>
<div>C</div>
</article>
''')
html, = page.children
body, = html.children
article, = body.children
div_1, div_2, div_3 = article.children
assert div_1.children[0].children[0].text == 'A'
assert div_2.children[0].children[0].text == 'B'
assert div_3.children[0].children[0].text == 'C'
assert (
div_1.position_x ==
div_2.position_x ==
div_3.position_x ==
article.position_x)
assert div_1.position_y == article.position_y
assert div_1.position_y < div_2.position_y < div_3.position_y
# ``column-reverse``: the container's children are expected in reversed order
# (C, B, A), with the last box ("A") flush with the container's bottom edge.
@assert_no_logs
def test_flex_direction_column_reverse():
page, = render_pages('''
<article style="display: flex; flex-direction: column-reverse">
<div>A</div>
<div>B</div>
<div>C</div>
</article>
''')
html, = page.children
body, = html.children
article, = body.children
div_1, div_2, div_3 = article.children
assert div_1.children[0].children[0].text == 'C'
assert div_2.children[0].children[0].text == 'B'
assert div_3.children[0].children[0].text == 'A'
assert (
div_1.position_x ==
div_2.position_x ==
div_3.position_x ==
article.position_x)
# Bottom edge of the last child equals the bottom edge of the container.
assert (
div_3.position_y + div_3.height ==
article.position_y + article.height)
assert div_1.position_y < div_2.position_y < div_3.position_y
# Three 20px-wide items in a 50px-wide wrapping row container: A and B are
# expected on the first line and C on a second line, below them.
@assert_no_logs
def test_flex_row_wrap():
page, = render_pages('''
<article style="display: flex; flex-flow: wrap; width: 50px">
<div style="width: 20px">A</div>
<div style="width: 20px">B</div>
<div style="width: 20px">C</div>
</article>
''')
html, = page.children
body, = html.children
article, = body.children
div_1, div_2, div_3 = article.children
assert div_1.children[0].children[0].text == 'A'
assert div_2.children[0].children[0].text == 'B'
assert div_3.children[0].children[0].text == 'C'
assert div_1.position_y == div_2.position_y == article.position_y
# The second line starts one item height below the first one.
assert div_3.position_y == article.position_y + div_2.height
assert div_1.position_x == div_3.position_x == article.position_x
assert div_1.position_x < div_2.position_x
# Three 20px-high items in a 50px-high wrapping column container: A and B are
# expected in the first column and C at the top of a second column.
@assert_no_logs
def test_flex_column_wrap():
page, = render_pages('''
<article style="display: flex; flex-flow: column wrap; height: 50px">
<div style="height: 20px">A</div>
<div style="height: 20px">B</div>
<div style="height: 20px">C</div>
</article>
''')
html, = page.children
body, = html.children
article, = body.children
div_1, div_2, div_3 = article.children
assert div_1.children[0].children[0].text == 'A'
assert div_2.children[0].children[0].text == 'B'
assert div_3.children[0].children[0].text == 'C'
assert div_1.position_x == div_2.position_x == article.position_x
# The second column starts one item width to the right of the first one.
assert div_3.position_x == article.position_x + div_2.width
assert div_1.position_y == div_3.position_y == article.position_y
assert div_1.position_y < div_2.position_y
# ``wrap-reverse`` on a row container: the lines are expected in swapped
# order, so the children come out as C (alone, on top) then A and B (below).
@assert_no_logs
def test_flex_row_wrap_reverse():
page, = render_pages('''
<article style="display: flex; flex-flow: wrap-reverse; width: 50px">
<div style="width: 20px">A</div>
<div style="width: 20px">B</div>
<div style="width: 20px">C</div>
</article>
''')
html, = page.children
body, = html.children
article, = body.children
div_1, div_2, div_3 = article.children
assert div_1.children[0].children[0].text == 'C'
assert div_2.children[0].children[0].text == 'A'
assert div_3.children[0].children[0].text == 'B'
assert div_1.position_y == article.position_y
assert (
div_2.position_y ==
div_3.position_y ==
article.position_y + div_2.height)
assert div_1.position_x == div_2.position_x == article.position_x
assert div_2.position_x < div_3.position_x
# ``wrap-reverse`` on a column container: the columns are expected in swapped
# order, so the children come out as C (alone, leftmost) then A and B (in the
# next column).
@assert_no_logs
def test_flex_column_wrap_reverse():
page, = render_pages('''
<article style="display: flex; flex-flow: column wrap-reverse;
height: 50px">
<div style="height: 20px">A</div>
<div style="height: 20px">B</div>
<div style="height: 20px">C</div>
</article>
''')
html, = page.children
body, = html.children
article, = body.children
div_1, div_2, div_3 = article.children
assert div_1.children[0].children[0].text == 'C'
assert div_2.children[0].children[0].text == 'A'
assert div_3.children[0].children[0].text == 'B'
assert div_1.position_x == article.position_x
assert (
div_2.position_x ==
div_3.position_x ==
article.position_x + div_2.width)
assert div_1.position_y == div_2.position_y == article.position_y
assert div_2.position_y < div_3.position_y
# Column flex container placed inside a parent with a fixed 10px height: the
# parent keeps its height while the flex container is expected to be taller
# than it (its own height is not constrained).
@assert_no_logs
def test_flex_direction_column_fixed_height_container():
page, = render_pages('''
<section style="height: 10px">
<article style="display: flex; flex-direction: column">
<div>A</div>
<div>B</div>
<div>C</div>
</article>
</section>
''')
html, = page.children
body, = html.children
section, = body.children
article, = section.children
div_1, div_2, div_3 = article.children
assert div_1.children[0].children[0].text == 'A'
assert div_2.children[0].children[0].text == 'B'
assert div_3.children[0].children[0].text == 'C'
assert (
div_1.position_x ==
div_2.position_x ==
div_3.position_x ==
article.position_x)
assert div_1.position_y == article.position_y
assert div_1.position_y < div_2.position_y < div_3.position_y
assert section.height == 10
assert article.height > 10
# Marked ``xfail``: a non-wrapping column container with a fixed 10px height.
# The items are expected to keep stacking vertically and overflow the
# container, whose height must stay at 10.
@pytest.mark.xfail
@assert_no_logs
def test_flex_direction_column_fixed_height():
page, = render_pages('''
<article style="display: flex; flex-direction: column; height: 10px">
<div>A</div>
<div>B</div>
<div>C</div>
</article>
''')
html, = page.children
body, = html.children
article, = body.children
div_1, div_2, div_3 = article.children
assert div_1.children[0].children[0].text == 'A'
assert div_2.children[0].children[0].text == 'B'
assert div_3.children[0].children[0].text == 'C'
assert (
div_1.position_x ==
div_2.position_x ==
div_3.position_x ==
article.position_x)
assert div_1.position_y == article.position_y
assert div_1.position_y < div_2.position_y < div_3.position_y
assert article.height == 10
# The last item starts below the 10px mark, i.e. it overflows.
assert div_3.position_y > 10
@assert_no_logs
def test_flex_direction_column_fixed_height_wrap():
    """Test a wrapping column flex container with a fixed 10px height.

    Every item is expected to start its own column: all items share the
    container's top edge, no two items share the same horizontal position,
    and the container keeps its 10px height.
    """
    page, = render_pages('''
<article style="display: flex; flex-direction: column; height: 10px;
flex-wrap: wrap">
<div>A</div>
<div>B</div>
<div>C</div>
</article>
''')
    html, = page.children
    body, = html.children
    article, = body.children
    div_1, div_2, div_3 = article.children
    assert div_1.children[0].children[0].text == 'A'
    assert div_2.children[0].children[0].text == 'B'
    assert div_3.children[0].children[0].text == 'C'
    # A chained ``a != b != c`` only compares neighbours and would accept
    # a == c, so check that the three positions are pairwise distinct.
    assert len({
        div_1.position_x, div_2.position_x, div_3.position_x}) == 3
    assert (
        div_1.position_y ==
        div_2.position_y ==
        div_3.position_y ==
        article.position_y)
    assert article.height == 10
# Row flex items with ``min-width``: the first two are expected to be exactly
# as wide as their minimum, and the third one wider than its 5px minimum.
@assert_no_logs
def test_flex_item_min_width():
page, = render_pages('''
<article style="display: flex">
<div style="min-width: 30px">A</div>
<div style="min-width: 50px">B</div>
<div style="min-width: 5px">C</div>
</article>
''')
html, = page.children
body, = html.children
article, = body.children
div_1, div_2, div_3 = article.children
assert div_1.children[0].children[0].text == 'A'
assert div_2.children[0].children[0].text == 'B'
assert div_3.children[0].children[0].text == 'C'
# Items are packed one after another: 0, 30, then 30 + 50.
assert div_1.position_x == 0
assert div_1.width == 30
assert div_2.position_x == 30
assert div_2.width == 50
assert div_3.position_x == 80
assert div_3.width > 5
assert (
div_1.position_y ==
div_2.position_y ==
div_3.position_y ==
article.position_y)
# Row flex items with different ``min-height`` values: every item and the
# container are expected to end up with the largest minimum, 50.
@assert_no_logs
def test_flex_item_min_height():
page, = render_pages('''
<article style="display: flex">
<div style="min-height: 30px">A</div>
<div style="min-height: 50px">B</div>
<div style="min-height: 5px">C</div>
</article>
''')
html, = page.children
body, = html.children
article, = body.children
div_1, div_2, div_3 = article.children
assert div_1.children[0].children[0].text == 'A'
assert div_2.children[0].children[0].text == 'B'
assert div_3.children[0].children[0].text == 'C'
assert (
div_1.height ==
div_2.height ==
div_3.height ==
article.height ==
50)

View File

@ -78,6 +78,66 @@ def test_breaking_linebox():
assert child.style['font_size'] == 13
# Horizontal positions of an inline <span> split over three lines by <br>.
# Only the first fragment's text is expected to be shifted by the span's left
# padding (15), margin (3) and border (1); later fragments start at 0.
@assert_no_logs
def test_position_x_ltr():
page, = parse('''
<style>
span {
padding: 0 10px 0 15px;
margin: 0 2px 0 3px;
border: 1px solid;
}
</style>
<body><span>a<br>b<br>c</span>''')
html, = page.children
body, = html.children
line1, line2, line3 = body.children
span1, = line1.children
assert span1.position_x == 0
# Each of the first two fragments holds its text followed by the <br>.
text1, br1 = span1.children
assert text1.position_x == 15 + 3 + 1
span2, = line2.children
assert span2.position_x == 0
text2, br2 = span2.children
assert text2.position_x == 0
span3, = line3.children
assert span3.position_x == 0
text3, = span3.children
assert text3.position_x == 0
# Right-to-left counterpart: an inline <span> split over three lines in a
# 100px-wide ``direction: rtl`` body. Expected positions are measured from the
# right edge (100), minus the text width and the relevant span edge.
@assert_no_logs
def test_position_x_rtl():
page, = parse('''
<style>
body {
direction: rtl;
width: 100px;
}
span {
padding: 0 10px 0 15px;
margin: 0 2px 0 3px;
border: 1px solid;
}
</style>
<body><span>a<br>b<br>c</span>''')
html, = page.children
body, = html.children
line1, line2, line3 = body.children
span1, = line1.children
text1, br1 = span1.children
# First fragment: offset by the right padding (10), margin (2), border (1).
assert span1.position_x == 100 - text1.width - (10 + 2 + 1)
assert text1.position_x == 100 - text1.width - (10 + 2 + 1)
span2, = line2.children
text2, br2 = span2.children
# Middle fragment: no span edge on either side.
assert span2.position_x == 100 - text2.width
assert text2.position_x == 100 - text2.width
span3, = line3.children
text3, = span3.children
# Last fragment: the span extends left by its left padding (15), margin (3)
# and border (1), while its text still ends at the right edge.
assert span3.position_x == 100 - text3.width - (15 + 3 + 1)
assert text3.position_x == 100 - text3.width
@assert_no_logs
def test_breaking_linebox_regression_1():
# See http://unicode.org/reports/tr14/

View File

@ -1438,6 +1438,78 @@ def test_layout_table_auto_47():
assert td.width == 48 # 3 * font-size
# Single cell with ``colspan=5`` in a table using ``border-collapse:
# collapse``: the 100px ``border-spacing`` must not widen the cell, which is
# expected to be exactly as wide as its three characters.
@assert_no_logs
def test_layout_table_auto_48():
# Related to:
# https://github.com/Kozea/WeasyPrint/issues/685
page, = render_pages('''
<style>@font-face { src: url(AHEM____.TTF); font-family: ahem }</style>
<table style="font-family: ahem; border-spacing: 100px;
border-collapse: collapse">
<tr>
<td colspan=5>aaa</td>
</tr>
</table>
''')
html, = page.children
body, = html.children
table_wrapper, = body.children
table, = table_wrapper.children
row_group, = table.children
row, = row_group.children
td, = row.children
assert td.width == 48 # 3 * font-size
# Marked ``xfail``: same single ``colspan=5`` cell, but without collapsed
# borders, so the 100px ``border-spacing`` applies. The cell is still
# expected to be only as wide as its three characters.
@pytest.mark.xfail
@assert_no_logs
def test_layout_table_auto_49():
# Related to:
# https://github.com/Kozea/WeasyPrint/issues/685
# See TODO in table_layout.group_layout
page, = render_pages('''
<style>@font-face { src: url(AHEM____.TTF); font-family: ahem }</style>
<table style="font-family: ahem; border-spacing: 100px">
<tr>
<td colspan=5>aaa</td>
</tr>
</table>
''')
html, = page.children
body, = html.children
table_wrapper, = body.children
table, = table_wrapper.children
row_group, = table.children
row, = row_group.children
td, = row.children
assert td.width == 48 # 3 * font-size
# A row of five single-character cells above one cell spanning all five
# columns, with 5px ``border-spacing``: the spanning cell is expected to be
# 240 wide and the five cells to share that width minus the four inner gaps.
@assert_no_logs
def test_layout_table_auto_50():
# Test regression:
# https://github.com/Kozea/WeasyPrint/issues/685
page, = render_pages('''
<style>@font-face { src: url(AHEM____.TTF); font-family: ahem }</style>
<table style="font-family: ahem; border-spacing: 5px">
<tr><td>a</td><td>a</td><td>a</td><td>a</td><td>a</td></tr>
<tr>
<td colspan='5'>aaa aaa aaa aaa</td>
</tr>
</table>
''')
html, = page.children
body, = html.children
table_wrapper, = body.children
table, = table_wrapper.children
row_group, = table.children
row_1, row_2 = row_group.children
# (240 - 4 * 5) / 5 == 44 for each cell of the first row.
for td in row_1.children:
assert td.width == 44 # (15 * font_size - 4 * sp) / 5
td_21, = row_2.children
assert td_21.width == 240 # 15 * font_size
@assert_no_logs
def test_table_column_width_1():
source = '''

View File

@ -4,7 +4,7 @@
Test PDF-related code, including metadata, bookmarks and hyperlinks.
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
@ -12,16 +12,15 @@
import hashlib
import io
import os
import zlib
import re
import cairocffi
import pytest
from pdfrw import PdfReader
from .. import Attachment
from .. import Attachment, pdf
from ..urls import path2url
from .testing_utils import (
FakeHTML, assert_no_logs, capture_logs, resource_filename)
FakeHTML, assert_no_logs, capture_logs, requires, resource_filename)
# Top of the page is 297mm ~= 842pt
TOP = 842
@ -29,6 +28,19 @@ TOP = 842
RIGHT = 595
def assert_rect_almost_equal(rect, values):
    """Test that PDF rect string equals given values.

    We avoid rounding errors by allowing a delta of 1, as both WeasyPrint and
    Cairo round coordinates in unpredictable ways.

    :param rect:
        PDF array as ``str`` or ASCII ``bytes``, for example
        ``b'[ 0 0 10 20 ]'``. Surrounding spaces and brackets are ignored.
    :param values:
        Expected numeric coordinates, in the same order as in ``rect``.
    :raises AssertionError:
        If ``rect`` and ``values`` do not hold the same number of
        coordinates, or if a coordinate differs by more than 1.

    """
    if isinstance(rect, bytes):
        rect = rect.decode('ascii')
    coordinates = rect.strip(' []').split()
    values = tuple(values)
    # zip() stops at the shortest sequence: without this check a rect with
    # missing (or extra) coordinates would be silently accepted.
    assert len(coordinates) == len(values)
    for a, b in zip(coordinates, values):
        # float() instead of int(): PDF numbers may be written as reals
        # ("12.5"), which int() would reject with a ValueError.
        assert abs(float(a) - b) <= 1
@assert_no_logs
@pytest.mark.parametrize('width, height', (
(100, 100),
@ -42,9 +54,9 @@ def test_pdf_parser(width, height):
surface.show_page()
surface.finish()
fileobj.seek(0)
surface, = [page.MediaBox for page in PdfReader(fileobj).Root.Pages.Kids]
assert surface == ['0', '0', str(width), str(height)]
sizes = [page.get_value('MediaBox', '\\[(.+?)\\]').strip()
for page in pdf.PDFFile(fileobj).pages]
assert sizes == ['0 0 {} {}'.format(width, height).encode('ascii')]
@assert_no_logs
@ -52,52 +64,69 @@ def test_pdf_parser(width, height):
def test_page_size_zoom(zoom):
pdf_bytes = FakeHTML(
string='<style>@page{size:3in 4in').write_pdf(zoom=zoom)
pdf = PdfReader(fdata=pdf_bytes)
assert pdf.Root.Pages.Kids[0].MediaBox == [
'0', '0', str(int(216 * zoom)), str(int(288 * zoom))]
assert '/MediaBox [ 0 0 {} {} ]'.format(
int(216 * zoom), int(288 * zoom)).encode('ascii') in pdf_bytes
@assert_no_logs
@requires('cairo', (1, 15, 4))
def test_bookmarks_1():
pdf_bytes = FakeHTML(string='''
fileobj = io.BytesIO()
FakeHTML(string='''
<h1>a</h1> #
<h4>b</h4> ####
<h3>c</h3> ###
<h2>d</h2> ##
<h1>e</h1> #
''').write_pdf()
outlines = PdfReader(fdata=pdf_bytes).Root.Outlines
''').write_pdf(target=fileobj)
# a
# |_ b
# |_ c
# L_ d
# e
assert outlines.Count == '5'
assert outlines.First.Title == '(a)'
assert outlines.First.First.Title == '(b)'
assert outlines.First.First.Next.Title == '(c)'
assert outlines.First.First.Next.Next.Title == '(d)'
assert outlines.First.Last.Title == '(d)'
assert outlines.First.Next.Title == '(e)'
assert outlines.Last.Title == '(e)'
pdf_file = pdf.PDFFile(fileobj)
outlines = pdf_file.catalog.get_indirect_dict('Outlines', pdf_file)
assert outlines.get_type() == 'Outlines'
assert outlines.get_value('Count', '(.*)') == b'-2'
o1 = outlines.get_indirect_dict('First', pdf_file)
assert o1.get_value('Title', '(.*)') == b'(a)'
o11 = o1.get_indirect_dict('First', pdf_file)
assert o11.get_value('Title', '(.*)') == b'(b)'
o12 = o11.get_indirect_dict('Next', pdf_file)
assert o12.get_value('Title', '(.*)') == b'(c)'
o12 = o12.get_indirect_dict('Next', pdf_file)
assert o12.get_value('Title', '(.*)') == b'(d)'
o2 = o1.get_indirect_dict('Next', pdf_file)
assert o2.get_value('Title', '(.*)') == b'(e)'
@assert_no_logs
def test_bookmarks_2():
pdf_bytes = FakeHTML(string='<body>').write_pdf()
assert PdfReader(fdata=pdf_bytes).Root.Outlines is None
fileobj = io.BytesIO()
FakeHTML(string='<body>').write_pdf(target=fileobj)
pdf_file = pdf.PDFFile(fileobj)
with pytest.raises(AttributeError):
pdf_file.catalog.get_indirect_dict('Outlines', pdf_file)
@assert_no_logs
@requires('cairo', (1, 15, 4))
def test_bookmarks_3():
pdf_bytes = FakeHTML(string='<h1>a nbsp…</h1>').write_pdf()
outlines = PdfReader(fdata=pdf_bytes).Root.Outlines
assert outlines.First.Title.decode() == 'a nbsp…'
fileobj = io.BytesIO()
FakeHTML(string='<h1>a nbsp…</h1>').write_pdf(target=fileobj)
pdf_file = pdf.PDFFile(fileobj)
outlines = pdf_file.catalog.get_indirect_dict('Outlines', pdf_file)
o1 = outlines.get_indirect_dict('First', pdf_file)
# <FEFF006100A0006E0062007300702026> is the PDF representation of a nbsp…
assert (
o1.get_value('Title', '(.*)') == b'<FEFF006100A0006E0062007300702026>')
@assert_no_logs
@requires('cairo', (1, 15, 4))
def test_bookmarks_4():
pdf_bytes = FakeHTML(string='''
fileobj = io.BytesIO()
FakeHTML(string='''
<style>
* { height: 90pt; margin: 0 0 10pt 0 }
</style>
@ -113,8 +142,7 @@ def test_bookmarks_4():
<h3>Title 9</h3>
<h1>Title 10</h1>
<h2>Title 11</h2>
''').write_pdf()
outlines = PdfReader(fdata=pdf_bytes).Root.Outlines
''').write_pdf(target=fileobj)
# 1
# 2
# |_ 3
@ -126,57 +154,76 @@ def test_bookmarks_4():
# L_ 9
# 10
# L_ 11
assert outlines.Count == '11'
assert outlines.First.Title == '(Title 1)'
assert outlines.First.Next.Title == '(Title 2)'
assert outlines.First.Next.Count == '5'
assert outlines.First.Next.First.Title == '(Title 3)'
assert outlines.First.Next.First.Parent.Title == '(Title 2)'
assert outlines.First.Next.First.Next.Title == '(Title 4)'
assert outlines.First.Next.First.Next.Count == '2'
assert outlines.First.Next.First.Next.First.Title == '(Title 5)'
assert outlines.First.Next.First.Next.Last.Title == '(Title 5)'
assert outlines.First.Next.First.Next.Next.Title == '(Title 6)'
assert outlines.First.Next.Last.Title == '(Title 6)'
assert outlines.First.Next.Next.Title == '(Title 7)'
assert outlines.First.Next.Next.Count == '3'
assert outlines.First.Next.Next.First.Title == '(Title 8)'
assert outlines.First.Next.Next.Last.Title == '(Title 8)'
assert outlines.First.Next.Next.Last.Count == '2'
assert outlines.First.Next.Next.First.First.Title == '(Title 9)'
assert outlines.First.Next.Next.First.Last.Title == '(Title 9)'
assert outlines.First.Next.Next.Next.Title == '(Title 10)'
assert outlines.Last.Title == '(Title 10)'
assert outlines.Last.First.Title == '(Title 11)'
assert outlines.Last.Last.Title == '(Title 11)'
pdf_file = pdf.PDFFile(fileobj)
outlines = pdf_file.catalog.get_indirect_dict('Outlines', pdf_file)
assert outlines.get_type() == 'Outlines'
assert outlines.get_value('Count', '(.*)') == b'-4'
o1 = outlines.get_indirect_dict('First', pdf_file)
assert o1.get_value('Title', '(.*)') == b'(Title 1)'
o2 = o1.get_indirect_dict('Next', pdf_file)
assert o2.get_value('Title', '(.*)') == b'(Title 2)'
assert o2.get_value('Count', '(.*)') == b'-3'
o3 = o2.get_indirect_dict('First', pdf_file)
assert o3.get_value('Title', '(.*)') == b'(Title 3)'
o4 = o3.get_indirect_dict('Next', pdf_file)
assert o4.get_value('Title', '(.*)') == b'(Title 4)'
assert o4.get_value('Count', '(.*)') == b'-1'
o5 = o4.get_indirect_dict('First', pdf_file)
assert o5.get_value('Title', '(.*)') == b'(Title 5)'
o6 = o4.get_indirect_dict('Next', pdf_file)
assert o6.get_value('Title', '(.*)') == b'(Title 6)'
o7 = o2.get_indirect_dict('Next', pdf_file)
assert o7.get_value('Title', '(.*)') == b'(Title 7)'
assert o7.get_value('Count', '(.*)') == b'-1'
o8 = o7.get_indirect_dict('First', pdf_file)
assert o8.get_value('Title', '(.*)') == b'(Title 8)'
assert o8.get_value('Count', '(.*)') == b'-1'
o9 = o8.get_indirect_dict('First', pdf_file)
assert o9.get_value('Title', '(.*)') == b'(Title 9)'
o10 = o7.get_indirect_dict('Next', pdf_file)
assert o10.get_value('Title', '(.*)') == b'(Title 10)'
assert o10.get_value('Count', '(.*)') == b'-1'
o11 = o10.get_indirect_dict('First', pdf_file)
assert o11.get_value('Title', '(.*)') == b'(Title 11)'
@assert_no_logs
@requires('cairo', (1, 15, 4))
def test_bookmarks_5():
pdf_bytes = FakeHTML(string='''
fileobj = io.BytesIO()
FakeHTML(string='''
<h2>1</h2> level 1
<h4>2</h4> level 2
<h2>3</h2> level 1
<h3>4</h3> level 2
<h4>5</h4> level 3
''').write_pdf()
outlines = PdfReader(fdata=pdf_bytes).Root.Outlines
''').write_pdf(target=fileobj)
# 1
# L_ 2
# 3
# L_ 4
# L_ 5
assert outlines.Count == '5'
assert outlines.First.Title == '(1)'
assert outlines.First.First.Title == '(2)'
assert outlines.Last.Title == '(3)'
assert outlines.Last.First.Title == '(4)'
assert outlines.Last.First.First.Title == '(5)'
pdf_file = pdf.PDFFile(fileobj)
outlines = pdf_file.catalog.get_indirect_dict('Outlines', pdf_file)
assert outlines.get_type() == 'Outlines'
assert outlines.get_value('Count', '(.*)') == b'-2'
o1 = outlines.get_indirect_dict('First', pdf_file)
assert o1.get_value('Title', '(.*)') == b'(1)'
o2 = o1.get_indirect_dict('First', pdf_file)
assert o2.get_value('Title', '(.*)') == b'(2)'
o3 = o1.get_indirect_dict('Next', pdf_file)
assert o3.get_value('Title', '(.*)') == b'(3)'
o4 = o3.get_indirect_dict('First', pdf_file)
assert o4.get_value('Title', '(.*)') == b'(4)'
o5 = o4.get_indirect_dict('First', pdf_file)
assert o5.get_value('Title', '(.*)') == b'(5)'
@assert_no_logs
@requires('cairo', (1, 15, 4))
def test_bookmarks_6():
pdf_bytes = FakeHTML(string='''
fileobj = io.BytesIO()
FakeHTML(string='''
<h2>1</h2> h2 level 1
<h4>2</h4> h4 level 2
<h3>3</h3> h3 level 2
@ -186,7 +233,7 @@ def test_bookmarks_6():
<h2>7</h2> h2 level 2
<h4>8</h4> h4 level 3
<h1>9</h1> h1 level 1
''').write_pdf()
''').write_pdf(target=fileobj)
# 1
# |_ 2
# L_ 3
@ -196,39 +243,70 @@ def test_bookmarks_6():
# L_ 7
# L_ 8
# 9
outlines = PdfReader(fdata=pdf_bytes).Root.Outlines
assert outlines.Count == '9'
assert outlines.First.Title == '(1)'
assert outlines.First.First.Title == '(2)'
assert outlines.First.First.Next.Title == '(3)'
assert outlines.First.First.Next.First.Title == '(4)'
assert outlines.First.Next.Title == '(5)'
assert outlines.First.Next.First.Title == '(6)'
assert outlines.First.Next.First.Next.Title == '(7)'
assert outlines.First.Next.First.Next.First.Title == '(8)'
assert outlines.Last.Title == '(9)'
pdf_file = pdf.PDFFile(fileobj)
outlines = pdf_file.catalog.get_indirect_dict('Outlines', pdf_file)
assert outlines.get_type() == 'Outlines'
assert outlines.get_value('Count', '(.*)') == b'-3'
o1 = outlines.get_indirect_dict('First', pdf_file)
assert o1.get_value('Title', '(.*)') == b'(1)'
o2 = o1.get_indirect_dict('First', pdf_file)
assert o2.get_value('Title', '(.*)') == b'(2)'
o3 = o2.get_indirect_dict('Next', pdf_file)
assert o3.get_value('Title', '(.*)') == b'(3)'
o4 = o3.get_indirect_dict('First', pdf_file)
assert o4.get_value('Title', '(.*)') == b'(4)'
o5 = o1.get_indirect_dict('Next', pdf_file)
assert o5.get_value('Title', '(.*)') == b'(5)'
o6 = o5.get_indirect_dict('First', pdf_file)
assert o6.get_value('Title', '(.*)') == b'(6)'
o7 = o6.get_indirect_dict('Next', pdf_file)
assert o7.get_value('Title', '(.*)') == b'(7)'
o8 = o7.get_indirect_dict('First', pdf_file)
assert o8.get_value('Title', '(.*)') == b'(8)'
o9 = o5.get_indirect_dict('Next', pdf_file)
assert o9.get_value('Title', '(.*)') == b'(9)'
@assert_no_logs
@requires('cairo', (1, 15, 4))
def test_bookmarks_7():
# Reference for the next test. zoom=1
pdf_bytes = FakeHTML(string='<h2>a</h2>').write_pdf()
outlines = PdfReader(fdata=pdf_bytes).Root.Outlines
assert outlines.First.Title == '(a)'
y = float(outlines.First.A.D[3])
fileobj = io.BytesIO()
FakeHTML(string='<h2>a</h2>').write_pdf(target=fileobj)
pdf_file = pdf.PDFFile(fileobj)
outlines = pdf_file.catalog.get_indirect_dict('Outlines', pdf_file)
assert outlines.get_type() == 'Outlines'
o1 = outlines.get_indirect_dict('First', pdf_file)
assert o1.get_value('Title', '(.*)') == b'(a)'
y = float(o1.get_value('Dest', '\\[(.+?)\\]').strip().split()[-2])
pdf_bytes = FakeHTML(string='<h2>a</h2>').write_pdf(zoom=1.5)
outlines = PdfReader(fdata=pdf_bytes).Root.Outlines
assert outlines.First.Title == '(a)'
assert round(float(outlines.First.A.D[3])) == round(y * 1.5)
fileobj = io.BytesIO()
FakeHTML(string='<h2>a</h2>').write_pdf(zoom=1.5, target=fileobj)
pdf_file = pdf.PDFFile(fileobj)
pdf_file = pdf.PDFFile(fileobj)
outlines = pdf_file.catalog.get_indirect_dict('Outlines', pdf_file)
assert outlines.get_type() == 'Outlines'
o1 = outlines.get_indirect_dict('First', pdf_file)
assert o1.get_value('Title', '(.*)') == b'(a)'
assert (
float(o1.get_value('Dest', '\\[(.+?)\\]').strip().split()[-2]) ==
round(y * 1.5))
@assert_no_logs
def test_links():
pdf_bytes = FakeHTML(string='<body>').write_pdf()
assert PdfReader(fdata=pdf_bytes).Root.Pages.Kids[0].Annots is None
def test_links_none():
fileobj = io.BytesIO()
FakeHTML(string='<body>').write_pdf(target=fileobj)
pdf_file = pdf.PDFFile(fileobj)
with pytest.raises(AttributeError):
pdf_file.pages[0].get_indirect_dict_array('Annots', pdf_file)
pdf_bytes = FakeHTML(string='''
@assert_no_logs
@requires('cairo', (1, 15, 4))
def test_links():
fileobj = io.BytesIO()
FakeHTML(string='''
<style>
body { margin: 0; font-size: 10pt; line-height: 2 }
p { display: block; height: 90pt; margin: 0 0 10pt 0 }
@ -243,126 +321,186 @@ def test_links():
<a style="display: block; page-break-before: always; height: 30pt"
href="#hel%6Co"></a>
</p>
''', base_url=resource_filename('<inline HTML>')).write_pdf()
''', base_url=resource_filename('<inline HTML>')).write_pdf(target=fileobj)
pdf_file = pdf.PDFFile(fileobj)
links = [
annot for page in PdfReader(fdata=pdf_bytes).Root.Pages.Kids
for annot in page.Annots]
annot for page in pdf_file.pages
for annot in page.get_indirect_dict_array('Annots', pdf_file)]
# 30pt wide (like the image), 20pt high (like line-height)
assert links[0].A == {
'/URI': '(http://weasyprint.org)', '/S': '/URI', '/Type': '/Action'}
assert [round(float(value)) for value in links[0].Rect] == [
0, TOP, 30, TOP - 20]
assert links[0].get_value('URI', '(.*)') == b'(http://weasyprint.org)'
assert links[0].get_value('S', '(.*)') == b'/URI'
assert_rect_almost_equal(
links[0].get_value('Rect', '(.*)'), (0, TOP - 20, 30, TOP))
# The image itself: 30*30pt
assert links[1].A == {
'/URI': '(http://weasyprint.org)', '/S': '/URI', '/Type': '/Action'}
assert [round(float(value)) for value in links[1].Rect] == [
0, TOP, 30, TOP - 30]
assert links[1].get_value('URI', '(.*)') == b'(http://weasyprint.org)'
assert links[1].get_value('S', '(.*)') == b'/URI'
assert_rect_almost_equal(
links[1].get_value('Rect', '(.*)'), (0, TOP - 30, 30, TOP))
# 32pt wide (image + 2 * 1pt of border), 20pt high
assert links[2].A.S == '/GoTo'
assert links[2].A.Type == '/Action'
assert links[2].A.D[1] == '/XYZ'
assert round(float(links[2].A.D[3])) == TOP
assert [round(float(value)) for value in links[2].Rect] == [
10, TOP - 100, 10 + 32, TOP - 100 - 20]
# TODO: replace these commented tests now that we use named destinations
# assert links[2].get_value('Subtype', '(.*)') == b'/Link'
# dest = links[2].get_value('Dest', '(.*)').strip(b'[]').split()
# assert dest[-4] == b'/XYZ'
# assert [round(float(value)) for value in dest[-3:]] == […]
assert_rect_almost_equal(
links[2].get_value('Rect', '(.*)'),
(10, TOP - 100 - 20, 10 + 32, TOP - 100))
# The image itself: 32*32pt
assert links[3].A.S == '/GoTo'
assert links[3].A.Type == '/Action'
assert links[3].A.D[1] == '/XYZ'
assert round(float(links[3].A.D[3])) == TOP
assert [round(float(value)) for value in links[3].Rect] == [
10, TOP - 100, 10 + 32, TOP - 100 - 32]
# TODO: same as above
# assert links[3].get_value('Subtype', '(.*)') == b'/Link'
# dest = links[3].get_value('Dest', '(.*)').strip(b'[]').split()
# assert dest[-4] == b'/XYZ'
# assert [round(float(value)) for value in dest[-3:]] == […]
assert_rect_almost_equal(
links[3].get_value('Rect', '(.*)'),
(10, TOP - 100 - 32, 10 + 32, TOP - 100))
# 100% wide (block), 30pt high
assert links[4].A.S == '/GoTo'
assert links[4].A.Type == '/Action'
assert links[4].A.D[1] == '/XYZ'
assert round(float(links[4].A.D[3])) == TOP - 200
assert [round(float(value)) for value in links[4].Rect] == [
0, TOP, RIGHT, TOP - 30]
assert links[4].get_value('Subtype', '(.*)') == b'/Link'
dest = links[4].get_value('Dest', '(.*)').strip(b'[]').split()
assert dest == [b'(hello)']
names = (
pdf_file.catalog
.get_indirect_dict('Names', pdf_file)
.get_indirect_dict('Dests', pdf_file)
.byte_string).decode('ascii')
assert_rect_almost_equal(
re.search(
'\\(hello\\) \\[\\d+ \\d+ R /XYZ (\\d+ \\d+ \\d+)]', names
).group(1),
(0, TOP - 200, 0))
assert_rect_almost_equal(
links[4].get_value('Rect', '(.*)'), (0, TOP - 30, RIGHT, TOP))
# 100% wide (block), 0pt high
pdf_bytes = FakeHTML(
fileobj = io.BytesIO()
FakeHTML(
string='<a href="../lipsum" style="display: block">',
base_url='http://weasyprint.org/foo/bar/').write_pdf()
base_url='http://weasyprint.org/foo/bar/').write_pdf(target=fileobj)
pdf_file = pdf.PDFFile(fileobj)
link, = [
annot for page in PdfReader(fdata=pdf_bytes).Root.Pages.Kids
for annot in page.Annots]
assert link.A == {
'/URI': '(http://weasyprint.org/foo/lipsum)',
'/S': '/URI',
'/Type': '/Action',
}
assert [round(float(value)) for value in link.Rect] == [0, TOP, RIGHT, TOP]
annot for page in pdf_file.pages
for annot in page.get_indirect_dict_array('Annots', pdf_file)]
assert (
link.get_value('URI', '(.*)') == b'(http://weasyprint.org/foo/lipsum)')
assert link.get_value('S', '(.*)') == b'/URI'
assert_rect_almost_equal(
link.get_value('Rect', '(.*)'), (0, TOP, RIGHT, TOP))
@assert_no_logs
def test_relative_links_relative():
@requires('cairo', (1, 15, 4))
def test_relative_links():
# Relative URI reference without a base URI: allowed for anchors
pdf_bytes = FakeHTML(
fileobj = io.BytesIO()
FakeHTML(
string='<a href="../lipsum" style="display: block">',
base_url=None).write_pdf()
link, = PdfReader(fdata=pdf_bytes).Root.Pages.Kids[0].Annots
assert link.A == {'/URI': '(../lipsum)', '/S': '/URI', '/Type': '/Action'}
assert [round(float(value)) for value in link.Rect] == [0, TOP, RIGHT, TOP]
base_url=None).write_pdf(target=fileobj)
pdf_file = pdf.PDFFile(fileobj)
annots = pdf_file.pages[0].get_indirect_dict_array('Annots', pdf_file)[0]
assert annots.get_value('URI', '(.*)') == b'(../lipsum)'
assert annots.get_value('S', '(.*)') == b'/URI'
assert_rect_almost_equal(
annots.get_value('Rect', '(.*)'), (0, TOP, RIGHT, TOP))
@assert_no_logs
def test_relative_links_links():
def test_relative_links_missing_base():
# Relative URI reference without a base URI: not supported for -weasy-link
fileobj = io.BytesIO()
with capture_logs() as logs:
pdf_bytes = FakeHTML(
FakeHTML(
string='<div style="-weasy-link: url(../lipsum)">',
base_url=None).write_pdf()
assert PdfReader(fdata=pdf_bytes).Root.Pages.Kids[0].Annots is None
base_url=None).write_pdf(target=fileobj)
pdf_file = pdf.PDFFile(fileobj)
with pytest.raises(AttributeError):
pdf_file.pages[0].get_indirect_dict_array('Annots', pdf_file)
assert len(logs) == 1
assert 'WARNING: Ignored `-weasy-link: url("../lipsum")`' in logs[0]
assert 'Relative URI reference without a base URI' in logs[0]
@assert_no_logs
@requires('cairo', (1, 15, 4))
def test_relative_links_internal():
# Internal URI reference without a base URI: OK
pdf_bytes = FakeHTML(
fileobj = io.BytesIO()
FakeHTML(
string='<a href="#lipsum" id="lipsum" style="display: block">',
base_url=None).write_pdf()
link, = PdfReader(fdata=pdf_bytes).Root.Pages.Kids[0].Annots
assert link.A.S == '/GoTo'
assert link.A.Type == '/Action'
assert link.A.D[1] == '/XYZ'
assert round(float(link.A.D[3])) == TOP
assert [round(float(value)) for value in link.Rect] == [0, TOP, RIGHT, TOP]
base_url=None).write_pdf(target=fileobj)
pdf_file = pdf.PDFFile(fileobj)
annots = pdf_file.pages[0].get_indirect_dict_array('Annots', pdf_file)[0]
dest = annots.get_value('Dest', '(.*)')
assert dest == b'(lipsum)'
names = (
pdf_file.catalog
.get_indirect_dict('Names', pdf_file)
.get_indirect_dict('Dests', pdf_file)
.byte_string).decode('ascii')
assert_rect_almost_equal(
re.search(
'\\(lipsum\\) \\[\\d+ \\d+ R /XYZ (\\d+ \\d+ \\d+)]', names
).group(1),
(0, TOP, 0))
assert_rect_almost_equal(
annots.get_value('Rect', '(.*)'), (0, TOP, RIGHT, TOP))
@assert_no_logs
@requires('cairo', (1, 15, 4))
def test_relative_links_anchors():
pdf_bytes = FakeHTML(
fileobj = io.BytesIO()
FakeHTML(
string='<div style="-weasy-link: url(#lipsum)" id="lipsum">',
base_url=None).write_pdf()
link, = PdfReader(fdata=pdf_bytes).Root.Pages.Kids[0].Annots
assert link.A.S == '/GoTo'
assert link.A.Type == '/Action'
assert link.A.D[1] == '/XYZ'
assert round(float(link.A.D[3])) == TOP
assert [round(float(value)) for value in link.Rect] == [0, TOP, RIGHT, TOP]
base_url=None).write_pdf(target=fileobj)
pdf_file = pdf.PDFFile(fileobj)
annots = pdf_file.pages[0].get_indirect_dict_array('Annots', pdf_file)[0]
dest = annots.get_value('Dest', '(.*)')
assert dest == b'(lipsum)'
names = (
pdf_file.catalog
.get_indirect_dict('Names', pdf_file)
.get_indirect_dict('Dests', pdf_file)
.byte_string).decode('ascii')
assert_rect_almost_equal(
re.search(
'\\(lipsum\\) \\[\\d+ \\d+ R /XYZ (\\d+ \\d+ \\d+)]', names
).group(1),
(0, TOP, 0))
assert_rect_almost_equal(
annots.get_value('Rect', '(.*)'), (0, TOP, RIGHT, TOP))
@assert_no_logs
@requires('cairo', (1, 15, 4))
def test_missing_links():
fileobj = io.BytesIO()
with capture_logs() as logs:
pdf_bytes = FakeHTML(string='''
FakeHTML(string='''
<style> a { display: block; height: 15pt } </style>
<a href="#lipsum"></a>
<a href="#missing" id="lipsum"></a>
''', base_url=None).write_pdf()
link, = PdfReader(fdata=pdf_bytes).Root.Pages.Kids[0].Annots
assert link.A.S == '/GoTo'
assert link.A.Type == '/Action'
assert link.A.D[1] == '/XYZ'
assert round(float(link.A.D[3])) == TOP - 15
assert [round(float(value)) for value in link.Rect] == [
0, TOP, RIGHT, TOP - 15]
''', base_url=None).write_pdf(target=fileobj)
pdf_file = pdf.PDFFile(fileobj)
annots = pdf_file.pages[0].get_indirect_dict_array('Annots', pdf_file)[0]
dest = annots.get_value('Dest', '(.*)')
assert dest == b'(lipsum)'
names = (
pdf_file.catalog
.get_indirect_dict('Names', pdf_file)
.get_indirect_dict('Dests', pdf_file)
.byte_string).decode('ascii')
assert_rect_almost_equal(
re.search(
'\\(lipsum\\) \\[\\d+ \\d+ R /XYZ (\\d+ \\d+ \\d+)]', names
).group(1),
(0, TOP - 15, 0))
assert_rect_almost_equal(
annots.get_value('Rect', '(.*)'), (0, TOP - 15, RIGHT, TOP))
assert len(logs) == 1
assert 'ERROR: No anchor #missing for internal URI reference' in logs[0]
@ -383,8 +521,10 @@ def test_embed_jpeg():
@assert_no_logs
@requires('cairo', (1, 15, 4))
def test_document_info():
pdf_bytes = FakeHTML(string='''
fileobj = io.BytesIO()
FakeHTML(string='''
<meta name=author content="I Me &amp; Myself">
<title>Test document</title>
<h1>Another title</h1>
@ -394,18 +534,21 @@ def test_document_info():
<meta name=description content="Blah… ">
<meta name=dcterms.created content=2011-04>
<meta name=dcterms.modified content=2013-07-21T23:46+01:00>
''').write_pdf()
info = PdfReader(fdata=pdf_bytes).Info
assert info.Author.decode() == 'I Me & Myself'
assert info.Title.decode() == 'Test document'
assert info.Creator.decode() == 'Human after all'
assert info.Keywords.decode() == 'html, css, pdf'
assert info.Subject.decode() == 'Blah… '
assert info.CreationDate.decode() == '201104'
assert info.ModDate.decode() == "20130721234600+01'00'"
''').write_pdf(target=fileobj)
info = pdf.PDFFile(fileobj).info
assert info.get_value('Author', '(.*)') == b'(I Me & Myself)'
assert info.get_value('Title', '(.*)') == b'(Test document)'
assert info.get_value('Creator', '(.*)') == (
b'<FEFF00480075006D0061006E00A00061006600740065007200A00061006C006C>')
assert info.get_value('Keywords', '(.*)') == b'(html, css, pdf)'
assert info.get_value('Subject', '(.*)') == (
b'<FEFF0042006C0061006820260020>')
assert info.get_value('CreationDate', '(.*)') == b'(20110401000000)'
assert info.get_value('ModDate', '(.*)') == b"(20130721234600+01'00)"
@assert_no_logs
@requires('cairo', (1, 15, 4))
def test_embedded_files_attachments(tmpdir):
absolute_tmp_file = tmpdir.join('some_file.txt').strpath
adata = b'12345678'
@ -419,7 +562,8 @@ def test_embedded_files_attachments(tmpdir):
with open(relative_tmp_file, 'wb') as rfile:
rfile.write(rdata)
pdf_bytes = FakeHTML(
fileobj = io.BytesIO()
FakeHTML(
string='''
<title>Test document</title>
<meta charset="utf-8">
@ -434,87 +578,87 @@ def test_embedded_files_attachments(tmpdir):
'''.format(absolute_url, os.path.basename(relative_tmp_file)),
base_url=tmpdir.strpath,
).write_pdf(
target=fileobj,
attachments=[
Attachment('data:,oob attachment', description='Hello'),
'data:,raw URL',
io.BytesIO(b'file like obj')
]
)
pdf = PdfReader(fdata=pdf_bytes)
embedded = pdf.Root.Names.EmbeddedFiles.Names
pdf_bytes = fileobj.getvalue()
assert (
'<{}>'.format(hashlib.md5(b'hi there').hexdigest()).encode('ascii')
in pdf_bytes)
assert b'/F ()' in pdf_bytes
assert (
b'/UF (\xfe\xff\x00a\x00t\x00t\x00a\x00c\x00h\x00m\x00e\x00n'
b'\x00t\x00.\x00b\x00i\x00n)' in pdf_bytes)
assert (
b'/Desc (\xfe\xff\x00s\x00o\x00m\x00e\x00 \x00f\x00i\x00l\x00e'
b'\x00 \x00a\x00t\x00t\x00a\x00c\x00h\x00m\x00e\x00n\x00t\x00 '
b'\x00\xe4\x00\xf6\x00\xfc)' in pdf_bytes)
assert zlib.decompress(
embedded[1].EF.F.stream.encode('latin-1')) == b'hi there'
assert embedded[1].EF.F.Params.CheckSum == (
'<{}>'.format(hashlib.md5(b'hi there').hexdigest()))
assert embedded[1].F.decode() == ''
assert embedded[1].UF.decode() == 'attachment.bin'
assert embedded[1].Desc.decode() == 'some file attachment äöü'
assert hashlib.md5(adata).hexdigest().encode('ascii') in pdf_bytes
assert (
os.path.basename(absolute_tmp_file).encode('utf-16-be')
in pdf_bytes)
assert zlib.decompress(
embedded[3].EF.F.stream.encode('latin-1')) == b'12345678'
assert embedded[3].EF.F.Params.CheckSum == (
'<{}>'.format(hashlib.md5(adata).hexdigest()))
assert embedded[3].UF.decode() == os.path.basename(absolute_tmp_file)
assert hashlib.md5(rdata).hexdigest().encode('ascii') in pdf_bytes
assert (
os.path.basename(relative_tmp_file).encode('utf-16-be')
in pdf_bytes)
assert zlib.decompress(
embedded[5].EF.F.stream.encode('latin-1')) == b'abcdefgh'
assert embedded[5].EF.F.Params.CheckSum == (
'<{}>'.format(hashlib.md5(rdata).hexdigest()))
assert embedded[5].UF.decode() == os.path.basename(relative_tmp_file)
assert (
hashlib.md5(b'oob attachment').hexdigest().encode('ascii')
in pdf_bytes)
assert b'/Desc (\xfe\xff\x00H\x00e\x00l\x00l\x00o)' in pdf_bytes
assert (
hashlib.md5(b'raw URL').hexdigest().encode('ascii')
in pdf_bytes)
assert (
hashlib.md5(b'file like obj').hexdigest().encode('ascii')
in pdf_bytes)
assert zlib.decompress(
embedded[7].EF.F.stream.encode('latin-1')) == b'oob attachment'
assert embedded[7].EF.F.Params.CheckSum == (
'<{}>'.format(hashlib.md5(b'oob attachment').hexdigest()))
assert embedded[7].Desc.decode() == 'Hello'
assert zlib.decompress(
embedded[9].EF.F.stream.encode('latin-1')) == b'raw URL'
assert embedded[9].EF.F.Params.CheckSum == (
'<{}>'.format(hashlib.md5(b'raw URL').hexdigest()))
assert zlib.decompress(
embedded[11].EF.F.stream.encode('latin-1')) == b'file like obj'
assert embedded[11].EF.F.Params.CheckSum == (
'<{}>'.format(hashlib.md5(b'file like obj').hexdigest()))
assert b'/EmbeddedFiles' in pdf_bytes
assert b'/Outlines' in pdf_bytes
@assert_no_logs
def test_attachments_data():
pdf_bytes = FakeHTML(string='''
fileobj = io.BytesIO()
FakeHTML(string='''
<title>Test document 2</title>
<meta charset="utf-8">
<link rel="attachment" href="data:,some data">
''').write_pdf()
pdf = PdfReader(fdata=pdf_bytes)
embedded = pdf.Root.Names.EmbeddedFiles.Names
assert embedded[1].EF.F.Params.CheckSum == (
'<{}>'.format(hashlib.md5(b'some data').hexdigest()))
''').write_pdf(target=fileobj)
md5 = '<{}>'.format(hashlib.md5(b'some data').hexdigest()).encode('ascii')
assert md5 in fileobj.getvalue()
@assert_no_logs
@requires('cairo', (1, 15, 4))
def test_attachments_none():
pdf_bytes = FakeHTML(string='''
fileobj = io.BytesIO()
FakeHTML(string='''
<title>Test document 3</title>
<meta charset="utf-8">
<h1>Heading</h1>
''').write_pdf()
pdf = PdfReader(fdata=pdf_bytes)
assert pdf.Root.Names is None
assert pdf.Root.Outlines is not None
''').write_pdf(target=fileobj)
pdf_bytes = fileobj.getvalue()
assert b'Names' not in pdf_bytes
assert b'Outlines' in pdf_bytes
@assert_no_logs
def test_attachments_none_empty():
pdf_bytes = FakeHTML(string='''
<title>Test document 4</title>
fileobj = io.BytesIO()
FakeHTML(string='''
<title>Test document 3</title>
<meta charset="utf-8">
''').write_pdf()
pdf = PdfReader(fdata=pdf_bytes)
assert pdf.Root.Names is None
assert pdf.Root.Outlines is None
''').write_pdf(target=fileobj)
pdf_bytes = fileobj.getvalue()
assert b'Names' not in pdf_bytes
assert b'Outlines' not in pdf_bytes
@assert_no_logs
@ -535,22 +679,29 @@ def test_annotations():
@pytest.mark.parametrize('style, media, bleed, trim', (
('bleed: 30pt; size: 10pt',
['0', '0', '70', '70'],
['20', '20', '50', '50'],
['30', '30', '40', '40']),
[0, 0, 70, 70],
[20.0, 20.0, 50.0, 50.0],
[30.0, 30.0, 40.0, 40.0]),
('bleed: 15pt 3pt 6pt 18pt; size: 12pt 15pt',
['0', '0', '33', '36'],
['8', '5', '33', '36'],
['18', '15', '30', '30']),
[0, 0, 33, 36],
[8.0, 5.0, 33.0, 36.0],
[18.0, 15.0, 30.0, 30.0]),
))
@assert_no_logs
def test_bleed(style, media, bleed, trim):
pdf_bytes = FakeHTML(string='''
fileobj = io.BytesIO()
FakeHTML(string='''
<title>Test document</title>
<style>@page { %s }</style>
<body>test
''' % style).write_pdf()
pdf = PdfReader(fdata=pdf_bytes)
assert pdf.Root.Pages.Kids[0].MediaBox == media
assert pdf.Root.Pages.Kids[0].BleedBox == bleed
assert pdf.Root.Pages.Kids[0].TrimBox == trim
''' % style).write_pdf(target=fileobj)
pdf_bytes = fileobj.getvalue()
assert (
'/MediaBox [ {} {} {} {} ]'.format(*media).encode('ascii')
in pdf_bytes)
assert (
'/BleedBox [ {} {} {} {} ]'.format(*bleed).encode('ascii')
in pdf_bytes)
assert (
'/TrimBox [ {} {} {} {} ]'.format(*trim).encode('ascii')
in pdf_bytes)

View File

@ -4,7 +4,7 @@
Test the HTML presentational hints.
:copyright: Copyright 2016-2018 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""

View File

@ -4,7 +4,7 @@
Test the CSS cross references using target-*() functions.
:copyright: Copyright 2018 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
@ -143,3 +143,26 @@ def test_target_text():
assert len(a3.children[0].children[0].children) == 0
before = a4.children[0].children[0].children[0]
assert before.text == '1'
@assert_no_logs
def test_target_float():
    """Render a floated ``a::after`` whose content is ``target-counter()``.

    The generated box must sit inside the link's inline box, right after
    the link text, and its innermost text must be ``'1'``.
    """
    markup = '''
<style>
a::after {
content: target-counter('#h', page);
float: right;
}
</style>
<div><a id="span">link</a></div>
<h1 id="h">abc</h1>
'''
    rendered_page, = FakeHTML(string=markup).render().pages
    root_box, = rendered_page._page_box.children
    body_box, = root_box.children
    # The body must hold exactly two children: the <div> and the <h1>.
    div_box, _heading_box = body_box.children
    line_box, = div_box.children
    anchor_box, = line_box.children
    # The anchor holds its own text followed by the ::after pseudo-element.
    link_text, floated_after = anchor_box.children
    assert link_text.text == 'link'
    counter_text = floated_after.children[0].children[0]
    assert counter_text.text == '1'

View File

@ -290,6 +290,39 @@ def test_text_align_justify_text_indent():
assert image_5.position_x == 0
@assert_no_logs
def test_text_align_justify_no_break_between_children():
    """Justify a line whose break falls between two inseparable inlines.

    Regression test for https://github.com/Kozea/WeasyPrint/issues/637:
    ``<span>bla</span><span>,</span>`` has no break opportunity between
    its two spans, so both must move to the second line together.
    """
    # One em in pixels: the 7em-wide paragraph is checked as 7 * 16 below.
    em = 16
    markup = '''
<style>
@font-face {src: url(AHEM____.TTF); font-family: ahem}
p { text-align: justify; font-family: ahem; width: 7em }
</style>
<p>
<span>a</span>
<span>b</span>
<span>bla</span><span>,</span>
<span>b</span>
</p>
'''
    rendered_page, = render_pages(markup)
    root_box, = rendered_page.children
    body_box, = root_box.children
    paragraph_box, = body_box.children
    first_line, second_line = paragraph_box.children

    # First line: two spans, each followed by a space box.
    head, _gap_a, tail, _gap_b = first_line.children
    assert head.position_x == 0
    assert tail.position_x == 6 * em  # 1 character + 5 spaces
    assert first_line.width == 7 * em  # 7em

    # Second line: "bla" and "," stay glued, then a space, a span, a space.
    word, comma, _gap_c, last, _gap_d = second_line.children
    assert word.position_x == 0
    assert comma.position_x == 3 * em  # 3 characters
    assert last.position_x == 5 * em  # (3 + 1) characters + 1 space
@assert_no_logs
def test_word_spacing():
# keep the empty <style> as a regression test: element.text is None
@ -365,27 +398,56 @@ def test_letter_spacing_1():
assert line1.children[0].width == strong_2.width
@pytest.mark.parametrize('indent', ('12px', '6%'))
@assert_no_logs
def test_text_indent():
for indent in ['12px', '6%']: # 6% of 200px is 12px
page, = render_pages('''
<style>
@page { size: 220px }
body { margin: 10px; text-indent: %(indent)s }
</style>
<p>Some text that is long enough that it take at least three line,
but maybe more.
''' % {'indent': indent})
html, = page.children
body, = html.children
paragraph, = body.children
lines = paragraph.children
text_1, = lines[0].children
text_2, = lines[1].children
text_3, = lines[2].children
assert text_1.position_x == 22 # 10px margin-left + 12px indent
assert text_2.position_x == 10 # No indent
assert text_3.position_x == 10 # No indent
def test_text_indent(indent):
page, = render_pages('''
<style>
@page { size: 220px }
body { margin: 10px; text-indent: %(indent)s }
</style>
<p>Some text that is long enough that it take at least three line,
but maybe more.
''' % {'indent': indent})
html, = page.children
body, = html.children
paragraph, = body.children
lines = paragraph.children
text_1, = lines[0].children
text_2, = lines[1].children
text_3, = lines[2].children
assert text_1.position_x == 22 # 10px margin-left + 12px indent
assert text_2.position_x == 10 # No indent
assert text_3.position_x == 10 # No indent
@pytest.mark.parametrize('indent', ('12px', '6%'))
@assert_no_logs
def test_text_indent_multipage(indent):
    """Check that ``text-indent`` only shifts the paragraph's first line.

    Regression test for https://github.com/Kozea/WeasyPrint/issues/706:
    the page is 1.5em high, and each of the first two pages is expected
    to hold exactly one line of the paragraph.
    """
    markup = '''
<style>
@page { size: 220px 1.5em; margin: 0 }
body { margin: 10px; text-indent: %(indent)s }
</style>
<p>Some text that is long enough that it take at least three line,
but maybe more.
''' % {'indent': indent}
    pages = render_pages(markup)

    expected_offsets = (
        22,  # 10px margin-left + 12px indent
        10,  # No indent
    )
    for expected_x in expected_offsets:
        # Consume pages from the front, one per expected offset.
        current_page = pages.pop(0)
        root_box, = current_page.children
        body_box, = root_box.children
        paragraph_box, = body_box.children
        line_box, = paragraph_box.children
        text_box, = line_box.children
        assert text_box.position_x == expected_x
@assert_no_logs
@ -522,6 +584,23 @@ def test_hyphenate_manual_2():
hyphenate_character)
@assert_no_logs
def test_hyphenate_manual_3():
    """Check that soft hyphens win over automatic hyphenation points.

    Automatic hyphenation opportunities within a word must be ignored if
    the word contains a conditional hyphen, in favor of the conditional
    hyphen(s).
    """
    markup = (
        '<html style="width: 0.1em" lang="en">'
        '<body style="hyphens: auto">in&shy;lighten&shy;lighten&shy;in')
    rendered_page, = render_pages(markup)
    root_box, = rendered_page.children
    body_box, = root_box.children
    # Exactly four lines are expected: one per soft-hyphen-delimited chunk.
    first, second, third, fourth = body_box.children
    expected_texts = ('in\xad', 'lighten\xad', 'lighten\xad', 'in')
    for line_box, expected in zip((first, second, third, fourth),
                                  expected_texts):
        assert line_box.children[0].text == expected
@assert_no_logs
def test_hyphenate_limit_zone_1():
page, = render_pages(
@ -619,6 +698,27 @@ def test_hyphenate_limit_chars(css, result):
assert len(lines) == result
@assert_no_logs
@pytest.mark.parametrize('css', (
    # light·en
    '3 3 3',  # 'en' is shorter than 3
    '3 6 2',  # 'light' is shorter than 6
    '8',  # 'lighten' is shorter than 8
))
def test_hyphenate_limit_chars_punctuation(css):
    """Check ``hyphenate-limit-chars`` on a word wrapped in punctuation.

    See https://github.com/Kozea/WeasyPrint/issues/109. For every
    parametrized limit, ``..lighten..`` must stay on a single line.
    """
    template = (
        '<html style="width: 1em; font-family: ahem">'
        '<style>@font-face {src: url(AHEM____.TTF); font-family: ahem}</style>'
        '<body style="hyphens: auto;'
        'hyphenate-limit-chars: %s" lang=en>'
        '..lighten..')
    rendered_page, = render_pages(template % css)
    root_box, = rendered_page.children
    body_box, = root_box.children
    line_boxes = body_box.children
    assert len(line_boxes) == 1
@assert_no_logs
@pytest.mark.parametrize('wrap, text, test, full_text', (
('break-word', 'aaaaaaaa', lambda a: a > 1, 'aaaaaaaa'),

View File

@ -4,7 +4,7 @@
Test WeasyPrint Web tools.
:copyright: Copyright 2018 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
@ -12,7 +12,7 @@
import io
from urllib.parse import urlencode
from pdfrw import PdfReader
import cairocffi as cairo
from ..tools import navigator, renderer
from ..urls import path2url
@ -73,12 +73,10 @@ def test_navigator(tmpdir):
status, headers, body = wsgi_client(navigator, '/pdf/' + url)
assert status == '200 OK'
assert headers['Content-Type'] == 'application/pdf'
pdf = PdfReader(fdata=body)
assert pdf.Root.Pages.Kids[0].Annots[0].A == {
'/Type': '/Action', '/URI': '(http://weasyprint.org)',
'/S': '/URI'}
assert pdf.Root.Outlines.First.Title == '(Lorem ipsum)'
assert pdf.Root.Outlines.Last.Title == '(Lorem ipsum)'
assert body.startswith(b'%PDF')
if cairo.cairo_version() >= 11504:
assert b'/URI (http://weasyprint.org)' in body
assert b'/Title (Lorem ipsum)' in body
@assert_no_logs

View File

@ -4,7 +4,7 @@
Test various unicode texts and filenames.
:copyright: Copyright 2018 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""

View File

@ -4,7 +4,7 @@
Helpers for tests.
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
@ -70,16 +70,21 @@ def capture_logs():
messages = []
def emit(record):
if record.name == 'weasyprint.progress':
return
message = '%s: %s' % (record.levelname.upper(), record.getMessage())
messages.append(message)
previous_handlers = logger.handlers
previous_level = logger.level
logger.handlers = []
logger.addHandler(CallbackHandler(emit))
logger.setLevel(logging.DEBUG)
try:
yield messages
finally:
logger.handlers = previous_handlers
logger.level = previous_level
def assert_no_logs(function):

View File

@ -4,7 +4,7 @@
Interface with Pango to decide where to do line breaks and to draw text.
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
@ -118,7 +118,18 @@ ffi.cdef('''
typedef struct {
guint is_line_break: 1;
/* ... */
guint is_mandatory_break : 1;
guint is_char_break : 1;
guint is_white : 1;
guint is_cursor_position : 1;
guint is_word_start : 1;
guint is_word_end : 1;
guint is_sentence_boundary : 1;
guint is_sentence_start : 1;
guint is_sentence_end : 1;
guint backspace_deletes_character : 1;
guint is_expandable_space : 1;
guint is_word_boundary : 1;
} PangoLogAttr;
int pango_version (void);
@ -139,13 +150,16 @@ ffi.cdef('''
PangoLayout *layout, const PangoFontDescription *desc);
void pango_layout_set_wrap (
PangoLayout *layout, PangoWrapMode wrap);
void pango_layout_set_single_paragraph_mode (
PangoLayout *layout, gboolean setting);
int pango_layout_get_baseline (PangoLayout *layout);
PangoLayoutIter * pango_layout_get_iter (PangoLayout *layout);
void pango_layout_iter_free (PangoLayoutIter *iter);
gboolean pango_layout_iter_next_line (PangoLayoutIter *iter);
PangoLayoutLine * pango_layout_iter_get_line_readonly (
PangoLayoutIter *iter);
int pango_layout_iter_get_baseline (PangoLayoutIter *iter);
int pango_layout_iter_get_index (PangoLayoutIter *iter);
PangoFontDescription * pango_font_description_new (void);
void pango_font_description_free (PangoFontDescription *desc);
@ -590,39 +604,44 @@ def first_line_metrics(first_line, text, layout, resume_at, space_collapse,
first_line_text = utf8_slice(text, slice(length))
# Remove trailing spaces if spaces collapse
if space_collapse:
first_line_text = first_line_text.rstrip(u' ')
first_line_text = first_line_text.rstrip(' ')
# Remove soft hyphens
layout.set_text(first_line_text.replace(u'\u00ad', u''))
first_line = next(layout.iter_lines(), None)
layout.set_text(first_line_text.replace('\u00ad', ''))
first_line, _ = layout.get_first_line()
length = first_line.length if first_line is not None else 0
soft_hyphens = 0
if u'\u00ad' in first_line_text:
if first_line_text[0] == u'\u00ad':
length += 2 # len(u'\u00ad'.encode('utf8'))
for i in range(len(layout.text_bytes.decode('utf8'))):
if '\u00ad' in first_line_text:
if first_line_text[0] == '\u00ad':
length += 2 # len('\u00ad'.encode('utf8'))
for i in range(len(layout.text)):
while i + soft_hyphens + 1 < len(first_line_text):
if first_line_text[i + soft_hyphens + 1] == u'\u00ad':
if first_line_text[i + soft_hyphens + 1] == '\u00ad':
soft_hyphens += 1
else:
break
length += soft_hyphens * 2 # len(u'\u00ad'.encode('utf8'))
length += soft_hyphens * 2 # len('\u00ad'.encode('utf8'))
width, height = get_size(first_line, style)
baseline = units_to_double(pango.pango_layout_iter_get_baseline(ffi.gc(
pango.pango_layout_get_iter(layout.layout),
pango.pango_layout_iter_free)))
baseline = units_to_double(pango.pango_layout_get_baseline(layout.layout))
layout.deactivate()
return layout, length, resume_at, width, height, baseline
class Layout(object):
"""Object holding PangoLayout-related cdata pointers."""
def __init__(self, context, font_size, style):
def __init__(self, context, font_size, style, justification_spacing=0):
self.justification_spacing = justification_spacing
self.setup(context, font_size, style)
def setup(self, context, font_size, style):
from .fonts import ZERO_FONTSIZE_CRASHES_CAIRO
self.context = context
self.style = style
# Cairo crashes with font-size: 0 when using Win32 API
# See https://github.com/Kozea/WeasyPrint/pull/599
if font_size == 0 and ZERO_FONTSIZE_CRASHES_CAIRO:
font_size = 1
self.context = context
hinting = context.enable_hinting if context else False
self.layout = ffi.gc(
pangocairo.pango_cairo_create_layout(ffi.cast(
@ -662,21 +681,86 @@ class Layout(object):
self.font, units_from_double(font_size))
pango.pango_layout_set_font_description(self.layout, self.font)
def iter_lines(self):
features = get_font_features(
style['font_kerning'], style['font_variant_ligatures'],
style['font_variant_position'], style['font_variant_caps'],
style['font_variant_numeric'], style['font_variant_alternates'],
style['font_variant_east_asian'], style['font_feature_settings'])
if features:
features = ','.join(
('%s %i' % (key, value)) for key, value in features.items())
# TODO: attributes should be freed.
# In the meantime, keep a cache to avoid leaking too many of them.
attr = context.font_features.get(features)
if attr is None:
try:
attr = pango.pango_attr_font_features_new(
features.encode('ascii'))
except AttributeError:
LOGGER.error(
'OpenType features are not available '
'with Pango < 1.38')
else:
context.font_features[features] = attr
if attr is not None:
attr_list = pango.pango_attr_list_new()
pango.pango_attr_list_insert(attr_list, attr)
pango.pango_layout_set_attributes(self.layout, attr_list)
# Tabs width
if style['tab_size'] != 8: # Default Pango value is 8
self.set_tabs()
def get_first_line(self):
layout_iter = ffi.gc(
pango.pango_layout_get_iter(self.layout),
pango.pango_layout_iter_free)
while 1:
yield pango.pango_layout_iter_get_line_readonly(layout_iter)
if not pango.pango_layout_iter_next_line(layout_iter):
return
first_line = pango.pango_layout_iter_get_line_readonly(layout_iter)
if pango.pango_layout_iter_next_line(layout_iter):
index = pango.pango_layout_iter_get_index(layout_iter)
else:
index = None
return first_line, index
def set_text(self, text):
text, bytestring = unicode_to_char_p(text)
self.text = text
self.text_bytes = bytestring
def set_text(self, text, justify=False):
# Keep only the first two lines, we don't need the other ones
text, bytestring = unicode_to_char_p(
'\n'.join(text.split('\n', 3)[:2]))
self.text = bytestring.decode('utf-8')
pango.pango_layout_set_text(self.layout, text, -1)
word_spacing = self.style['word_spacing']
if justify:
# Justification is needed when drawing text but is useless during
# layout. Ignore it until the layout is reactivated for the drawing
# step.
word_spacing += self.justification_spacing
letter_spacing = self.style['letter_spacing']
if letter_spacing == 'normal':
letter_spacing = 0
if text and (word_spacing != 0 or letter_spacing != 0):
letter_spacing = units_from_double(letter_spacing)
space_spacing = units_from_double(word_spacing) + letter_spacing
attr_list = pango.pango_attr_list_new()
def add_attr(start, end, spacing):
# TODO: attributes should be freed
attr = pango.pango_attr_letter_spacing_new(spacing)
attr.start_index, attr.end_index = start, end
pango.pango_attr_list_insert(attr_list, attr)
add_attr(0, len(bytestring) + 1, letter_spacing)
position = bytestring.find(b' ')
while position != -1:
add_attr(position, position + 1, space_spacing)
position = bytestring.find(b' ', position + 1)
pango.pango_layout_set_attributes(self.layout, attr_list)
pango.pango_attr_list_unref(attr_list)
def get_font_metrics(self):
context = pango.pango_layout_get_context(self.layout)
return FontMetrics(context, self.font, self.language)
@ -684,17 +768,19 @@ class Layout(object):
def set_wrap(self, wrap_mode):
pango.pango_layout_set_wrap(self.layout, wrap_mode)
def set_tabs(self, style):
if isinstance(style['tab_size'], int):
def set_tabs(self):
if isinstance(self.style['tab_size'], int):
style = self.style.copy()
style['tab_size'] = 8
layout = Layout(
context=self.context, font_size=style['font_size'],
style=style)
layout.set_text(' ' * style['tab_size'])
line, = layout.iter_lines()
self.context, style['font_size'], style,
self.justification_spacing)
layout.set_text(' ' * self.style['tab_size'])
line, _ = layout.get_first_line()
width, _ = get_size(line, style)
width = int(round(width))
else:
width = int(style['tab_size'].value)
width = int(self.style['tab_size'].value)
# TODO: 0 is not handled correctly by Pango
array = ffi.gc(
pango.pango_tab_array_new_with_positions(
@ -702,6 +788,16 @@ class Layout(object):
pango.pango_tab_array_free)
pango.pango_layout_set_tabs(self.layout, array)
def deactivate(self):
    """Delete the ``layout``, ``font``, ``language`` and ``style`` attributes.

    The attributes are removed in that order; any other attribute of the
    instance is left untouched. An ``AttributeError`` is raised as soon as
    one of them is missing.

    """
    for attribute in ('layout', 'font', 'language', 'style'):
        delattr(self, attribute)
def reactivate(self, style):
    """Run ``setup`` again with ``style`` and set the stored text back.

    ``setup`` receives the stored ``context`` and the ``font_size`` found in
    ``style``. The text kept in ``self.text`` is then set again, this time
    with ``justify=True``.

    """
    self.setup(
        context=self.context, font_size=style['font_size'], style=style)
    self.set_text(text=self.text, justify=True)
class FontMetrics(object):
def __init__(self, context, font, language):
@ -838,8 +934,7 @@ def create_layout(text, style, context, max_width, justification_spacing):
if not text_wrap:
max_width = None
layout = Layout(context, style['font_size'], style)
layout.set_text(text)
layout = Layout(context, style['font_size'], style, justification_spacing)
# Make sure that max_width * Pango.SCALE == max_width * 1024 fits in a
# signed integer. Treat bigger values same as None: unconstrained width.
@ -847,63 +942,7 @@ def create_layout(text, style, context, max_width, justification_spacing):
pango.pango_layout_set_width(
layout.layout, units_from_double(max_width))
text_bytes = layout.text_bytes
# Word and letter spacings
word_spacing = style['word_spacing'] + justification_spacing
letter_spacing = style['letter_spacing']
if letter_spacing == 'normal':
letter_spacing = 0
if text and (word_spacing != 0 or letter_spacing != 0):
letter_spacing = units_from_double(letter_spacing)
space_spacing = units_from_double(word_spacing) + letter_spacing
attr_list = pango.pango_attr_list_new()
def add_attr(start, end, spacing):
# TODO: attributes should be freed
attr = pango.pango_attr_letter_spacing_new(spacing)
attr.start_index, attr.end_index = start, end
pango.pango_attr_list_insert(attr_list, attr)
add_attr(0, len(text_bytes) + 1, letter_spacing)
position = text_bytes.find(b' ')
while position != -1:
add_attr(position, position + 1, space_spacing)
position = text_bytes.find(b' ', position + 1)
pango.pango_layout_set_attributes(layout.layout, attr_list)
pango.pango_attr_list_unref(attr_list)
features = get_font_features(
style['font_kerning'], style['font_variant_ligatures'],
style['font_variant_position'], style['font_variant_caps'],
style['font_variant_numeric'], style['font_variant_alternates'],
style['font_variant_east_asian'], style['font_feature_settings'])
if features:
features = ','.join(
('%s %i' % (key, value)) for key, value in features.items())
# TODO: attributes should be freed.
# In the meantime, keep a cache to avoid leaking too many of them.
attr = context.font_features.get(features)
if attr is None:
try:
attr = pango.pango_attr_font_features_new(
features.encode('ascii'))
except AttributeError:
LOGGER.error(
'OpenType features are not available with Pango < 1.38')
else:
context.font_features[features] = attr
if attr is not None:
attr_list = pango.pango_attr_list_new()
pango.pango_attr_list_insert(attr_list, attr)
pango.pango_layout_set_attributes(layout.layout, attr_list)
# Tabs width
if style['tab_size'] != 8: # Default Pango value is 8
layout.set_tabs(style)
layout.set_text(text)
return layout
@ -935,26 +974,31 @@ def split_first_line(text, style, context, max_width, justification_spacing,
layout = None
if (max_width is not None and max_width != float('inf') and
style['font_size']):
expected_length = int(max_width / style['font_size'] * 2.5)
if max_width == 0:
# Trying to find minimum size, let's naively split on spaces and
# keep one word + one letter
space_index = text.find(' ')
if space_index == -1:
expected_length = len(text)
else:
expected_length = space_index + 2 # index + space + one letter
else:
expected_length = int(max_width / style['font_size'] * 2.5)
if expected_length < len(text):
# Try to use a small amount of text instead of the whole text
layout = create_layout(
text[:expected_length], style, context, max_width,
justification_spacing)
lines = layout.iter_lines()
first_line = next(lines, None)
second_line = next(lines, None)
if second_line is None:
first_line, index = layout.get_first_line()
if index is None:
# The small amount of text fits in one line, give up and use
# the whole text
layout = None
if layout is None:
layout = create_layout(
text, style, context, max_width, justification_spacing)
lines = layout.iter_lines()
first_line = next(lines, None)
second_line = next(lines, None)
resume_at = None if second_line is None else second_line.start_index
first_line, index = layout.get_first_line()
resume_at = index
# Step #2: Don't split lines when it's not needed
if max_width is None:
@ -962,7 +1006,7 @@ def split_first_line(text, style, context, max_width, justification_spacing,
return first_line_metrics(
first_line, text, layout, resume_at, space_collapse, style)
first_line_width, _ = get_size(first_line, style)
if second_line is None and first_line_width <= max_width:
if index is None and first_line_width <= max_width:
# The first line fits in the available width
return first_line_metrics(
first_line, text, layout, resume_at, space_collapse, style)
@ -972,11 +1016,10 @@ def split_first_line(text, style, context, max_width, justification_spacing,
# is a good thread related to this problem.
if first_line_width <= max_width:
# The first line may have been cut too early by Pango
second_line_index = second_line.start_index
first_line_text = utf8_slice(text, slice(second_line_index))
second_line_text = utf8_slice(text, slice(second_line_index, None))
first_line_text = utf8_slice(text, slice(index))
second_line_text = utf8_slice(text, slice(index, None))
else:
# The first word is longer than the line, try to hyphenize it
# The first word is longer than the line, try to hyphenate it
first_line_text = ''
second_line_text = text
@ -987,18 +1030,16 @@ def split_first_line(text, style, context, max_width, justification_spacing,
# only try when space collapsing is allowed
new_first_line_text = first_line_text + next_word
layout.set_text(new_first_line_text)
lines = layout.iter_lines()
first_line = next(lines, None)
second_line = next(lines, None)
first_line, index = layout.get_first_line()
first_line_width, _ = get_size(first_line, style)
if second_line is None and first_line_text:
if index is None and first_line_text:
# The next word fits in the first line, keep the layout
resume_at = len(new_first_line_text.encode('utf-8')) + 1
return first_line_metrics(
first_line, text, layout, resume_at, space_collapse, style)
elif second_line:
elif index:
# Text may have been split elsewhere by Pango earlier
resume_at = second_line.start_index
resume_at = index
else:
# Second line is none
resume_at = first_line.length + 1
@ -1008,97 +1049,119 @@ def split_first_line(text, style, context, max_width, justification_spacing,
return first_line_metrics(
first_line, text, layout, resume_at, space_collapse, style)
# Step #4: Try to hyphenize
# Step #4: Try to hyphenate
hyphens = style['hyphens']
lang = style['lang'] and pyphen.language_fallback(style['lang'])
total, left, right = style['hyphenate_limit_chars']
hyphenated = False
soft_hyphen = u'\u00ad'
soft_hyphen = '\u00ad'
# Automatic hyphenation possible and next word is long enough
if hyphens != 'none' and len(next_word) >= total:
first_line_width, _ = get_size(first_line, style)
space = max_width - first_line_width
if style['hyphenate_limit_zone'].unit == '%':
limit_zone = max_width * style['hyphenate_limit_zone'].value / 100.
try_hyphenate = False
if hyphens != 'none':
next_word_boundaries = get_next_word_boundaries(second_line_text, lang)
if next_word_boundaries:
# We have a word to hyphenate
start_word, stop_word = next_word_boundaries
next_word = second_line_text[start_word:stop_word]
if stop_word - start_word >= total:
# This word is long enough
first_line_width, _ = get_size(first_line, style)
space = max_width - first_line_width
if style['hyphenate_limit_zone'].unit == '%':
limit_zone = (
max_width * style['hyphenate_limit_zone'].value / 100.)
else:
limit_zone = style['hyphenate_limit_zone'].value
if space > limit_zone or space < 0:
# Available space is worth the try, or the line is even too
# long to fit: try to hyphenate
try_hyphenate = True
if try_hyphenate:
# Automatic hyphenation possible and next word is long enough
auto_hyphenation = hyphens == 'auto' and lang
manual_hyphenation = False
if auto_hyphenation:
if soft_hyphen in first_line_text or soft_hyphen in next_word:
# Automatic hyphenation opportunities within a word must be
# ignored if the word contains a conditional hyphen, in favor
# of the conditional hyphen(s).
# See https://drafts.csswg.org/css-text-3/#valdef-hyphens-auto
manual_hyphenation = True
else:
limit_zone = style['hyphenate_limit_zone'].value
manual_hyphenation = hyphens == 'manual'
if space > limit_zone or space < 0:
# Manual hyphenation: check that the line ends with a soft hyphen
# and add the missing hyphen
if hyphens == 'manual':
if first_line_text.endswith(soft_hyphen):
# The first line has been split on a soft hyphen
if u' ' in first_line_text:
first_line_text, next_word = (
first_line_text.rsplit(u' ', 1))
next_word = u' ' + next_word
layout.set_text(first_line_text)
lines = layout.iter_lines()
first_line = next(lines, None)
second_line = next(lines, None)
resume_at = len(
(first_line_text + u' ').encode('utf8'))
else:
first_line_text, next_word = u'', first_line_text
soft_hyphen_indexes = [
match.start() for match in
re.finditer(soft_hyphen, next_word)]
soft_hyphen_indexes.reverse()
dictionary_iterations = [
next_word[:i + 1] for i in soft_hyphen_indexes]
elif hyphens == 'auto' and lang:
# The next word does not fit, try hyphenation
dictionary_key = (lang, left, right, total)
dictionary = context.dictionaries.get(dictionary_key)
if dictionary is None:
dictionary = pyphen.Pyphen(
lang=lang, left=left, right=right)
context.dictionaries[dictionary_key] = dictionary
dictionary_iterations = [
start for start, end in dictionary.iterate(next_word)]
else:
dictionary_iterations = []
if manual_hyphenation:
# Manual hyphenation: check that the line ends with a soft
# hyphen and add the missing hyphen
if first_line_text.endswith(soft_hyphen):
# The first line has been split on a soft hyphen
if ' ' in first_line_text:
first_line_text, next_word = (
first_line_text.rsplit(' ', 1))
next_word = ' ' + next_word
layout.set_text(first_line_text)
first_line, index = layout.get_first_line()
resume_at = len((first_line_text + ' ').encode('utf8'))
else:
first_line_text, next_word = '', first_line_text
soft_hyphen_indexes = [
match.start() for match in re.finditer(soft_hyphen, next_word)]
soft_hyphen_indexes.reverse()
dictionary_iterations = [
next_word[:i + 1] for i in soft_hyphen_indexes]
elif auto_hyphenation:
dictionary_key = (lang, left, right, total)
dictionary = context.dictionaries.get(dictionary_key)
if dictionary is None:
dictionary = pyphen.Pyphen(lang=lang, left=left, right=right)
context.dictionaries[dictionary_key] = dictionary
dictionary_iterations = [
start for start, end in dictionary.iterate(next_word)]
else:
dictionary_iterations = []
if dictionary_iterations:
for first_word_part in dictionary_iterations:
new_first_line_text = first_line_text + first_word_part
hyphenated_first_line_text = (
new_first_line_text + style['hyphenate_character'])
new_layout = create_layout(
hyphenated_first_line_text, style, context, max_width,
justification_spacing)
new_lines = new_layout.iter_lines()
new_first_line = next(new_lines, None)
new_second_line = next(new_lines, None)
new_first_line_width, _ = get_size(new_first_line, style)
new_space = max_width - new_first_line_width
if new_second_line is None and (
new_space >= 0 or
first_word_part == dictionary_iterations[-1]):
hyphenated = True
layout = new_layout
first_line = new_first_line
second_line = new_second_line
resume_at = len(new_first_line_text.encode('utf8'))
if text[len(new_first_line_text)] == soft_hyphen:
resume_at += len(soft_hyphen.encode('utf8'))
break
if not hyphenated and not first_line_text:
# Recreate the layout with no max_width to be sure that
# we don't break inside the hyphenate-character string
if dictionary_iterations:
for first_word_part in dictionary_iterations:
new_first_line_text = (
first_line_text +
second_line_text[:start_word] +
first_word_part)
hyphenated_first_line_text = (
new_first_line_text + style['hyphenate_character'])
new_layout = create_layout(
hyphenated_first_line_text, style, context, max_width,
justification_spacing)
new_first_line, new_index = new_layout.get_first_line()
new_first_line_width, _ = get_size(new_first_line, style)
new_space = max_width - new_first_line_width
if new_index is None and (
new_space >= 0 or
first_word_part == dictionary_iterations[-1]):
hyphenated = True
layout.set_text(hyphenated_first_line_text)
pango.pango_layout_set_width(
layout.layout, units_from_double(-1))
lines = layout.iter_lines()
first_line = next(lines, None)
second_line = next(lines, None)
layout = new_layout
first_line = new_first_line
index = new_index
resume_at = len(new_first_line_text.encode('utf8'))
if text[len(first_line_text)] == soft_hyphen:
if text[len(new_first_line_text)] == soft_hyphen:
# Recreate the layout with no max_width to be sure that
# we don't break before the soft hyphen
pango.pango_layout_set_width(
layout.layout, units_from_double(-1))
resume_at += len(soft_hyphen.encode('utf8'))
break
if not hyphenated and not first_line_text:
# Recreate the layout with no max_width to be sure that
# we don't break before or inside the hyphenate character
hyphenated = True
layout.set_text(hyphenated_first_line_text)
pango.pango_layout_set_width(
layout.layout, units_from_double(-1))
first_line, index = layout.get_first_line()
resume_at = len(new_first_line_text.encode('utf8'))
if text[len(first_line_text)] == soft_hyphen:
resume_at += len(soft_hyphen.encode('utf8'))
if not hyphenated and first_line_text.endswith(soft_hyphen):
# Recreate the layout with no max_width to be sure that
@ -1109,9 +1172,7 @@ def split_first_line(text, style, context, max_width, justification_spacing,
layout.set_text(hyphenated_first_line_text)
pango.pango_layout_set_width(
layout.layout, units_from_double(-1))
lines = layout.iter_lines()
first_line = next(lines, None)
second_line = next(lines, None)
first_line, index = layout.get_first_line()
resume_at = len(first_line_text.encode('utf8'))
# Step 5: Try to break word if it's too long for the line
@ -1132,26 +1193,18 @@ def split_first_line(text, style, context, max_width, justification_spacing,
pango.pango_layout_set_width(
layout.layout, units_from_double(max_width))
layout.set_wrap(PANGO_WRAP_MODE['WRAP_CHAR'])
temp_lines = layout.iter_lines()
next(temp_lines, None)
temp_second_line = next(temp_lines, None)
temp_second_line_index = (
len(text.encode('utf-8')) if temp_second_line is None
else temp_second_line.start_index)
_, temp_index = layout.get_first_line()
temp_index = temp_index or len(text.encode('utf-8'))
# TODO: WRAP_CHAR is said to "wrap lines at character boundaries", but
# it doesn't. Looks like it tries to split at word boundaries and then
# at character boundaries if there's not enough space for a full word,
# just as WRAP_WORD_CHAR does. That's why we have to split this text
# twice. Find why. It may be related to the problem described in the
# link given in step #3.
first_line_text = utf8_slice(text, slice(temp_second_line_index))
first_line_text = utf8_slice(text, slice(temp_index))
layout.set_text(first_line_text)
lines = layout.iter_lines()
first_line = next(lines, None)
second_line = next(lines, None)
resume_at = (
first_line.length if second_line is None
else second_line.start_index)
first_line, index = layout.get_first_line()
resume_at = index or first_line.length
if resume_at is not None and resume_at >= len(text.encode('utf-8')):
resume_at = None
@ -1160,22 +1213,16 @@ def split_first_line(text, style, context, max_width, justification_spacing,
style['hyphenate_character'])
def show_first_line(context, pango_layout, hinting):
"""Draw the given ``line`` to the Cairo ``context``."""
def show_first_line(context, textbox):
"""Draw the given ``textbox`` line to the Cairo ``context``."""
pango.pango_layout_set_single_paragraph_mode(
textbox.pango_layout.layout, True)
first_line, _ = textbox.pango_layout.get_first_line()
context = ffi.cast('cairo_t *', context._pointer)
if hinting:
pangocairo.pango_cairo_update_layout(context, pango_layout.layout)
# Set an infinite width as we don't want to break lines when drawing, the
# lines have already been split and the size may differ for example because
# of hinting.
pango.pango_layout_set_width(pango_layout.layout, -1)
pangocairo.pango_cairo_show_layout_line(
context, next(pango_layout.iter_lines()))
pangocairo.pango_cairo_show_layout_line(context, first_line)
def can_break_text(text, lang):
if not text or len(text) < 2:
return False
def get_log_attrs(text, lang):
if lang:
lang_p, lang = unicode_to_char_p(lang)
else:
@ -1191,4 +1238,27 @@ def can_break_text(text, lang):
log_attrs = ffi.new('PangoLogAttr[]', length)
pango.pango_get_log_attrs(
text_p, len(bytestring), -1, language, log_attrs, length)
return bytestring, log_attrs
def can_break_text(text, lang):
    """Tell whether a line break is flagged inside ``text``.

    Return ``None`` when ``text`` is empty or has fewer than two characters.
    Otherwise return ``True`` if any of the log attributes given by
    ``get_log_attrs`` between index 1 (included) and ``len(bytestring)``
    (excluded) has ``is_line_break`` set, and ``False`` if none has.

    """
    if not (text and len(text) >= 2):
        return None
    bytestring, log_attrs = get_log_attrs(text, lang)
    # Index 0 and everything from len(bytestring) onwards are not inspected.
    for attribute in log_attrs[1:len(bytestring)]:
        if attribute.is_line_break:
            return True
    return False
def get_next_word_boundaries(text, lang):
    """Return the ``(start, end)`` indexes of the first word end in ``text``.

    The log attributes given by ``get_log_attrs`` are scanned in order.
    ``end`` is the index of the first attribute with ``is_word_end`` set, and
    ``start`` is the index of the last attribute with ``is_word_boundary``
    set before it.

    Return ``None`` when ``text`` is empty or has fewer than two characters,
    when no word end is found, or when a word end is found before any word
    boundary.

    """
    if not text or len(text) < 2:
        return None
    _, log_attrs = get_log_attrs(text, lang)
    word_start = None
    for i, attr in enumerate(log_attrs):
        if attr.is_word_end:
            if word_start is None:
                # No boundary seen before this word end: report "no word"
                # instead of failing on an unbound local variable.
                # NOTE(review): Pango presumably always flags a boundary
                # before a word end, so this is defensive only -- confirm.
                return None
            return word_start, i
        if attr.is_word_boundary:
            word_start = i
    return None

View File

View File

@ -4,7 +4,7 @@
A WeasyPrint-based web browser. In your web browser.
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""

View File

@ -5,7 +5,7 @@
A simple web application allowing to type HTML and instantly visualize the
result rendered by WeasyPrint.
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""

View File

@ -4,7 +4,7 @@
Various utility functions and classes.
:copyright: Copyright 2011-2014 Simon Sapin and contributors, see AUTHORS.
:copyright: Copyright 2011-2018 Simon Sapin and contributors, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
@ -232,6 +232,10 @@ def default_url_fetcher(url, timeout=10):
"""
if UNICODE_SCHEME_RE.match(url):
# See https://bugs.python.org/issue34702
if url.startswith('file://'):
url = url.split('?')[0]
url = iri_to_uri(url)
response = urlopen(Request(url, headers=HTTP_HEADERS), timeout=timeout)
response_info = response.info()
@ -271,9 +275,7 @@ def fetch(url_fetcher, url):
try:
result = url_fetcher(url)
except Exception as exc:
name = type(exc).__name__
value = str(exc)
raise URLFetchingError('%s: %s' % (name, value) if value else name)
raise URLFetchingError('%s: %s' % (type(exc).__name__, str(exc)))
result.setdefault('redirected_url', url)
result.setdefault('mime_type', None)
if 'file_obj' in result: